summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/powerpc/include/uapi/asm/kvm.h5
-rw-r--r--tools/arch/powerpc/include/uapi/asm/perf_regs.h20
-rw-r--r--tools/arch/riscv/include/uapi/asm/unistd.h2
-rw-r--r--tools/arch/s390/include/uapi/asm/kvm.h7
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h4
-rw-r--r--tools/arch/x86/include/asm/mcsafe_test.h13
-rw-r--r--tools/arch/x86/include/asm/msr-index.h26
-rw-r--r--tools/arch/x86/include/asm/orc_types.h34
-rw-r--r--tools/arch/x86/lib/memcpy_64.S115
-rw-r--r--tools/arch/x86/tools/gen-insn-attr-x86.awk50
-rw-r--r--tools/bootconfig/samples/bad-override.bconf3
-rw-r--r--tools/bootconfig/samples/bad-override2.bconf3
-rw-r--r--tools/bootconfig/samples/good-override.bconf6
-rwxr-xr-xtools/bootconfig/test-bootconfig.sh38
-rw-r--r--tools/bpf/Makefile4
-rw-r--r--tools/bpf/bpftool/Makefile2
-rw-r--r--tools/bpf/bpftool/btf_dumper.c2
-rw-r--r--tools/bpf/bpftool/gen.c22
-rw-r--r--tools/bpf/bpftool/iter.c9
-rw-r--r--tools/bpf/bpftool/link.c4
-rw-r--r--tools/bpf/bpftool/main.h10
-rw-r--r--tools/bpf/bpftool/pids.c2
-rw-r--r--tools/bpf/bpftool/prog.c16
-rw-r--r--tools/bpf/resolve_btfids/Makefile1
-rw-r--r--tools/bpf/resolve_btfids/main.c37
-rw-r--r--tools/build/Makefile.feature5
-rw-r--r--tools/build/feature/Makefile6
-rw-r--r--tools/build/feature/test-libdebuginfod.c8
-rw-r--r--tools/cgroup/iocost_monitor.py54
-rw-r--r--tools/cgroup/memcg_slabinfo.py226
-rw-r--r--tools/gpio/gpio-event-mon.c146
-rw-r--r--tools/gpio/gpio-hammer.c56
-rw-r--r--tools/gpio/gpio-utils.c176
-rw-r--r--tools/gpio/gpio-utils.h48
-rw-r--r--tools/gpio/gpio-watch.c16
-rw-r--r--tools/gpio/lsgpio.c60
-rw-r--r--tools/iio/iio_event_monitor.c2
-rw-r--r--tools/include/linux/jhash.h2
-rw-r--r--tools/include/linux/objtool.h129
-rw-r--r--tools/include/linux/static_call_types.h35
-rw-r--r--tools/include/uapi/asm-generic/unistd.h16
-rw-r--r--tools/include/uapi/drm/i915_drm.h4
-rw-r--r--tools/include/uapi/linux/bpf.h25
-rw-r--r--tools/include/uapi/linux/in.h3
-rw-r--r--tools/include/uapi/linux/kvm.h10
-rw-r--r--tools/include/uapi/linux/perf_event.h28
-rw-r--r--tools/include/uapi/linux/vhost.h2
-rw-r--r--tools/io_uring/io_uring-bench.c4
-rw-r--r--tools/lib/api/fd/array.c23
-rw-r--r--tools/lib/api/fd/array.h16
-rw-r--r--tools/lib/bpf/Makefile4
-rw-r--r--tools/lib/bpf/bpf.c3
-rw-r--r--tools/lib/bpf/bpf.h5
-rw-r--r--tools/lib/bpf/bpf_helpers.h2
-rw-r--r--tools/lib/bpf/btf.c91
-rw-r--r--tools/lib/bpf/btf.h2
-rw-r--r--tools/lib/bpf/btf_dump.c39
-rw-r--r--tools/lib/bpf/libbpf.c44
-rw-r--r--tools/lib/bpf/libbpf.h5
-rw-r--r--tools/lib/bpf/libbpf.map2
-rw-r--r--tools/lib/perf/Documentation/libperf-counting.txt14
-rw-r--r--tools/lib/perf/Documentation/libperf-sampling.txt13
-rw-r--r--tools/lib/perf/Documentation/libperf.txt4
-rw-r--r--tools/lib/perf/evlist.c6
-rw-r--r--tools/lib/perf/include/internal/evlist.h2
-rw-r--r--tools/lib/perf/include/perf/event.h9
-rw-r--r--tools/lib/rbtree.c2
-rw-r--r--tools/lib/subcmd/help.c10
-rw-r--r--tools/lib/traceevent/Documentation/libtraceevent-plugins.txt25
-rw-r--r--tools/lib/traceevent/event-parse-local.h22
-rw-r--r--tools/lib/traceevent/event-parse.c1004
-rw-r--r--tools/lib/traceevent/event-parse.h36
-rw-r--r--tools/lib/traceevent/event-plugin.c285
-rw-r--r--tools/lib/traceevent/kbuffer.h17
-rw-r--r--tools/lib/traceevent/plugins/Build2
-rw-r--r--tools/lib/traceevent/plugins/Makefile2
-rw-r--r--tools/lib/traceevent/plugins/plugin_function.c123
-rw-r--r--tools/lib/traceevent/plugins/plugin_futex.c123
-rw-r--r--tools/lib/traceevent/plugins/plugin_hrtimer.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_jbd2.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_kmem.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_kvm.c42
-rw-r--r--tools/lib/traceevent/plugins/plugin_mac80211.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_sched_switch.c17
-rw-r--r--tools/lib/traceevent/plugins/plugin_tlb.c66
-rw-r--r--tools/memory-model/Documentation/cheatsheet.txt33
-rw-r--r--tools/memory-model/Documentation/litmus-tests.txt1074
-rw-r--r--tools/memory-model/Documentation/recipes.txt4
-rw-r--r--tools/memory-model/Documentation/references.txt2
-rw-r--r--tools/memory-model/Documentation/simple.txt271
-rw-r--r--tools/memory-model/README160
-rw-r--r--tools/objtool/Makefile6
-rw-r--r--tools/objtool/arch.h4
-rw-r--r--tools/objtool/arch/x86/Build1
-rw-r--r--tools/objtool/arch/x86/decode.c37
-rw-r--r--tools/objtool/arch/x86/include/arch_special.h20
-rw-r--r--tools/objtool/arch/x86/special.c145
-rw-r--r--tools/objtool/builtin-check.c15
-rw-r--r--tools/objtool/builtin-orc.c27
-rw-r--r--tools/objtool/check.c424
-rw-r--r--tools/objtool/check.h10
-rw-r--r--tools/objtool/elf.c8
-rw-r--r--tools/objtool/elf.h3
-rw-r--r--tools/objtool/objtool.c30
-rw-r--r--tools/objtool/objtool.h7
-rw-r--r--tools/objtool/orc_dump.c9
-rw-r--r--tools/objtool/orc_gen.c12
-rw-r--r--tools/objtool/special.c48
-rw-r--r--tools/objtool/special.h10
-rwxr-xr-xtools/objtool/sync-check.sh33
-rw-r--r--tools/objtool/weak.c6
-rw-r--r--tools/perf/Documentation/itrace.txt14
-rw-r--r--tools/perf/Documentation/perf-bench.txt11
-rw-r--r--tools/perf/Documentation/perf-config.txt5
-rw-r--r--tools/perf/Documentation/perf-data.txt3
-rw-r--r--tools/perf/Documentation/perf-ftrace.txt75
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt63
-rw-r--r--tools/perf/Documentation/perf-list.txt1
-rw-r--r--tools/perf/Documentation/perf-record.txt48
-rw-r--r--tools/perf/Documentation/perf-script.txt4
-rw-r--r--tools/perf/Documentation/perf-stat.txt51
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt13
-rw-r--r--tools/perf/Makefile.config10
-rw-r--r--tools/perf/Makefile.perf11
-rw-r--r--tools/perf/arch/arm/util/auxtrace.c9
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c6
-rw-r--r--tools/perf/arch/powerpc/entry/syscalls/syscall.tbl14
-rw-r--r--tools/perf/arch/powerpc/include/perf_regs.h8
-rw-r--r--tools/perf/arch/powerpc/util/book3s_hcalls.h2
-rw-r--r--tools/perf/arch/powerpc/util/header.c9
-rw-r--r--tools/perf/arch/powerpc/util/perf_regs.c55
-rw-r--r--tools/perf/arch/powerpc/util/utils_header.h15
-rw-r--r--tools/perf/arch/s390/entry/syscalls/syscall.tbl14
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl13
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c4
-rw-r--r--tools/perf/bench/Build3
-rw-r--r--tools/perf/bench/bench.h2
-rw-r--r--tools/perf/bench/find-bit-bench.c135
-rw-r--r--tools/perf/bench/mem-functions.c21
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-lib.c24
-rw-r--r--tools/perf/bench/numa.c77
-rw-r--r--tools/perf/bench/sched-messaging.c4
-rw-r--r--tools/perf/bench/synthesize.c4
-rw-r--r--tools/perf/bench/syscall.c81
-rw-r--r--tools/perf/builtin-bench.c9
-rw-r--r--tools/perf/builtin-c2c.c2
-rw-r--r--tools/perf/builtin-data.c1
-rw-r--r--tools/perf/builtin-ftrace.c436
-rw-r--r--tools/perf/builtin-inject.c4
-rw-r--r--tools/perf/builtin-kmem.c3
-rw-r--r--tools/perf/builtin-kvm.c2
-rw-r--r--tools/perf/builtin-record.c267
-rw-r--r--tools/perf/builtin-report.c12
-rw-r--r--tools/perf/builtin-sched.c38
-rw-r--r--tools/perf/builtin-script.c233
-rw-r--r--tools/perf/builtin-stat.c208
-rw-r--r--tools/perf/builtin-top.c4
-rw-r--r--tools/perf/builtin-trace.c9
-rwxr-xr-xtools/perf/check-headers.sh3
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/metrics.json48
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json35
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/core.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/core.json2
-rw-r--r--tools/perf/pmu-events/jevents.c2
-rw-r--r--tools/perf/tests/Build1
-rw-r--r--tools/perf/tests/attr/README2
-rw-r--r--tools/perf/tests/attr/test-record-group229
-rw-r--r--tools/perf/tests/attr/test-record-pfm-period9
-rw-r--r--tools/perf/tests/bp_signal.c5
-rw-r--r--tools/perf/tests/bpf.c2
-rw-r--r--tools/perf/tests/builtin-test.c4
-rw-r--r--tools/perf/tests/code-reading.c2
-rw-r--r--tools/perf/tests/expr.c15
-rw-r--r--tools/perf/tests/fdarray.c22
-rw-r--r--tools/perf/tests/parse-events.c46
-rw-r--r--tools/perf/tests/parse-metric.c359
-rw-r--r--tools/perf/tests/perf-record.c4
-rw-r--r--tools/perf/tests/pmu-events.c138
-rw-r--r--tools/perf/tests/pmu.c1
-rwxr-xr-xtools/perf/tests/shell/record+script_probe_vfs_getname.sh4
-rw-r--r--tools/perf/tests/tests.h1
-rw-r--r--tools/perf/trace/beauty/include/linux/socket.h442
-rw-r--r--tools/perf/trace/beauty/sockaddr.c9
-rwxr-xr-xtools/perf/trace/beauty/socket.sh24
-rw-r--r--tools/perf/ui/browsers/annotate.c2
-rw-r--r--tools/perf/ui/browsers/hists.c3
-rw-r--r--tools/perf/util/Build64
-rw-r--r--tools/perf/util/annotate.c15
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c6
-rw-r--r--tools/perf/util/auxtrace.c50
-rw-r--r--tools/perf/util/auxtrace.h31
-rw-r--r--tools/perf/util/build-id.c19
-rw-r--r--tools/perf/util/clockid.c119
-rw-r--r--tools/perf/util/clockid.h11
-rw-r--r--tools/perf/util/cs-etm.c9
-rw-r--r--tools/perf/util/data-convert-bt.c57
-rw-r--r--tools/perf/util/data-convert.h1
-rw-r--r--tools/perf/util/debug.c61
-rw-r--r--tools/perf/util/dso.c5
-rw-r--r--tools/perf/util/dso.h11
-rw-r--r--tools/perf/util/env.h14
-rw-r--r--tools/perf/util/event.c60
-rw-r--r--tools/perf/util/event.h7
-rw-r--r--tools/perf/util/evlist.c194
-rw-r--r--tools/perf/util/evlist.h59
-rw-r--r--tools/perf/util/evsel.c33
-rw-r--r--tools/perf/util/expr.c156
-rw-r--r--tools/perf/util/expr.h34
-rw-r--r--tools/perf/util/expr.l3
-rw-r--r--tools/perf/util/expr.y33
-rw-r--r--tools/perf/util/header.c134
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.c214
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-decoder.h1
-rw-r--r--tools/perf/util/intel-pt.c131
-rw-r--r--tools/perf/util/jitdump.c31
-rw-r--r--tools/perf/util/machine.c71
-rw-r--r--tools/perf/util/machine.h3
-rw-r--r--tools/perf/util/map.c25
-rw-r--r--tools/perf/util/map.h10
-rw-r--r--tools/perf/util/metricgroup.c568
-rw-r--r--tools/perf/util/metricgroup.h16
-rw-r--r--tools/perf/util/parse-events.c120
-rw-r--r--tools/perf/util/parse-events.h16
-rw-r--r--tools/perf/util/parse-events.l28
-rw-r--r--tools/perf/util/parse-events.y49
-rw-r--r--tools/perf/util/parse-sublevel-options.c70
-rw-r--r--tools/perf/util/parse-sublevel-options.h11
-rw-r--r--tools/perf/util/perf_api_probe.c10
-rw-r--r--tools/perf/util/perf_api_probe.h1
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c1
-rw-r--r--tools/perf/util/pmu.c24
-rw-r--r--tools/perf/util/pmu.h4
-rw-r--r--tools/perf/util/probe-event.c18
-rw-r--r--tools/perf/util/probe-finder.c5
-rw-r--r--tools/perf/util/record.c34
-rw-r--r--tools/perf/util/record.h5
-rw-r--r--tools/perf/util/session.c41
-rw-r--r--tools/perf/util/stat-display.c2
-rw-r--r--tools/perf/util/stat-shadow.c103
-rw-r--r--tools/perf/util/stat.h8
-rw-r--r--tools/perf/util/symbol-elf.c8
-rw-r--r--tools/perf/util/symbol.c28
-rw-r--r--tools/perf/util/tool.h3
-rw-r--r--tools/perf/util/zstd.c2
-rw-r--r--tools/power/acpi/Makefile2
-rw-r--r--tools/power/acpi/os_specific/service_layers/oslinuxtbl.c2
-rw-r--r--tools/power/cpupower/utils/cpufreq-set.c14
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py2
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c23
-rw-r--r--tools/power/x86/intel-speed-select/isst-core.c8
-rw-r--r--tools/power/x86/intel-speed-select/isst.h2
-rw-r--r--tools/testing/ktest/examples/README2
-rw-r--r--tools/testing/ktest/examples/crosstests.conf2
-rwxr-xr-xtools/testing/ktest/ktest.pl103
-rw-r--r--tools/testing/ktest/sample.conf18
-rw-r--r--tools/testing/nvdimm/dax-dev.c22
-rw-r--r--tools/testing/nvdimm/test/iomap.c2
-rw-r--r--tools/testing/nvdimm/test/nfit.c416
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/arm64/Makefile2
-rw-r--r--tools/testing/selftests/arm64/fp/.gitignore5
-rw-r--r--tools/testing/selftests/arm64/fp/Makefile17
-rw-r--r--tools/testing/selftests/arm64/fp/README100
-rw-r--r--tools/testing/selftests/arm64/fp/asm-offsets.h11
-rw-r--r--tools/testing/selftests/arm64/fp/assembler.h57
-rwxr-xr-xtools/testing/selftests/arm64/fp/fpsimd-stress60
-rw-r--r--tools/testing/selftests/arm64/fp/fpsimd-test.S482
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c58
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace-asm.S33
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c336
-rwxr-xr-xtools/testing/selftests/arm64/fp/sve-stress59
-rw-r--r--tools/testing/selftests/arm64/fp/sve-test.S672
-rw-r--r--tools/testing/selftests/arm64/fp/vlset.c155
-rw-r--r--tools/testing/selftests/arm64/mte/.gitignore6
-rw-r--r--tools/testing/selftests/arm64/mte/Makefile29
-rw-r--r--tools/testing/selftests/arm64/mte/check_buffer_fill.c475
-rw-r--r--tools/testing/selftests/arm64/mte/check_child_memory.c195
-rw-r--r--tools/testing/selftests/arm64/mte/check_ksm_options.c159
-rw-r--r--tools/testing/selftests/arm64/mte/check_mmap_options.c262
-rw-r--r--tools/testing/selftests/arm64/mte/check_tags_inclusion.c185
-rw-r--r--tools/testing/selftests/arm64/mte/check_user_mem.c111
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.c341
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.h118
-rw-r--r--tools/testing/selftests/arm64/mte/mte_def.h60
-rw-r--r--tools/testing/selftests/arm64/mte/mte_helper.S128
-rw-r--r--tools/testing/selftests/arm64/pauth/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/pauth/Makefile39
-rw-r--r--tools/testing/selftests/arm64/pauth/exec_target.c34
-rw-r--r--tools/testing/selftests/arm64/pauth/helper.c39
-rw-r--r--tools/testing/selftests/arm64/pauth/helper.h28
-rw-r--r--tools/testing/selftests/arm64/pauth/pac.c368
-rw-r--r--tools/testing/selftests/arm64/pauth/pac_corruptor.S19
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/Makefile55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c27
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_extern.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mmap.c19
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_ctx.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_global_funcs.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/varlen.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c15
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h69
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c41
-rw-r--r--tools/testing/selftests/bpf/progs/test_varlen.c6
-rw-r--r--tools/testing/selftests/bpf/settings1
-rw-r--r--tools/testing/selftests/bpf/test_btf.c8
-rw-r--r--tools/testing/selftests/bpf/test_maps.c2
-rw-r--r--tools/testing/selftests/bpf/test_progs.c4
-rw-r--r--tools/testing/selftests/bpf/test_progs.h5
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c13
-rw-r--r--tools/testing/selftests/cgroup/.gitignore1
-rw-r--r--tools/testing/selftests/cgroup/Makefile2
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c2
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c450
-rw-r--r--tools/testing/selftests/clone3/clone3.c45
-rw-r--r--tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c4
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h24
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c4
-rw-r--r--tools/testing/selftests/exec/.gitignore1
-rw-r--r--tools/testing/selftests/exec/Makefile5
-rw-r--r--tools/testing/selftests/exec/non-regular.c196
-rwxr-xr-xtools/testing/selftests/firmware/fw_filesystem.sh91
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc14
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc12
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/profile.tc2
-rwxr-xr-xtools/testing/selftests/kmod/kmod.sh4
-rw-r--r--tools/testing/selftests/kselftest_harness.h5
-rw-r--r--tools/testing/selftests/kvm/x86_64/debug_regs.c6
-rwxr-xr-xtools/testing/selftests/lkdtm/run.sh2
-rw-r--r--tools/testing/selftests/mincore/.gitignore2
-rw-r--r--tools/testing/selftests/mincore/Makefile6
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c361
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh2
-rw-r--r--tools/testing/selftests/net/mptcp/config2
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c9
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh47
-rwxr-xr-xtools/testing/selftests/netfilter/nft_flowtable.sh130
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h4
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c304
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c150
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/context_switch.c21
-rw-r--r--tools/testing/selftests/powerpc/copyloops/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile6
l---------tools/testing/selftests/powerpc/copyloops/copy_mc_64.S1
l---------tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S1
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-functions.sh11
-rw-r--r--tools/testing/selftests/powerpc/include/instructions.h77
-rw-r--r--tools/testing/selftests/powerpc/include/pkeys.h136
-rw-r--r--tools/testing/selftests/powerpc/include/reg.h6
-rw-r--r--tools/testing/selftests/powerpc/include/utils.h29
-rw-r--r--tools/testing/selftests/powerpc/math/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/math/Makefile10
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_denormal.c38
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_preempt.c3
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_signal.c3
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_syscall.c7
-rw-r--r--tools/testing/selftests/powerpc/math/vsx_preempt.c2
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore4
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile14
-rw-r--r--tools/testing/selftests/powerpc/mm/bad_accesses.c28
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_exec_prot.c294
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_siginfo.c333
-rw-r--r--tools/testing/selftests/powerpc/mm/prot_sao.c10
-rw-r--r--tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c202
-rw-r--r--tools/testing/selftests/powerpc/mm/stack_expansion_signal.c118
-rw-r--r--tools/testing/selftests/powerpc/pmu/count_stcx_fail.c4
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c1
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c7
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.h1
-rw-r--r--tools/testing/selftests/powerpc/pmu/per_event_excludes.c7
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c57
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tar.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c2
-rw-r--r--tools/testing/selftests/powerpc/security/spectre_v2.c10
-rw-r--r--tools/testing/selftests/powerpc/stringloops/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/stringloops/memcmp.c46
-rw-r--r--tools/testing/selftests/powerpc/utils.c65
-rw-r--r--tools/testing/selftests/rseq/param_test.c223
-rw-r--r--tools/testing/selftests/rseq/rseq-x86.h57
-rwxr-xr-xtools/testing/selftests/rseq/run_param_test.sh2
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c440
-rw-r--r--tools/testing/selftests/splice/.gitignore1
-rw-r--r--tools/testing/selftests/splice/Makefile4
-rw-r--r--tools/testing/selftests/splice/config1
-rw-r--r--tools/testing/selftests/splice/settings1
-rwxr-xr-xtools/testing/selftests/splice/short_splice_read.sh56
-rw-r--r--tools/testing/selftests/splice/splice_read.c57
-rw-r--r--tools/testing/selftests/timers/Makefile1
-rw-r--r--tools/testing/selftests/timers/settings1
-rw-r--r--tools/testing/selftests/vm/Makefile17
-rw-r--r--tools/testing/selftests/vm/compaction_test.c11
-rw-r--r--tools/testing/selftests/vm/gup_benchmark.c32
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c39
-rw-r--r--tools/testing/selftests/vm/map_hugetlb.c2
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c296
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c68
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c22
-rw-r--r--tools/usb/Build2
-rw-r--r--tools/usb/Makefile53
-rw-r--r--tools/virtio/linux/virtio_config.h6
-rw-r--r--tools/vm/page-types.c2
437 files changed, 19350 insertions, 3336 deletions
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 264e266a85bf..c3af3f324c5a 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -640,6 +640,11 @@ struct kvm_ppc_cpu_char {
#define KVM_REG_PPC_ONLINE (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
#define KVM_REG_PPC_PTCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
+/* POWER10 registers */
+#define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
+#define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
+#define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
diff --git a/tools/arch/powerpc/include/uapi/asm/perf_regs.h b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
index f599064dd8dc..bdf5f10f8b9f 100644
--- a/tools/arch/powerpc/include/uapi/asm/perf_regs.h
+++ b/tools/arch/powerpc/include/uapi/asm/perf_regs.h
@@ -48,6 +48,24 @@ enum perf_event_powerpc_regs {
PERF_REG_POWERPC_DSISR,
PERF_REG_POWERPC_SIER,
PERF_REG_POWERPC_MMCRA,
- PERF_REG_POWERPC_MAX,
+ /* Extended registers */
+ PERF_REG_POWERPC_MMCR0,
+ PERF_REG_POWERPC_MMCR1,
+ PERF_REG_POWERPC_MMCR2,
+ PERF_REG_POWERPC_MMCR3,
+ PERF_REG_POWERPC_SIER2,
+ PERF_REG_POWERPC_SIER3,
+ /* Max regs without the extended regs */
+ PERF_REG_POWERPC_MAX = PERF_REG_POWERPC_MMCRA + 1,
};
+
+#define PERF_REG_PMU_MASK ((1ULL << PERF_REG_POWERPC_MAX) - 1)
+
+/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_300 */
+#define PERF_REG_PMU_MASK_300 (((1ULL << (PERF_REG_POWERPC_MMCR2 + 1)) - 1) - PERF_REG_PMU_MASK)
+/* PERF_REG_EXTENDED_MASK value for CPU_FTR_ARCH_31 */
+#define PERF_REG_PMU_MASK_31 (((1ULL << (PERF_REG_POWERPC_SIER3 + 1)) - 1) - PERF_REG_PMU_MASK)
+
+#define PERF_REG_MAX_ISA_300 (PERF_REG_POWERPC_MMCR2 + 1)
+#define PERF_REG_MAX_ISA_31 (PERF_REG_POWERPC_SIER3 + 1)
#endif /* _UAPI_ASM_POWERPC_PERF_REGS_H */
diff --git a/tools/arch/riscv/include/uapi/asm/unistd.h b/tools/arch/riscv/include/uapi/asm/unistd.h
index 0e2eeeb1fd27..f506cca520b0 100644
--- a/tools/arch/riscv/include/uapi/asm/unistd.h
+++ b/tools/arch/riscv/include/uapi/asm/unistd.h
@@ -12,7 +12,7 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
#ifdef __LP64__
diff --git a/tools/arch/s390/include/uapi/asm/kvm.h b/tools/arch/s390/include/uapi/asm/kvm.h
index 436ec7636927..7a6b14874d65 100644
--- a/tools/arch/s390/include/uapi/asm/kvm.h
+++ b/tools/arch/s390/include/uapi/asm/kvm.h
@@ -231,11 +231,13 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_GSCB (1UL << 9)
#define KVM_SYNC_BPBC (1UL << 10)
#define KVM_SYNC_ETOKEN (1UL << 11)
+#define KVM_SYNC_DIAG318 (1UL << 12)
#define KVM_SYNC_S390_VALID_FIELDS \
(KVM_SYNC_PREFIX | KVM_SYNC_GPRS | KVM_SYNC_ACRS | KVM_SYNC_CRS | \
KVM_SYNC_ARCH0 | KVM_SYNC_PFAULT | KVM_SYNC_VRS | KVM_SYNC_RICCB | \
- KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN)
+ KVM_SYNC_FPRS | KVM_SYNC_GSCB | KVM_SYNC_BPBC | KVM_SYNC_ETOKEN | \
+ KVM_SYNC_DIAG318)
/* length and alignment of the sdnx as a power of two */
#define SDNXC 8
@@ -264,7 +266,8 @@ struct kvm_sync_regs {
__u8 reserved2 : 7;
__u8 padding1[51]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
- __u8 padding2[192]; /* sdnx needs to be 256byte aligned */
+ __u64 diag318; /* diagnose 0x318 info */
+ __u8 padding2[184]; /* sdnx needs to be 256byte aligned */
union {
__u8 sdnx[SDNXL]; /* state description annex */
struct {
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 02dabc9e77b0..2901d5df4366 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -96,6 +96,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
+/* free ( 3*32+17) */
#define X86_FEATURE_LFENCE_RDTSC ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -107,6 +108,7 @@
#define X86_FEATURE_EXTD_APICID ( 3*32+26) /* Extended APICID (8 bits) */
#define X86_FEATURE_AMD_DCM ( 3*32+27) /* AMD multi-node processor */
#define X86_FEATURE_APERFMPERF ( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
+/* free ( 3*32+29) */
#define X86_FEATURE_NONSTOP_TSC_S3 ( 3*32+30) /* TSC doesn't stop in S3 state */
#define X86_FEATURE_TSC_KNOWN_FREQ ( 3*32+31) /* TSC has known frequency */
@@ -365,7 +367,9 @@
#define X86_FEATURE_SRBDS_CTRL (18*32+ 9) /* "" SRBDS mitigation MSR available */
#define X86_FEATURE_MD_CLEAR (18*32+10) /* VERW clears CPU buffers */
#define X86_FEATURE_TSX_FORCE_ABORT (18*32+13) /* "" TSX_FORCE_ABORT */
+#define X86_FEATURE_SERIALIZE (18*32+14) /* SERIALIZE instruction */
#define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */
+#define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index 2ccd588fbad4..000000000000
--- a/tools/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index e8370e64a155..2859ee4f39a8 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -149,6 +149,10 @@
#define MSR_LBR_SELECT 0x000001c8
#define MSR_LBR_TOS 0x000001c9
+
+#define MSR_IA32_POWER_CTL 0x000001fc
+#define MSR_IA32_POWER_CTL_BIT_EE 19
+
#define MSR_LBR_NHM_FROM 0x00000680
#define MSR_LBR_NHM_TO 0x000006c0
#define MSR_LBR_CORE_FROM 0x00000040
@@ -158,7 +162,23 @@
#define LBR_INFO_MISPRED BIT_ULL(63)
#define LBR_INFO_IN_TX BIT_ULL(62)
#define LBR_INFO_ABORT BIT_ULL(61)
+#define LBR_INFO_CYC_CNT_VALID BIT_ULL(60)
#define LBR_INFO_CYCLES 0xffff
+#define LBR_INFO_BR_TYPE_OFFSET 56
+#define LBR_INFO_BR_TYPE (0xfull << LBR_INFO_BR_TYPE_OFFSET)
+
+#define MSR_ARCH_LBR_CTL 0x000014ce
+#define ARCH_LBR_CTL_LBREN BIT(0)
+#define ARCH_LBR_CTL_CPL_OFFSET 1
+#define ARCH_LBR_CTL_CPL (0x3ull << ARCH_LBR_CTL_CPL_OFFSET)
+#define ARCH_LBR_CTL_STACK_OFFSET 3
+#define ARCH_LBR_CTL_STACK (0x1ull << ARCH_LBR_CTL_STACK_OFFSET)
+#define ARCH_LBR_CTL_FILTER_OFFSET 16
+#define ARCH_LBR_CTL_FILTER (0x7full << ARCH_LBR_CTL_FILTER_OFFSET)
+#define MSR_ARCH_LBR_DEPTH 0x000014cf
+#define MSR_ARCH_LBR_FROM_0 0x00001500
+#define MSR_ARCH_LBR_TO_0 0x00001600
+#define MSR_ARCH_LBR_INFO_0 0x00001200
#define MSR_IA32_PEBS_ENABLE 0x000003f1
#define MSR_PEBS_DATA_CFG 0x000003f2
@@ -253,8 +273,6 @@
#define MSR_PEBS_FRONTEND 0x000003f7
-#define MSR_IA32_POWER_CTL 0x000001fc
-
#define MSR_IA32_MC0_CTL 0x00000400
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
@@ -418,7 +436,6 @@
#define MSR_AMD64_PATCH_LEVEL 0x0000008b
#define MSR_AMD64_TSC_RATIO 0xc0000104
#define MSR_AMD64_NB_CFG 0xc001001f
-#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_PATCH_LOADER 0xc0010020
#define MSR_AMD_PERF_CTL 0xc0010062
#define MSR_AMD_PERF_STATUS 0xc0010063
@@ -427,6 +444,7 @@
#define MSR_AMD64_OSVW_STATUS 0xc0010141
#define MSR_AMD_PPIN_CTL 0xc00102f0
#define MSR_AMD_PPIN 0xc00102f1
+#define MSR_AMD64_CPUID_FN_1 0xc0011004
#define MSR_AMD64_LS_CFG 0xc0011020
#define MSR_AMD64_DC_CFG 0xc0011022
#define MSR_AMD64_BU_CFG2 0xc001102a
@@ -466,6 +484,8 @@
#define MSR_F16H_DR0_ADDR_MASK 0xc0011027
/* Fam 15h MSRs */
+#define MSR_F15H_CU_PWR_ACCUMULATOR 0xc001007a
+#define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b
#define MSR_F15H_PERF_CTL 0xc0010200
#define MSR_F15H_PERF_CTL0 MSR_F15H_PERF_CTL
#define MSR_F15H_PERF_CTL1 (MSR_F15H_PERF_CTL + 2)
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index d25534940bde..fdbffec4cfde 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -39,27 +39,6 @@
#define ORC_REG_SP_INDIRECT 9
#define ORC_REG_MAX 15
-/*
- * ORC_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP (the
- * caller's SP right before it made the call). Used for all callable
- * functions, i.e. all C code and all callable asm functions.
- *
- * ORC_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset points
- * to a fully populated pt_regs from a syscall, interrupt, or exception.
- *
- * ORC_TYPE_REGS_IRET: Used in entry code to indicate that sp_reg+sp_offset
- * points to the iret return frame.
- *
- * The UNWIND_HINT macros are used only for the unwind_hint struct. They
- * aren't used in struct orc_entry due to size and complexity constraints.
- * Objtool converts them to real types when it converts the hints to orc
- * entries.
- */
-#define ORC_TYPE_CALL 0
-#define ORC_TYPE_REGS 1
-#define ORC_TYPE_REGS_IRET 2
-#define UNWIND_HINT_TYPE_RET_OFFSET 3
-
#ifndef __ASSEMBLY__
/*
* This struct is more or less a vastly simplified version of the DWARF Call
@@ -78,19 +57,6 @@ struct orc_entry {
unsigned end:1;
} __packed;
-/*
- * This struct is used by asm and inline asm code to manually annotate the
- * location of registers on the stack for the ORC unwinder.
- *
- * Type can be either ORC_TYPE_* or UNWIND_HINT_TYPE_*.
- */
-struct unwind_hint {
- u32 ip;
- s16 sp_offset;
- u8 sp_reg;
- u8 type;
- u8 end;
-};
#endif /* __ASSEMBLY__ */
#endif /* _ORC_TYPES_H */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 45f8e1b02241..0b5b8ae56bd9 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
#include <linux/linkage.h>
#include <asm/errno.h>
#include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>
@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig)
SYM_FUNC_END(memcpy_orig)
.popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
- cmpl $8, %edx
- /* Less than 8 bytes? Go to byte copy loop */
- jb .L_no_whole_words
-
- /* Check for bad alignment of source */
- testl $7, %esi
- /* Already aligned */
- jz .L_8byte_aligned
-
- /* Copy one byte at a time until source is 8-byte aligned */
- movl %esi, %ecx
- andl $7, %ecx
- subl $8, %ecx
- negl %ecx
- subl %ecx, %edx
-.L_read_leading_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
- MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
- movl %edx, %ecx
- andl $7, %edx
- shrl $3, %ecx
- jz .L_no_whole_words
-
-.L_read_words:
- movq (%rsi), %r8
- MCSAFE_TEST_SRC %rsi 8 .E_read_words
- MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
- movq %r8, (%rdi)
- addq $8, %rsi
- addq $8, %rdi
- decl %ecx
- jnz .L_read_words
-
- /* Any trailing bytes? */
-.L_no_whole_words:
- andl %edx, %edx
- jz .L_done_memcpy_trap
-
- /* Copy trailing bytes */
- movl %edx, %ecx
-.L_read_trailing_bytes:
- movb (%rsi), %al
- MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
- MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
- movb %al, (%rdi)
- incq %rsi
- incq %rdi
- decl %ecx
- jnz .L_read_trailing_bytes
-
- /* Copy successful. Return zero */
-.L_done_memcpy_trap:
- xorl %eax, %eax
-.L_done:
- ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
- .section .fixup, "ax"
- /*
- * Return number of bytes not copied for any failure. Note that
- * there is no "tail" handling since the source buffer is 8-byte
- * aligned and poison is cacheline aligned.
- */
-.E_read_words:
- shll $3, %ecx
-.E_leading_bytes:
- addl %edx, %ecx
-.E_trailing_bytes:
- mov %ecx, %eax
- jmp .L_done
-
- /*
- * For write fault handling, given the destination is unaligned,
- * we handle faults on multi-byte writes with a byte-by-byte
- * copy up to the write-protected page.
- */
-.E_write_words:
- shll $3, %ecx
- addl %edx, %ecx
- movl %ecx, %edx
- jmp mcsafe_handle_tail
-
- .previous
-
- _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
- _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
- _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
- _ASM_EXTABLE(.L_write_words, .E_write_words)
- _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk
index a42015b305f4..af38469afd14 100644
--- a/tools/arch/x86/tools/gen-insn-attr-x86.awk
+++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk
@@ -362,6 +362,9 @@ function convert_operands(count,opnd, i,j,imm,mod)
END {
if (awkchecked != "")
exit 1
+
+ print "#ifndef __BOOT_COMPRESSED\n"
+
# print escape opcode map's array
print "/* Escape opcode map array */"
print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
@@ -388,6 +391,51 @@ END {
for (j = 0; j < max_lprefix; j++)
if (atable[i,j])
print " ["i"]["j"] = "atable[i,j]","
- print "};"
+ print "};\n"
+
+ print "#else /* !__BOOT_COMPRESSED */\n"
+
+ print "/* Escape opcode map array */"
+ print "static const insn_attr_t *inat_escape_tables[INAT_ESC_MAX + 1]" \
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "/* Group opcode map array */"
+ print "static const insn_attr_t *inat_group_tables[INAT_GRP_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "/* AVX opcode map array */"
+ print "static const insn_attr_t *inat_avx_tables[X86_VEX_M_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1];"
+ print ""
+
+ print "static void inat_init_tables(void)"
+ print "{"
+
+ # print escape opcode map's array
+ print "\t/* Print Escape opcode map array */"
+ for (i = 0; i < geid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (etable[i,j])
+ print "\tinat_escape_tables["i"]["j"] = "etable[i,j]";"
+ print ""
+
+ # print group opcode map's array
+ print "\t/* Print Group opcode map array */"
+ for (i = 0; i < ggid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (gtable[i,j])
+ print "\tinat_group_tables["i"]["j"] = "gtable[i,j]";"
+ print ""
+ # print AVX opcode map's array
+ print "\t/* Print AVX opcode map array */"
+ for (i = 0; i < gaid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (atable[i,j])
+ print "\tinat_avx_tables["i"]["j"] = "atable[i,j]";"
+
+ print "}"
+ print "#endif"
}
diff --git a/tools/bootconfig/samples/bad-override.bconf b/tools/bootconfig/samples/bad-override.bconf
new file mode 100644
index 000000000000..fde6c561512e
--- /dev/null
+++ b/tools/bootconfig/samples/bad-override.bconf
@@ -0,0 +1,3 @@
+key.subkey = value
+# We can not override pre-defined subkeys with value
+key := value
diff --git a/tools/bootconfig/samples/bad-override2.bconf b/tools/bootconfig/samples/bad-override2.bconf
new file mode 100644
index 000000000000..688587cb023c
--- /dev/null
+++ b/tools/bootconfig/samples/bad-override2.bconf
@@ -0,0 +1,3 @@
+key = value
+# We can not override pre-defined value with subkey
+key.subkey := value
diff --git a/tools/bootconfig/samples/good-override.bconf b/tools/bootconfig/samples/good-override.bconf
new file mode 100644
index 000000000000..7d31d5f8fbd8
--- /dev/null
+++ b/tools/bootconfig/samples/good-override.bconf
@@ -0,0 +1,6 @@
+# Override the value
+key.word = 1,2,4
+key.word := 2,3
+
+# No pre-defined key
+key.new.word := "new"
diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index 3c2ab9e75730..d295e406a756 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -117,6 +117,19 @@ xpass grep -q "bar" $OUTFILE
xpass grep -q "baz" $OUTFILE
xpass grep -q "qux" $OUTFILE
+echo "Override same-key values"
+cat > $TEMPCONF << EOF
+key = bar, baz
+key := qux
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xfail grep -q "bar" $OUTFILE
+xfail grep -q "baz" $OUTFILE
+xpass grep -q "qux" $OUTFILE
+
echo "Double/single quotes test"
echo "key = '\"string\"';" > $TEMPCONF
$BOOTCONF -a $TEMPCONF $INITRD
@@ -124,6 +137,31 @@ $BOOTCONF $INITRD > $TEMPCONF
cat $TEMPCONF
xpass grep \'\"string\"\' $TEMPCONF
+echo "Repeat same-key tree"
+cat > $TEMPCONF << EOF
+foo
+bar
+foo { buz }
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xpass grep -q "bar" $OUTFILE
+
+
+echo "Remove/keep tailing spaces"
+cat > $TEMPCONF << EOF
+foo = val # comment
+bar = "val2 " # comment
+EOF
+echo > $INITRD
+
+xpass $BOOTCONF -a $TEMPCONF $INITRD
+$BOOTCONF $INITRD > $OUTFILE
+xfail grep -q val[[:space:]] $OUTFILE
+xpass grep -q val2[[:space:]] $OUTFILE
+
echo "=== expected failure cases ==="
for i in samples/bad-* ; do
xfail $BOOTCONF -a $i $INITRD
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index 0a6d09a3e91f..39bb322707b4 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -38,7 +38,7 @@ FEATURE_TESTS = libbfd disassembler-four-args
FEATURE_DISPLAY = libbfd disassembler-four-args
check_feat := 1
-NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean
+NON_CHECK_FEAT_TARGETS := clean bpftool_clean runqslower_clean resolve_btfids_clean
ifdef MAKECMDGOALS
ifeq ($(filter-out $(NON_CHECK_FEAT_TARGETS),$(MAKECMDGOALS)),)
check_feat := 0
@@ -89,7 +89,7 @@ $(OUTPUT)bpf_exp.lex.c: $(OUTPUT)bpf_exp.yacc.c
$(OUTPUT)bpf_exp.yacc.o: $(OUTPUT)bpf_exp.yacc.c
$(OUTPUT)bpf_exp.lex.o: $(OUTPUT)bpf_exp.lex.c
-clean: bpftool_clean runqslower_clean
+clean: bpftool_clean runqslower_clean resolve_btfids_clean
$(call QUIET_CLEAN, bpf-progs)
$(Q)$(RM) -r -- $(OUTPUT)*.o $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg \
$(OUTPUT)bpf_asm $(OUTPUT)bpf_exp.yacc.* $(OUTPUT)bpf_exp.lex.*
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 8462690a039b..4828913703b6 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -25,7 +25,7 @@ endif
LIBBPF = $(LIBBPF_PATH)libbpf.a
-BPFTOOL_VERSION := $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
+BPFTOOL_VERSION ?= $(shell make -rR --no-print-directory -sC ../../.. kernelversion)
$(LIBBPF): FORCE
$(if $(LIBBPF_OUTPUT),@mkdir -p $(LIBBPF_OUTPUT))
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index ede162f83eea..0e9310727281 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -67,7 +67,7 @@ static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
if (!info->btf_id || !info->nr_func_info ||
btf__get_from_id(info->btf_id, &prog_btf))
goto print;
- finfo = (struct bpf_func_info *)info->func_info;
+ finfo = u64_to_ptr(info->func_info);
func_type = btf__type_by_id(prog_btf, finfo->type_id);
if (!func_type || !btf_is_func(func_type))
goto print;
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 8a4c2b3b0cd6..f61184653633 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -143,6 +143,20 @@ static int codegen_datasec_def(struct bpf_object *obj,
var_name, align);
return -EINVAL;
}
+ /* Assume 32-bit architectures when generating data section
+ * struct memory layout. Given bpftool can't know which target
+ * host architecture it's emitting skeleton for, we need to be
+ * conservative and assume 32-bit one to ensure enough padding
+ * bytes are generated for pointer and long types. This will
+ * still work correctly for 64-bit architectures, because in
+ * the worst case we'll generate unnecessary padding field,
+ * which on 64-bit architectures is not strictly necessary and
+ * would be handled by natural 8-byte alignment. But it still
+ * will be a correct memory layout, based on recorded offsets
+ * in BTF.
+ */
+ if (align > 4)
+ align = 4;
align_off = (off + align - 1) / align * align;
if (align_off != need_off) {
@@ -397,7 +411,7 @@ static int do_skeleton(int argc, char **argv)
{ \n\
struct %1$s *obj; \n\
\n\
- obj = (typeof(obj))calloc(1, sizeof(*obj)); \n\
+ obj = (struct %1$s *)calloc(1, sizeof(*obj)); \n\
if (!obj) \n\
return NULL; \n\
if (%1$s__create_skeleton(obj)) \n\
@@ -461,7 +475,7 @@ static int do_skeleton(int argc, char **argv)
{ \n\
struct bpf_object_skeleton *s; \n\
\n\
- s = (typeof(s))calloc(1, sizeof(*s)); \n\
+ s = (struct bpf_object_skeleton *)calloc(1, sizeof(*s));\n\
if (!s) \n\
return -1; \n\
obj->skeleton = s; \n\
@@ -479,7 +493,7 @@ static int do_skeleton(int argc, char **argv)
/* maps */ \n\
s->map_cnt = %zu; \n\
s->map_skel_sz = sizeof(*s->maps); \n\
- s->maps = (typeof(s->maps))calloc(s->map_cnt, s->map_skel_sz);\n\
+ s->maps = (struct bpf_map_skeleton *)calloc(s->map_cnt, s->map_skel_sz);\n\
if (!s->maps) \n\
goto err; \n\
",
@@ -515,7 +529,7 @@ static int do_skeleton(int argc, char **argv)
/* programs */ \n\
s->prog_cnt = %zu; \n\
s->prog_skel_sz = sizeof(*s->progs); \n\
- s->progs = (typeof(s->progs))calloc(s->prog_cnt, s->prog_skel_sz);\n\
+ s->progs = (struct bpf_prog_skeleton *)calloc(s->prog_cnt, s->prog_skel_sz);\n\
if (!s->progs) \n\
goto err; \n\
",
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index c9dba7543dba..3b1aad7535dd 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -11,6 +11,7 @@
static int do_pin(int argc, char **argv)
{
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, iter_opts);
+ union bpf_iter_link_info linfo;
const char *objfile, *path;
struct bpf_program *prog;
struct bpf_object *obj;
@@ -36,6 +37,11 @@ static int do_pin(int argc, char **argv)
map_fd = map_parse_fd(&argc, &argv);
if (map_fd < 0)
return -1;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ iter_opts.link_info = &linfo;
+ iter_opts.link_info_len = sizeof(linfo);
}
}
@@ -57,9 +63,6 @@ static int do_pin(int argc, char **argv)
goto close_obj;
}
- if (map_fd >= 0)
- iter_opts.map_fd = map_fd;
-
link = bpf_program__attach_iter(prog, &iter_opts);
if (IS_ERR(link)) {
err = PTR_ERR(link);
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 1b793759170e..a89f09e3c848 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -106,7 +106,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
switch (info->type) {
case BPF_LINK_TYPE_RAW_TRACEPOINT:
jsonw_string_field(json_wtr, "tp_name",
- (const char *)info->raw_tracepoint.tp_name);
+ u64_to_ptr(info->raw_tracepoint.tp_name));
break;
case BPF_LINK_TYPE_TRACING:
err = get_prog_info(info->prog_id, &prog_info);
@@ -185,7 +185,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
switch (info->type) {
case BPF_LINK_TYPE_RAW_TRACEPOINT:
printf("\n\ttp '%s' ",
- (const char *)info->raw_tracepoint.tp_name);
+ (const char *)u64_to_ptr(info->raw_tracepoint.tp_name));
break;
case BPF_LINK_TYPE_TRACING:
err = get_prog_info(info->prog_id, &prog_info);
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index e3a79b5a9960..c46e52137b87 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -21,7 +21,15 @@
/* Make sure we do not use kernel-only integer typedefs */
#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
-#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64)(unsigned long)ptr;
+}
+
+static inline void *u64_to_ptr(__u64 ptr)
+{
+ return (void *)(unsigned long)ptr;
+}
#define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); })
#define NEXT_ARGP() ({ (*argc)--; (*argv)++; if (*argc < 0) usage(); })
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
index e3b116325403..df7d8ec76036 100644
--- a/tools/bpf/bpftool/pids.c
+++ b/tools/bpf/bpftool/pids.c
@@ -134,6 +134,8 @@ int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
while (true) {
ret = read(fd, buf, sizeof(buf));
if (ret < 0) {
+ if (errno == EAGAIN)
+ continue;
err = -errno;
p_err("failed to read PID iterator output: %d", err);
goto out;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 158995d853b0..d393eb8263a6 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -428,14 +428,14 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
p_info("no instructions returned");
return -1;
}
- buf = (unsigned char *)(info->jited_prog_insns);
+ buf = u64_to_ptr(info->jited_prog_insns);
member_len = info->jited_prog_len;
} else { /* DUMP_XLATED */
if (info->xlated_prog_len == 0 || !info->xlated_prog_insns) {
p_err("error retrieving insn dump: kernel.kptr_restrict set?");
return -1;
}
- buf = (unsigned char *)info->xlated_prog_insns;
+ buf = u64_to_ptr(info->xlated_prog_insns);
member_len = info->xlated_prog_len;
}
@@ -444,7 +444,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
return -1;
}
- func_info = (void *)info->func_info;
+ func_info = u64_to_ptr(info->func_info);
if (info->nr_line_info) {
prog_linfo = bpf_prog_linfo__new(info);
@@ -462,7 +462,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
n = write(fd, buf, member_len);
close(fd);
- if (n != member_len) {
+ if (n != (ssize_t)member_len) {
p_err("error writing output file: %s",
n < 0 ? strerror(errno) : "short write");
return -1;
@@ -492,13 +492,13 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
__u32 i;
if (info->nr_jited_ksyms) {
kernel_syms_load(&dd);
- ksyms = (__u64 *) info->jited_ksyms;
+ ksyms = u64_to_ptr(info->jited_ksyms);
}
if (json_output)
jsonw_start_array(json_wtr);
- lens = (__u32 *) info->jited_func_lens;
+ lens = u64_to_ptr(info->jited_func_lens);
for (i = 0; i < info->nr_jited_func_lens; i++) {
if (ksyms) {
sym = kernel_syms_search(&dd, ksyms[i]);
@@ -559,7 +559,7 @@ prog_dump(struct bpf_prog_info *info, enum dump_mode mode,
} else {
kernel_syms_load(&dd);
dd.nr_jited_ksyms = info->nr_jited_ksyms;
- dd.jited_ksyms = (__u64 *) info->jited_ksyms;
+ dd.jited_ksyms = u64_to_ptr(info->jited_ksyms);
dd.btf = btf;
dd.func_info = func_info;
dd.finfo_rec_size = info->func_info_rec_size;
@@ -1681,7 +1681,7 @@ static char *profile_target_name(int tgt_fd)
goto out;
}
- func_info = (struct bpf_func_info *)(info_linear->info.func_info);
+ func_info = u64_to_ptr(info_linear->info.func_info);
t = btf__type_by_id(btf, func_info[0].type_id);
if (!t) {
p_err("btf %d doesn't have type %d",
diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile
index a88cd4426398..fe8eb537688b 100644
--- a/tools/bpf/resolve_btfids/Makefile
+++ b/tools/bpf/resolve_btfids/Makefile
@@ -80,6 +80,7 @@ libbpf-clean:
clean: libsubcmd-clean libbpf-clean fixdep-clean
$(call msg,CLEAN,$(BINARY))
$(Q)$(RM) -f $(BINARY); \
+ $(RM) -rf $(if $(OUTPUT),$(OUTPUT),.)/feature; \
find $(if $(OUTPUT),$(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
tags:
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 52d883325a23..0def0bb1f783 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -233,6 +233,39 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
return btf_id__add(root, id, false);
}
+/*
+ * The data of compressed section should be aligned to 4
+ * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld
+ * sets sh_addralign to 1, which makes libelf fail with
+ * misaligned section error during the update:
+ * FAILED elf_update(WRITE): invalid section alignment
+ *
+ * While waiting for ld fix, we fix the compressed sections
+ * sh_addralign value manualy.
+ */
+static int compressed_section_fix(Elf *elf, Elf_Scn *scn, GElf_Shdr *sh)
+{
+ int expected = gelf_getclass(elf) == ELFCLASS32 ? 4 : 8;
+
+ if (!(sh->sh_flags & SHF_COMPRESSED))
+ return 0;
+
+ if (sh->sh_addralign == expected)
+ return 0;
+
+ pr_debug2(" - fixing wrong alignment sh_addralign %u, expected %u\n",
+ sh->sh_addralign, expected);
+
+ sh->sh_addralign = expected;
+
+ if (gelf_update_shdr(scn, sh) == 0) {
+ printf("FAILED cannot update section header: %s\n",
+ elf_errmsg(-1));
+ return -1;
+ }
+ return 0;
+}
+
static int elf_collect(struct object *obj)
{
Elf_Scn *scn = NULL;
@@ -309,6 +342,9 @@ static int elf_collect(struct object *obj)
obj->efile.idlist_shndx = idx;
obj->efile.idlist_addr = sh.sh_addr;
}
+
+ if (compressed_section_fix(elf, scn, &sh))
+ return -1;
}
return 0;
@@ -566,6 +602,7 @@ static int sets_patch(struct object *obj)
next = rb_next(next);
}
+ return 0;
}
static int symbols_patch(struct object *obj)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index cb152370fdef..c1daf4d57518 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -8,7 +8,7 @@ endif
feature_check = $(eval $(feature_check_code))
define feature_check_code
- feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
+ feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CC="$(CC)" CXX="$(CXX)" CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" CXXFLAGS="$(EXTRA_CXXFLAGS) $(FEATURE_CHECK_CXXFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) $(OUTPUT_FEATURES)test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0)
endef
feature_set = $(eval $(feature_set_code))
@@ -98,7 +98,8 @@ FEATURE_TESTS_EXTRA := \
llvm-version \
clang \
libbpf \
- libpfm4
+ libpfm4 \
+ libdebuginfod
FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 88371f7f0369..d220fe952747 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -26,6 +26,7 @@ FILES= \
test-libelf-gelf_getnote.bin \
test-libelf-getshdrstrndx.bin \
test-libelf-mmap.bin \
+ test-libdebuginfod.bin \
test-libnuma.bin \
test-numa_num_possible_cpus.bin \
test-libperl.bin \
@@ -74,8 +75,6 @@ FILES= \
FILES := $(addprefix $(OUTPUT),$(FILES))
-CC ?= $(CROSS_COMPILE)gcc
-CXX ?= $(CROSS_COMPILE)g++
PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
LLVM_CONFIG ?= llvm-config
CLANG ?= clang
@@ -159,6 +158,9 @@ $(OUTPUT)test-libelf-gelf_getnote.bin:
$(OUTPUT)test-libelf-getshdrstrndx.bin:
$(BUILD) -lelf
+$(OUTPUT)test-libdebuginfod.bin:
+ $(BUILD) -ldebuginfod
+
$(OUTPUT)test-libnuma.bin:
$(BUILD) -lnuma
diff --git a/tools/build/feature/test-libdebuginfod.c b/tools/build/feature/test-libdebuginfod.c
new file mode 100644
index 000000000000..da22548b8413
--- /dev/null
+++ b/tools/build/feature/test-libdebuginfod.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <elfutils/debuginfod.h>
+
+int main(void)
+{
+ debuginfod_client* c = debuginfod_begin();
+ return (long)c;
+}
diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py
index f4699f9b46ba..c4ff907c078b 100644
--- a/tools/cgroup/iocost_monitor.py
+++ b/tools/cgroup/iocost_monitor.py
@@ -45,8 +45,7 @@ except:
err('The kernel does not have iocost enabled')
IOC_RUNNING = prog['IOC_RUNNING'].value_()
-NR_USAGE_SLOTS = prog['NR_USAGE_SLOTS'].value_()
-HWEIGHT_WHOLE = prog['HWEIGHT_WHOLE'].value_()
+WEIGHT_ONE = prog['WEIGHT_ONE'].value_()
VTIME_PER_SEC = prog['VTIME_PER_SEC'].value_()
VTIME_PER_USEC = prog['VTIME_PER_USEC'].value_()
AUTOP_SSD_FAST = prog['AUTOP_SSD_FAST'].value_()
@@ -100,7 +99,7 @@ class IocStat:
self.period_ms = ioc.period_us.value_() / 1_000
self.period_at = ioc.period_at.value_() / 1_000_000
self.vperiod_at = ioc.period_at_vtime.value_() / VTIME_PER_SEC
- self.vrate_pct = ioc.vtime_rate.counter.value_() * 100 / VTIME_PER_USEC
+ self.vrate_pct = ioc.vtime_base_rate.value_() * 100 / VTIME_PER_USEC
self.busy_level = ioc.busy_level.value_()
self.autop_idx = ioc.autop_idx.value_()
self.user_cost_model = ioc.user_cost_model.value_()
@@ -136,7 +135,7 @@ class IocStat:
def table_header_str(self):
return f'{"":25} active {"weight":>9} {"hweight%":>13} {"inflt%":>6} ' \
- f'{"dbt":>3} {"delay":>6} {"usages%"}'
+ f'{"debt":>7} {"delay":>7} {"usage%"}'
class IocgStat:
def __init__(self, iocg):
@@ -144,11 +143,11 @@ class IocgStat:
blkg = iocg.pd.blkg
self.is_active = not list_empty(iocg.active_list.address_of_())
- self.weight = iocg.weight.value_()
- self.active = iocg.active.value_()
- self.inuse = iocg.inuse.value_()
- self.hwa_pct = iocg.hweight_active.value_() * 100 / HWEIGHT_WHOLE
- self.hwi_pct = iocg.hweight_inuse.value_() * 100 / HWEIGHT_WHOLE
+ self.weight = iocg.weight.value_() / WEIGHT_ONE
+ self.active = iocg.active.value_() / WEIGHT_ONE
+ self.inuse = iocg.inuse.value_() / WEIGHT_ONE
+ self.hwa_pct = iocg.hweight_active.value_() * 100 / WEIGHT_ONE
+ self.hwi_pct = iocg.hweight_inuse.value_() * 100 / WEIGHT_ONE
self.address = iocg.value_()
vdone = iocg.done_vtime.counter.value_()
@@ -160,23 +159,13 @@ class IocgStat:
else:
self.inflight_pct = 0
- # vdebt used to be an atomic64_t and is now u64, support both
- try:
- self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
- except:
- self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
-
- self.use_delay = blkg.use_delay.counter.value_()
- self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
-
- usage_idx = iocg.usage_idx.value_()
- self.usages = []
- self.usage = 0
- for i in range(NR_USAGE_SLOTS):
- usage = iocg.usages[(usage_idx + 1 + i) % NR_USAGE_SLOTS].value_()
- upct = usage * 100 / HWEIGHT_WHOLE
- self.usages.append(upct)
- self.usage = max(self.usage, upct)
+ self.usage = (100 * iocg.usage_delta_us.value_() /
+ ioc.period_us.value_()) if self.active else 0
+ self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
+ if blkg.use_delay.counter.value_() != 0:
+ self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
+ else:
+ self.delay_ms = 0
def dict(self, now, path):
out = { 'cgroup' : path,
@@ -189,25 +178,20 @@ class IocgStat:
'hweight_inuse_pct' : self.hwi_pct,
'inflight_pct' : self.inflight_pct,
'debt_ms' : self.debt_ms,
- 'use_delay' : self.use_delay,
'delay_ms' : self.delay_ms,
'usage_pct' : self.usage,
'address' : self.address }
- for i in range(len(self.usages)):
- out[f'usage_pct_{i}'] = str(self.usages[i])
return out
def table_row_str(self, path):
out = f'{path[-28:]:28} ' \
f'{"*" if self.is_active else " "} ' \
- f'{self.inuse:5}/{self.active:5} ' \
+ f'{round(self.inuse):5}/{round(self.active):5} ' \
f'{self.hwi_pct:6.2f}/{self.hwa_pct:6.2f} ' \
f'{self.inflight_pct:6.2f} ' \
- f'{min(math.ceil(self.debt_ms), 999):3} ' \
- f'{min(self.use_delay, 99):2}*'\
- f'{min(math.ceil(self.delay_ms), 999):03} '
- for u in self.usages:
- out += f'{min(round(u), 999):03d}:'
+ f'{self.debt_ms:7.2f} ' \
+ f'{self.delay_ms:7.2f} '\
+ f'{min(self.usage, 999):6.2f}'
out = out.rstrip(':')
return out
diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py
new file mode 100644
index 000000000000..c4225ed63565
--- /dev/null
+++ b/tools/cgroup/memcg_slabinfo.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env drgn
+#
+# Copyright (C) 2020 Roman Gushchin <guro@fb.com>
+# Copyright (C) 2020 Facebook
+
+from os import stat
+import argparse
+import sys
+
+from drgn.helpers.linux import list_for_each_entry, list_empty
+from drgn.helpers.linux import for_each_page
+from drgn.helpers.linux.cpumask import for_each_online_cpu
+from drgn.helpers.linux.percpu import per_cpu_ptr
+from drgn import container_of, FaultError, Object
+
+
+DESC = """
+This is a drgn script to provide slab statistics for memory cgroups.
+It supports cgroup v2 and v1 and can emulate memory.kmem.slabinfo
+interface of cgroup v1.
+For drgn, visit https://github.com/osandov/drgn.
+"""
+
+
+MEMCGS = {}
+
+OO_SHIFT = 16
+OO_MASK = ((1 << OO_SHIFT) - 1)
+
+
+def err(s):
+ print('slabinfo.py: error: %s' % s, file=sys.stderr, flush=True)
+ sys.exit(1)
+
+
+def find_memcg_ids(css=prog['root_mem_cgroup'].css, prefix=''):
+ if not list_empty(css.children.address_of_()):
+ for css in list_for_each_entry('struct cgroup_subsys_state',
+ css.children.address_of_(),
+ 'sibling'):
+ name = prefix + '/' + css.cgroup.kn.name.string_().decode('utf-8')
+ memcg = container_of(css, 'struct mem_cgroup', 'css')
+ MEMCGS[css.cgroup.kn.id.value_()] = memcg
+ find_memcg_ids(css, name)
+
+
+def is_root_cache(s):
+ try:
+ return False if s.memcg_params.root_cache else True
+ except AttributeError:
+ return True
+
+
+def cache_name(s):
+ if is_root_cache(s):
+ return s.name.string_().decode('utf-8')
+ else:
+ return s.memcg_params.root_cache.name.string_().decode('utf-8')
+
+
+# SLUB
+
+def oo_order(s):
+ return s.oo.x >> OO_SHIFT
+
+
+def oo_objects(s):
+ return s.oo.x & OO_MASK
+
+
+def count_partial(n, fn):
+ nr_pages = 0
+ for page in list_for_each_entry('struct page', n.partial.address_of_(),
+ 'lru'):
+ nr_pages += fn(page)
+ return nr_pages
+
+
+def count_free(page):
+ return page.objects - page.inuse
+
+
+def slub_get_slabinfo(s, cfg):
+ nr_slabs = 0
+ nr_objs = 0
+ nr_free = 0
+
+ for node in range(cfg['nr_nodes']):
+ n = s.node[node]
+ nr_slabs += n.nr_slabs.counter.value_()
+ nr_objs += n.total_objects.counter.value_()
+ nr_free += count_partial(n, count_free)
+
+ return {'active_objs': nr_objs - nr_free,
+ 'num_objs': nr_objs,
+ 'active_slabs': nr_slabs,
+ 'num_slabs': nr_slabs,
+ 'objects_per_slab': oo_objects(s),
+ 'cache_order': oo_order(s),
+ 'limit': 0,
+ 'batchcount': 0,
+ 'shared': 0,
+ 'shared_avail': 0}
+
+
+def cache_show(s, cfg, objs):
+ if cfg['allocator'] == 'SLUB':
+ sinfo = slub_get_slabinfo(s, cfg)
+ else:
+ err('SLAB isn\'t supported yet')
+
+ if cfg['shared_slab_pages']:
+ sinfo['active_objs'] = objs
+ sinfo['num_objs'] = objs
+
+ print('%-17s %6lu %6lu %6u %4u %4d'
+ ' : tunables %4u %4u %4u'
+ ' : slabdata %6lu %6lu %6lu' % (
+ cache_name(s), sinfo['active_objs'], sinfo['num_objs'],
+ s.size, sinfo['objects_per_slab'], 1 << sinfo['cache_order'],
+ sinfo['limit'], sinfo['batchcount'], sinfo['shared'],
+ sinfo['active_slabs'], sinfo['num_slabs'],
+ sinfo['shared_avail']))
+
+
+def detect_kernel_config():
+ cfg = {}
+
+ cfg['nr_nodes'] = prog['nr_online_nodes'].value_()
+
+ if prog.type('struct kmem_cache').members[1][1] == 'flags':
+ cfg['allocator'] = 'SLUB'
+ elif prog.type('struct kmem_cache').members[1][1] == 'batchcount':
+ cfg['allocator'] = 'SLAB'
+ else:
+ err('Can\'t determine the slab allocator')
+
+ cfg['shared_slab_pages'] = False
+ try:
+ if prog.type('struct obj_cgroup'):
+ cfg['shared_slab_pages'] = True
+ except:
+ pass
+
+ return cfg
+
+
+def for_each_slab_page(prog):
+ PGSlab = 1 << prog.constant('PG_slab')
+ PGHead = 1 << prog.constant('PG_head')
+
+ for page in for_each_page(prog):
+ try:
+ if page.flags.value_() & PGSlab:
+ yield page
+ except FaultError:
+ pass
+
+
+def main():
+ parser = argparse.ArgumentParser(description=DESC,
+ formatter_class=
+ argparse.RawTextHelpFormatter)
+ parser.add_argument('cgroup', metavar='CGROUP',
+ help='Target memory cgroup')
+ args = parser.parse_args()
+
+ try:
+ cgroup_id = stat(args.cgroup).st_ino
+ find_memcg_ids()
+ memcg = MEMCGS[cgroup_id]
+ except KeyError:
+ err('Can\'t find the memory cgroup')
+
+ cfg = detect_kernel_config()
+
+ print('# name <active_objs> <num_objs> <objsize> <objperslab> <pagesperslab>'
+ ' : tunables <limit> <batchcount> <sharedfactor>'
+ ' : slabdata <active_slabs> <num_slabs> <sharedavail>')
+
+ if cfg['shared_slab_pages']:
+ obj_cgroups = set()
+ stats = {}
+ caches = {}
+
+ # find memcg pointers belonging to the specified cgroup
+ obj_cgroups.add(memcg.objcg.value_())
+ for ptr in list_for_each_entry('struct obj_cgroup',
+ memcg.objcg_list.address_of_(),
+ 'list'):
+ obj_cgroups.add(ptr.value_())
+
+ # look over all slab pages, belonging to non-root memcgs
+ # and look for objects belonging to the given memory cgroup
+ for page in for_each_slab_page(prog):
+ objcg_vec_raw = page.obj_cgroups.value_()
+ if objcg_vec_raw == 0:
+ continue
+ cache = page.slab_cache
+ if not cache:
+ continue
+ addr = cache.value_()
+ caches[addr] = cache
+ # clear the lowest bit to get the true obj_cgroups
+ objcg_vec = Object(prog, page.obj_cgroups.type_,
+ value=objcg_vec_raw & ~1)
+
+ if addr not in stats:
+ stats[addr] = 0
+
+ for i in range(oo_objects(cache)):
+ if objcg_vec[i].value_() in obj_cgroups:
+ stats[addr] += 1
+
+ for addr in caches:
+ if stats[addr] > 0:
+ cache_show(caches[addr], cfg, stats[addr])
+
+ else:
+ for s in list_for_each_entry('struct kmem_cache',
+ memcg.kmem_caches.address_of_(),
+ 'memcg_params.kmem_caches_node'):
+ cache_show(s, cfg, None)
+
+
+main()
diff --git a/tools/gpio/gpio-event-mon.c b/tools/gpio/gpio-event-mon.c
index 1a303a81aeef..90c3155f05b1 100644
--- a/tools/gpio/gpio-event-mon.c
+++ b/tools/gpio/gpio-event-mon.c
@@ -23,17 +23,17 @@
#include <sys/ioctl.h>
#include <sys/types.h>
#include <linux/gpio.h>
+#include "gpio-utils.h"
int monitor_device(const char *device_name,
- unsigned int line,
- uint32_t handleflags,
- uint32_t eventflags,
+ unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
unsigned int loops)
{
- struct gpioevent_request req;
- struct gpiohandle_data data;
+ struct gpio_v2_line_values values;
char *chrdev_name;
- int fd;
+ int cfd, lfd;
int ret;
int i = 0;
@@ -41,44 +41,55 @@ int monitor_device(const char *device_name,
if (ret < 0)
return -ENOMEM;
- fd = open(chrdev_name, 0);
- if (fd == -1) {
+ cfd = open(chrdev_name, 0);
+ if (cfd == -1) {
ret = -errno;
fprintf(stderr, "Failed to open %s\n", chrdev_name);
goto exit_free_name;
}
- req.lineoffset = line;
- req.handleflags = handleflags;
- req.eventflags = eventflags;
- strcpy(req.consumer_label, "gpio-event-mon");
-
- ret = ioctl(fd, GPIO_GET_LINEEVENT_IOCTL, &req);
- if (ret == -1) {
- ret = -errno;
- fprintf(stderr, "Failed to issue GET EVENT "
- "IOCTL (%d)\n",
- ret);
- goto exit_close_error;
- }
+ ret = gpiotools_request_line(device_name, lines, num_lines, config,
+ "gpio-event-mon");
+ if (ret < 0)
+ goto exit_device_close;
+ else
+ lfd = ret;
/* Read initial states */
- ret = ioctl(req.fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &data);
- if (ret == -1) {
- ret = -errno;
- fprintf(stderr, "Failed to issue GPIOHANDLE GET LINE "
- "VALUES IOCTL (%d)\n",
+ values.mask = 0;
+ values.bits = 0;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&values.mask, i);
+ ret = gpiotools_get_values(lfd, &values);
+ if (ret < 0) {
+ fprintf(stderr,
+ "Failed to issue GPIO LINE GET VALUES IOCTL (%d)\n",
ret);
- goto exit_close_error;
+ goto exit_line_close;
}
- fprintf(stdout, "Monitoring line %d on %s\n", line, device_name);
- fprintf(stdout, "Initial line value: %d\n", data.values[0]);
+ if (num_lines == 1) {
+ fprintf(stdout, "Monitoring line %d on %s\n", lines[0], device_name);
+ fprintf(stdout, "Initial line value: %d\n",
+ gpiotools_test_bit(values.bits, 0));
+ } else {
+ fprintf(stdout, "Monitoring lines %d", lines[0]);
+ for (i = 1; i < num_lines - 1; i++)
+ fprintf(stdout, ", %d", lines[i]);
+ fprintf(stdout, " and %d on %s\n", lines[i], device_name);
+ fprintf(stdout, "Initial line values: %d",
+ gpiotools_test_bit(values.bits, 0));
+ for (i = 1; i < num_lines - 1; i++)
+ fprintf(stdout, ", %d",
+ gpiotools_test_bit(values.bits, i));
+ fprintf(stdout, " and %d\n",
+ gpiotools_test_bit(values.bits, i));
+ }
while (1) {
- struct gpioevent_data event;
+ struct gpio_v2_line_event event;
- ret = read(req.fd, &event, sizeof(event));
+ ret = read(lfd, &event, sizeof(event));
if (ret == -1) {
if (errno == -EAGAIN) {
fprintf(stderr, "nothing available\n");
@@ -96,12 +107,14 @@ int monitor_device(const char *device_name,
ret = -EIO;
break;
}
- fprintf(stdout, "GPIO EVENT %llu: ", event.timestamp);
+ fprintf(stdout, "GPIO EVENT at %llu on line %d (%d|%d) ",
+ event.timestamp_ns, event.offset, event.line_seqno,
+ event.seqno);
switch (event.id) {
- case GPIOEVENT_EVENT_RISING_EDGE:
+ case GPIO_V2_LINE_EVENT_RISING_EDGE:
fprintf(stdout, "rising edge");
break;
- case GPIOEVENT_EVENT_FALLING_EDGE:
+ case GPIO_V2_LINE_EVENT_FALLING_EDGE:
fprintf(stdout, "falling edge");
break;
default:
@@ -114,8 +127,11 @@ int monitor_device(const char *device_name,
break;
}
-exit_close_error:
- if (close(fd) == -1)
+exit_line_close:
+ if (close(lfd) == -1)
+ perror("Failed to close line file");
+exit_device_close:
+ if (close(cfd) == -1)
perror("Failed to close GPIO character device file");
exit_free_name:
free(chrdev_name);
@@ -127,29 +143,37 @@ void print_usage(void)
fprintf(stderr, "Usage: gpio-event-mon [options]...\n"
"Listen to events on GPIO lines, 0->1 1->0\n"
" -n <name> Listen on GPIOs on a named device (must be stated)\n"
- " -o <n> Offset to monitor\n"
+ " -o <n> Offset of line to monitor (may be repeated)\n"
" -d Set line as open drain\n"
" -s Set line as open source\n"
" -r Listen for rising edges\n"
" -f Listen for falling edges\n"
+ " -b <n> Debounce the line with period n microseconds\n"
" [-c <n>] Do <n> loops (optional, infinite loop if not stated)\n"
" -? This helptext\n"
"\n"
"Example:\n"
- "gpio-event-mon -n gpiochip0 -o 4 -r -f\n"
+ "gpio-event-mon -n gpiochip0 -o 4 -r -f -b 10000\n"
);
}
+#define EDGE_FLAGS \
+ (GPIO_V2_LINE_FLAG_EDGE_RISING | \
+ GPIO_V2_LINE_FLAG_EDGE_FALLING)
+
int main(int argc, char **argv)
{
const char *device_name = NULL;
- unsigned int line = -1;
+ unsigned int lines[GPIO_V2_LINES_MAX];
+ unsigned int num_lines = 0;
unsigned int loops = 0;
- uint32_t handleflags = GPIOHANDLE_REQUEST_INPUT;
- uint32_t eventflags = 0;
- int c;
+ struct gpio_v2_line_config config;
+ int c, attr, i;
+ unsigned long debounce_period_us = 0;
- while ((c = getopt(argc, argv, "c:n:o:dsrf?")) != -1) {
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_INPUT;
+ while ((c = getopt(argc, argv, "c:n:o:b:dsrf?")) != -1) {
switch (c) {
case 'c':
loops = strtoul(optarg, NULL, 10);
@@ -158,19 +182,27 @@ int main(int argc, char **argv)
device_name = optarg;
break;
case 'o':
- line = strtoul(optarg, NULL, 10);
+ if (num_lines >= GPIO_V2_LINES_MAX) {
+ print_usage();
+ return -1;
+ }
+ lines[num_lines] = strtoul(optarg, NULL, 10);
+ num_lines++;
+ break;
+ case 'b':
+ debounce_period_us = strtoul(optarg, NULL, 10);
break;
case 'd':
- handleflags |= GPIOHANDLE_REQUEST_OPEN_DRAIN;
+ config.flags |= GPIO_V2_LINE_FLAG_OPEN_DRAIN;
break;
case 's':
- handleflags |= GPIOHANDLE_REQUEST_OPEN_SOURCE;
+ config.flags |= GPIO_V2_LINE_FLAG_OPEN_SOURCE;
break;
case 'r':
- eventflags |= GPIOEVENT_REQUEST_RISING_EDGE;
+ config.flags |= GPIO_V2_LINE_FLAG_EDGE_RISING;
break;
case 'f':
- eventflags |= GPIOEVENT_REQUEST_FALLING_EDGE;
+ config.flags |= GPIO_V2_LINE_FLAG_EDGE_FALLING;
break;
case '?':
print_usage();
@@ -178,15 +210,23 @@ int main(int argc, char **argv)
}
}
- if (!device_name || line == -1) {
+ if (debounce_period_us) {
+ attr = config.num_attrs;
+ config.num_attrs++;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&config.attrs[attr].mask, i);
+ config.attrs[attr].attr.id = GPIO_V2_LINE_ATTR_ID_DEBOUNCE;
+ config.attrs[attr].attr.debounce_period_us = debounce_period_us;
+ }
+
+ if (!device_name || num_lines == 0) {
print_usage();
return -1;
}
- if (!eventflags) {
+ if (!(config.flags & EDGE_FLAGS)) {
printf("No flags specified, listening on both rising and "
"falling edges\n");
- eventflags = GPIOEVENT_REQUEST_BOTH_EDGES;
+ config.flags |= EDGE_FLAGS;
}
- return monitor_device(device_name, line, handleflags,
- eventflags, loops);
+ return monitor_device(device_name, lines, num_lines, &config, loops);
}
diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c
index 9fd926e8cb52..54fdf59dd320 100644
--- a/tools/gpio/gpio-hammer.c
+++ b/tools/gpio/gpio-hammer.c
@@ -22,39 +22,46 @@
#include <linux/gpio.h>
#include "gpio-utils.h"
-int hammer_device(const char *device_name, unsigned int *lines, int nlines,
+int hammer_device(const char *device_name, unsigned int *lines, int num_lines,
unsigned int loops)
{
- struct gpiohandle_data data;
+ struct gpio_v2_line_values values;
+ struct gpio_v2_line_config config;
char swirr[] = "-\\|/";
int fd;
int ret;
int i, j;
unsigned int iteration = 0;
- memset(&data.values, 0, sizeof(data.values));
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_OUTPUT, &data,
- "gpio-hammer");
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_OUTPUT;
+
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, "gpio-hammer");
if (ret < 0)
goto exit_error;
else
fd = ret;
- ret = gpiotools_get_values(fd, &data);
+ values.mask = 0;
+ values.bits = 0;
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&values.mask, i);
+
+ ret = gpiotools_get_values(fd, &values);
if (ret < 0)
goto exit_close_error;
fprintf(stdout, "Hammer lines [");
- for (i = 0; i < nlines; i++) {
+ for (i = 0; i < num_lines; i++) {
fprintf(stdout, "%d", lines[i]);
- if (i != (nlines - 1))
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "] on %s, initial states: [", device_name);
- for (i = 0; i < nlines; i++) {
- fprintf(stdout, "%d", data.values[i]);
- if (i != (nlines - 1))
+ for (i = 0; i < num_lines; i++) {
+ fprintf(stdout, "%d", gpiotools_test_bit(values.bits, i));
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "]\n");
@@ -63,15 +70,15 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
j = 0;
while (1) {
/* Invert all lines so we blink */
- for (i = 0; i < nlines; i++)
- data.values[i] = !data.values[i];
+ for (i = 0; i < num_lines; i++)
+ gpiotools_change_bit(&values.bits, i);
- ret = gpiotools_set_values(fd, &data);
+ ret = gpiotools_set_values(fd, &values);
if (ret < 0)
goto exit_close_error;
/* Re-read values to get status */
- ret = gpiotools_get_values(fd, &data);
+ ret = gpiotools_get_values(fd, &values);
if (ret < 0)
goto exit_close_error;
@@ -81,9 +88,10 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
j = 0;
fprintf(stdout, "[");
- for (i = 0; i < nlines; i++) {
- fprintf(stdout, "%d: %d", lines[i], data.values[i]);
- if (i != (nlines - 1))
+ for (i = 0; i < num_lines; i++) {
+ fprintf(stdout, "%d: %d", lines[i],
+ gpiotools_test_bit(values.bits, i));
+ if (i != (num_lines - 1))
fprintf(stdout, ", ");
}
fprintf(stdout, "]\r");
@@ -97,7 +105,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
ret = 0;
exit_close_error:
- gpiotools_release_linehandle(fd);
+ gpiotools_release_line(fd);
exit_error:
return ret;
}
@@ -121,7 +129,7 @@ int main(int argc, char **argv)
const char *device_name = NULL;
unsigned int lines[GPIOHANDLES_MAX];
unsigned int loops = 0;
- int nlines;
+ int num_lines;
int c;
int i;
@@ -158,11 +166,11 @@ int main(int argc, char **argv)
return -1;
}
- nlines = i;
+ num_lines = i;
- if (!device_name || !nlines) {
+ if (!device_name || !num_lines) {
print_usage();
return -1;
}
- return hammer_device(device_name, lines, nlines, loops);
+ return hammer_device(device_name, lines, num_lines, loops);
}
diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c
index 16a5d9cb9da2..37187e056c8b 100644
--- a/tools/gpio/gpio-utils.c
+++ b/tools/gpio/gpio-utils.c
@@ -38,7 +38,7 @@
* such as "gpiochip0"
* @lines: An array desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
+ * @num_lines: The number of lines to request.
* @flag: The new flag for requsted gpio. Reference
* "linux/gpio.h" for the meaning of flag.
* @data: Default value will be set to gpio when flag is
@@ -56,7 +56,7 @@
* On failure return the errno.
*/
int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
- unsigned int nlines, unsigned int flag,
+ unsigned int num_lines, unsigned int flag,
struct gpiohandle_data *data,
const char *consumer_label)
{
@@ -78,12 +78,12 @@ int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
goto exit_free_name;
}
- for (i = 0; i < nlines; i++)
+ for (i = 0; i < num_lines; i++)
req.lineoffsets[i] = lines[i];
req.flags = flag;
strcpy(req.consumer_label, consumer_label);
- req.lines = nlines;
+ req.lines = num_lines;
if (flag & GPIOHANDLE_REQUEST_OUTPUT)
memcpy(req.default_values, data, sizeof(req.default_values));
@@ -100,20 +100,87 @@ exit_free_name:
free(chrdev_name);
return ret < 0 ? ret : req.fd;
}
+
+/**
+ * gpiotools_request_line() - request gpio lines in a gpiochip
+ * @device_name: The name of gpiochip without prefix "/dev/",
+ * such as "gpiochip0"
+ * @lines: An array desired lines, specified by offset
+ * index for the associated GPIO device.
+ * @num_lines: The number of lines to request.
+ * @config: The new config for requested gpio. Reference
+ * "linux/gpio.h" for config details.
+ * @consumer: The name of consumer, such as "sysfs",
+ * "powerkey". This is useful for other users to
+ * know who is using.
+ *
+ * Request gpio lines through the ioctl provided by chardev. User
+ * could call gpiotools_set_values() and gpiotools_get_values() to
+ * read and write respectively through the returned fd. Call
+ * gpiotools_release_line() to release these lines after that.
+ *
+ * Return: On success return the fd;
+ * On failure return the errno.
+ */
+int gpiotools_request_line(const char *device_name, unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
+ const char *consumer)
+{
+ struct gpio_v2_line_request req;
+ char *chrdev_name;
+ int fd;
+ int i;
+ int ret;
+
+ ret = asprintf(&chrdev_name, "/dev/%s", device_name);
+ if (ret < 0)
+ return -ENOMEM;
+
+ fd = open(chrdev_name, 0);
+ if (fd == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to open %s, %s\n",
+ chrdev_name, strerror(errno));
+ goto exit_free_name;
+ }
+
+ memset(&req, 0, sizeof(req));
+ for (i = 0; i < num_lines; i++)
+ req.offsets[i] = lines[i];
+
+ req.config = *config;
+ strcpy(req.consumer, consumer);
+ req.num_lines = num_lines;
+
+ ret = ioctl(fd, GPIO_V2_GET_LINE_IOCTL, &req);
+ if (ret == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to issue %s (%d), %s\n",
+ "GPIO_GET_LINE_IOCTL", ret, strerror(errno));
+ }
+
+ if (close(fd) == -1)
+ perror("Failed to close GPIO character device file");
+exit_free_name:
+ free(chrdev_name);
+ return ret < 0 ? ret : req.fd;
+}
+
/**
* gpiotools_set_values(): Set the value of gpio(s)
* @fd: The fd returned by
- * gpiotools_request_linehandle().
- * @data: The array of values want to set.
+ * gpiotools_request_line().
+ * @values: The array of values want to set.
*
* Return: On success return 0;
* On failure return the errno.
*/
-int gpiotools_set_values(const int fd, struct gpiohandle_data *data)
+int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values)
{
int ret;
- ret = ioctl(fd, GPIOHANDLE_SET_LINE_VALUES_IOCTL, data);
+ ret = ioctl(fd, GPIO_V2_LINE_SET_VALUES_IOCTL, values);
if (ret == -1) {
ret = -errno;
fprintf(stderr, "Failed to issue %s (%d), %s\n",
@@ -127,17 +194,17 @@ int gpiotools_set_values(const int fd, struct gpiohandle_data *data)
/**
* gpiotools_get_values(): Get the value of gpio(s)
* @fd: The fd returned by
- * gpiotools_request_linehandle().
- * @data: The array of values get from hardware.
+ * gpiotools_request_line().
+ * @values: The array of values get from hardware.
*
* Return: On success return 0;
* On failure return the errno.
*/
-int gpiotools_get_values(const int fd, struct gpiohandle_data *data)
+int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values)
{
int ret;
- ret = ioctl(fd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, data);
+ ret = ioctl(fd, GPIO_V2_LINE_GET_VALUES_IOCTL, values);
if (ret == -1) {
ret = -errno;
fprintf(stderr, "Failed to issue %s (%d), %s\n",
@@ -170,6 +237,27 @@ int gpiotools_release_linehandle(const int fd)
}
/**
+ * gpiotools_release_line(): Release the line(s) of gpiochip
+ * @fd: The fd returned by
+ * gpiotools_request_line().
+ *
+ * Return: On success return 0;
+ * On failure return the errno.
+ */
+int gpiotools_release_line(const int fd)
+{
+ int ret;
+
+ ret = close(fd);
+ if (ret == -1) {
+ perror("Failed to close GPIO LINE device file");
+ ret = -errno;
+ }
+
+ return ret;
+}
+
+/**
* gpiotools_get(): Get value from specific line
* @device_name: The name of gpiochip without prefix "/dev/",
* such as "gpiochip0"
@@ -180,11 +268,14 @@ int gpiotools_release_linehandle(const int fd)
*/
int gpiotools_get(const char *device_name, unsigned int line)
{
- struct gpiohandle_data data;
+ int ret;
+ unsigned int value;
unsigned int lines[] = {line};
- gpiotools_gets(device_name, lines, 1, &data);
- return data.values[0];
+ ret = gpiotools_gets(device_name, lines, 1, &value);
+ if (ret)
+ return ret;
+ return value;
}
@@ -194,28 +285,36 @@ int gpiotools_get(const char *device_name, unsigned int line)
* such as "gpiochip0".
* @lines: An array desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
- * @data: The array of values get from gpiochip.
+ * @num_lines: The number of lines to request.
+ * @values: The array of values get from gpiochip.
*
* Return: On success return 0;
* On failure return the errno.
*/
int gpiotools_gets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data)
+ unsigned int num_lines, unsigned int *values)
{
- int fd;
+ int fd, i;
int ret;
int ret_close;
+ struct gpio_v2_line_config config;
+ struct gpio_v2_line_values lv;
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_INPUT, data,
- CONSUMER);
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_INPUT;
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, CONSUMER);
if (ret < 0)
return ret;
fd = ret;
- ret = gpiotools_get_values(fd, data);
- ret_close = gpiotools_release_linehandle(fd);
+ for (i = 0; i < num_lines; i++)
+ gpiotools_set_bit(&lv.mask, i);
+ ret = gpiotools_get_values(fd, &lv);
+ if (!ret)
+ for (i = 0; i < num_lines; i++)
+ values[i] = gpiotools_test_bit(lv.bits, i);
+ ret_close = gpiotools_release_line(fd);
return ret < 0 ? ret : ret_close;
}
@@ -232,11 +331,9 @@ int gpiotools_gets(const char *device_name, unsigned int *lines,
int gpiotools_set(const char *device_name, unsigned int line,
unsigned int value)
{
- struct gpiohandle_data data;
unsigned int lines[] = {line};
- data.values[0] = value;
- return gpiotools_sets(device_name, lines, 1, &data);
+ return gpiotools_sets(device_name, lines, 1, &value);
}
/**
@@ -245,23 +342,32 @@ int gpiotools_set(const char *device_name, unsigned int line,
* such as "gpiochip0".
* @lines: An array desired lines, specified by offset
* index for the associated GPIO device.
- * @nline: The number of lines to request.
- * @data: The array of values set to gpiochip, must be
+ * @num_lines: The number of lines to request.
+ * @value: The array of values set to gpiochip, must be
* 0(low) or 1(high).
*
* Return: On success return 0;
* On failure return the errno.
*/
int gpiotools_sets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data)
+ unsigned int num_lines, unsigned int *values)
{
- int ret;
+ int ret, i;
+ struct gpio_v2_line_config config;
- ret = gpiotools_request_linehandle(device_name, lines, nlines,
- GPIOHANDLE_REQUEST_OUTPUT, data,
- CONSUMER);
+ memset(&config, 0, sizeof(config));
+ config.flags = GPIO_V2_LINE_FLAG_OUTPUT;
+ config.num_attrs = 1;
+ config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES;
+ for (i = 0; i < num_lines; i++) {
+ gpiotools_set_bit(&config.attrs[0].mask, i);
+ gpiotools_assign_bit(&config.attrs[0].attr.values,
+ i, values[i]);
+ }
+ ret = gpiotools_request_line(device_name, lines, num_lines,
+ &config, CONSUMER);
if (ret < 0)
return ret;
- return gpiotools_release_linehandle(ret);
+ return gpiotools_release_line(ret);
}
diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h
index cf37f13f3dcb..6c69a9f1c253 100644
--- a/tools/gpio/gpio-utils.h
+++ b/tools/gpio/gpio-utils.h
@@ -12,7 +12,9 @@
#ifndef _GPIO_UTILS_H_
#define _GPIO_UTILS_H_
+#include <stdbool.h>
#include <string.h>
+#include <linux/types.h>
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
@@ -23,19 +25,55 @@ static inline int check_prefix(const char *str, const char *prefix)
}
int gpiotools_request_linehandle(const char *device_name, unsigned int *lines,
- unsigned int nlines, unsigned int flag,
+ unsigned int num_lines, unsigned int flag,
struct gpiohandle_data *data,
const char *consumer_label);
-int gpiotools_set_values(const int fd, struct gpiohandle_data *data);
-int gpiotools_get_values(const int fd, struct gpiohandle_data *data);
int gpiotools_release_linehandle(const int fd);
+int gpiotools_request_line(const char *device_name,
+ unsigned int *lines,
+ unsigned int num_lines,
+ struct gpio_v2_line_config *config,
+ const char *consumer);
+int gpiotools_set_values(const int fd, struct gpio_v2_line_values *values);
+int gpiotools_get_values(const int fd, struct gpio_v2_line_values *values);
+int gpiotools_release_line(const int fd);
+
int gpiotools_get(const char *device_name, unsigned int line);
int gpiotools_gets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data);
+ unsigned int num_lines, unsigned int *values);
int gpiotools_set(const char *device_name, unsigned int line,
unsigned int value);
int gpiotools_sets(const char *device_name, unsigned int *lines,
- unsigned int nlines, struct gpiohandle_data *data);
+ unsigned int num_lines, unsigned int *values);
+
+/* helper functions for gpio_v2_line_values bits */
+static inline void gpiotools_set_bit(__u64 *b, int n)
+{
+ *b |= _BITULL(n);
+}
+
+static inline void gpiotools_change_bit(__u64 *b, int n)
+{
+ *b ^= _BITULL(n);
+}
+
+static inline void gpiotools_clear_bit(__u64 *b, int n)
+{
+ *b &= ~_BITULL(n);
+}
+
+static inline int gpiotools_test_bit(__u64 b, int n)
+{
+ return !!(b & _BITULL(n));
+}
+
+static inline void gpiotools_assign_bit(__u64 *b, int n, bool value)
+{
+ if (value)
+ gpiotools_set_bit(b, n);
+ else
+ gpiotools_clear_bit(b, n);
+}
#endif /* _GPIO_UTILS_H_ */
diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c
index 5cea24fddfa7..f229ec62301b 100644
--- a/tools/gpio/gpio-watch.c
+++ b/tools/gpio/gpio-watch.c
@@ -21,8 +21,8 @@
int main(int argc, char **argv)
{
- struct gpioline_info_changed chg;
- struct gpioline_info req;
+ struct gpio_v2_line_info_changed chg;
+ struct gpio_v2_line_info req;
struct pollfd pfd;
int fd, i, j, ret;
char *event, *end;
@@ -40,11 +40,11 @@ int main(int argc, char **argv)
for (i = 0, j = 2; i < argc - 2; i++, j++) {
memset(&req, 0, sizeof(req));
- req.line_offset = strtoul(argv[j], &end, 0);
+ req.offset = strtoul(argv[j], &end, 0);
if (*end != '\0')
goto err_usage;
- ret = ioctl(fd, GPIO_GET_LINEINFO_WATCH_IOCTL, &req);
+ ret = ioctl(fd, GPIO_V2_GET_LINEINFO_WATCH_IOCTL, &req);
if (ret) {
perror("unable to set up line watch");
return EXIT_FAILURE;
@@ -71,13 +71,13 @@ int main(int argc, char **argv)
}
switch (chg.event_type) {
- case GPIOLINE_CHANGED_REQUESTED:
+ case GPIO_V2_LINE_CHANGED_REQUESTED:
event = "requested";
break;
- case GPIOLINE_CHANGED_RELEASED:
+ case GPIO_V2_LINE_CHANGED_RELEASED:
event = "released";
break;
- case GPIOLINE_CHANGED_CONFIG:
+ case GPIO_V2_LINE_CHANGED_CONFIG:
event = "config changed";
break;
default:
@@ -87,7 +87,7 @@ int main(int argc, char **argv)
}
printf("line %u: %s at %llu\n",
- chg.info.line_offset, event, chg.timestamp);
+ chg.info.offset, event, chg.timestamp_ns);
}
}
diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
index b08d7a5e779b..5a05a454d0c9 100644
--- a/tools/gpio/lsgpio.c
+++ b/tools/gpio/lsgpio.c
@@ -25,57 +25,73 @@
struct gpio_flag {
char *name;
- unsigned long mask;
+ unsigned long long mask;
};
struct gpio_flag flagnames[] = {
{
- .name = "kernel",
- .mask = GPIOLINE_FLAG_KERNEL,
+ .name = "used",
+ .mask = GPIO_V2_LINE_FLAG_USED,
+ },
+ {
+ .name = "input",
+ .mask = GPIO_V2_LINE_FLAG_INPUT,
},
{
.name = "output",
- .mask = GPIOLINE_FLAG_IS_OUT,
+ .mask = GPIO_V2_LINE_FLAG_OUTPUT,
},
{
.name = "active-low",
- .mask = GPIOLINE_FLAG_ACTIVE_LOW,
+ .mask = GPIO_V2_LINE_FLAG_ACTIVE_LOW,
},
{
.name = "open-drain",
- .mask = GPIOLINE_FLAG_OPEN_DRAIN,
+ .mask = GPIO_V2_LINE_FLAG_OPEN_DRAIN,
},
{
.name = "open-source",
- .mask = GPIOLINE_FLAG_OPEN_SOURCE,
+ .mask = GPIO_V2_LINE_FLAG_OPEN_SOURCE,
},
{
.name = "pull-up",
- .mask = GPIOLINE_FLAG_BIAS_PULL_UP,
+ .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_UP,
},
{
.name = "pull-down",
- .mask = GPIOLINE_FLAG_BIAS_PULL_DOWN,
+ .mask = GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN,
},
{
.name = "bias-disabled",
- .mask = GPIOLINE_FLAG_BIAS_DISABLE,
+ .mask = GPIO_V2_LINE_FLAG_BIAS_DISABLED,
},
};
-void print_flags(unsigned long flags)
+static void print_attributes(struct gpio_v2_line_info *info)
{
int i;
- int printed = 0;
+ const char *field_format = "%s";
for (i = 0; i < ARRAY_SIZE(flagnames); i++) {
- if (flags & flagnames[i].mask) {
- if (printed)
- fprintf(stdout, " ");
- fprintf(stdout, "%s", flagnames[i].name);
- printed++;
+ if (info->flags & flagnames[i].mask) {
+ fprintf(stdout, field_format, flagnames[i].name);
+ field_format = ", %s";
}
}
+
+ if ((info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING) &&
+ (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING))
+ fprintf(stdout, field_format, "both-edges");
+ else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_RISING)
+ fprintf(stdout, field_format, "rising-edge");
+ else if (info->flags & GPIO_V2_LINE_FLAG_EDGE_FALLING)
+ fprintf(stdout, field_format, "falling-edge");
+
+ for (i = 0; i < info->num_attrs; i++) {
+ if (info->attrs[i].id == GPIO_V2_LINE_ATTR_ID_DEBOUNCE)
+ fprintf(stdout, ", debounce_period=%dusec",
+ info->attrs[0].debounce_period_us);
+ }
}
int list_device(const char *device_name)
@@ -109,18 +125,18 @@ int list_device(const char *device_name)
/* Loop over the lines and print info */
for (i = 0; i < cinfo.lines; i++) {
- struct gpioline_info linfo;
+ struct gpio_v2_line_info linfo;
memset(&linfo, 0, sizeof(linfo));
- linfo.line_offset = i;
+ linfo.offset = i;
- ret = ioctl(fd, GPIO_GET_LINEINFO_IOCTL, &linfo);
+ ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &linfo);
if (ret == -1) {
ret = -errno;
perror("Failed to issue LINEINFO IOCTL\n");
goto exit_close_error;
}
- fprintf(stdout, "\tline %2d:", linfo.line_offset);
+ fprintf(stdout, "\tline %2d:", linfo.offset);
if (linfo.name[0])
fprintf(stdout, " \"%s\"", linfo.name);
else
@@ -131,7 +147,7 @@ int list_device(const char *device_name)
fprintf(stdout, " unused");
if (linfo.flags) {
fprintf(stdout, " [");
- print_flags(linfo.flags);
+ print_attributes(&linfo);
fprintf(stdout, "]");
}
fprintf(stdout, "\n");
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index f115d166c985..bb03859db89d 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -119,6 +119,7 @@ static const char * const iio_modifier_names[] = {
[IIO_MOD_PM2P5] = "pm2p5",
[IIO_MOD_PM4] = "pm4",
[IIO_MOD_PM10] = "pm10",
+ [IIO_MOD_O2] = "o2",
};
static bool event_is_known(struct iio_event_data *event)
@@ -211,6 +212,7 @@ static bool event_is_known(struct iio_event_data *event)
case IIO_MOD_PM2P5:
case IIO_MOD_PM4:
case IIO_MOD_PM10:
+ case IIO_MOD_O2:
break;
default:
return false;
diff --git a/tools/include/linux/jhash.h b/tools/include/linux/jhash.h
index 348c6f47e4cc..af8d0fe1c6ce 100644
--- a/tools/include/linux/jhash.h
+++ b/tools/include/linux/jhash.h
@@ -5,7 +5,7 @@
*
* Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
*
- * http://burtleburtle.net/bob/hash/
+ * https://burtleburtle.net/bob/hash/
*
* These are the credits from Bob's sources:
*
diff --git a/tools/include/linux/objtool.h b/tools/include/linux/objtool.h
new file mode 100644
index 000000000000..ab82c793c897
--- /dev/null
+++ b/tools/include/linux/objtool.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_OBJTOOL_H
+#define _LINUX_OBJTOOL_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+
+/*
+ * This struct is used by asm and inline asm code to manually annotate the
+ * location of registers on the stack.
+ */
+struct unwind_hint {
+ u32 ip;
+ s16 sp_offset;
+ u8 sp_reg;
+ u8 type;
+ u8 end;
+};
+#endif
+
+/*
+ * UNWIND_HINT_TYPE_CALL: Indicates that sp_reg+sp_offset resolves to PREV_SP
+ * (the caller's SP right before it made the call). Used for all callable
+ * functions, i.e. all C code and all callable asm functions.
+ *
+ * UNWIND_HINT_TYPE_REGS: Used in entry code to indicate that sp_reg+sp_offset
+ * points to a fully populated pt_regs from a syscall, interrupt, or exception.
+ *
+ * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
+ * sp_reg+sp_offset points to the iret return frame.
+ */
+#define UNWIND_HINT_TYPE_CALL 0
+#define UNWIND_HINT_TYPE_REGS 1
+#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
+#define UNWIND_HINT_TYPE_RET_OFFSET 3
+
+#ifdef CONFIG_STACK_VALIDATION
+
+#ifndef __ASSEMBLY__
+
+#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
+ "987: \n\t" \
+ ".pushsection .discard.unwind_hints\n\t" \
+ /* struct unwind_hint */ \
+ ".long 987b - .\n\t" \
+ ".short " __stringify(sp_offset) "\n\t" \
+ ".byte " __stringify(sp_reg) "\n\t" \
+ ".byte " __stringify(type) "\n\t" \
+ ".byte " __stringify(end) "\n\t" \
+ ".balign 4 \n\t" \
+ ".popsection\n\t"
+
+/*
+ * This macro marks the given function's stack frame as "non-standard", which
+ * tells objtool to ignore the function when doing stack metadata validation.
+ * It should only be used in special cases where you're 100% sure it won't
+ * affect the reliability of frame pointers and kernel stack traces.
+ *
+ * For more information, see tools/objtool/Documentation/stack-validation.txt.
+ */
+#define STACK_FRAME_NON_STANDARD(func) \
+ static void __used __section(.discard.func_stack_frame_non_standard) \
+ *__func_stack_frame_non_standard_##func = func
+
+#else /* __ASSEMBLY__ */
+
+/*
+ * This macro indicates that the following intra-function call is valid.
+ * Any non-annotated intra-function call will cause objtool to issue a warning.
+ */
+#define ANNOTATE_INTRA_FUNCTION_CALL \
+ 999: \
+ .pushsection .discard.intra_function_calls; \
+ .long 999b; \
+ .popsection;
+
+/*
+ * In asm, there are two kinds of code: normal C-type callable functions and
+ * the rest. The normal callable functions can be called by other code, and
+ * don't do anything unusual with the stack. Such normal callable functions
+ * are annotated with the ENTRY/ENDPROC macros. Most asm code falls in this
+ * category. In this case, no special debugging annotations are needed because
+ * objtool can automatically generate the ORC data for the ORC unwinder to read
+ * at runtime.
+ *
+ * Anything which doesn't fall into the above category, such as syscall and
+ * interrupt handlers, tends to not be called directly by other functions, and
+ * often does unusual non-C-function-type things with the stack pointer. Such
+ * code needs to be annotated such that objtool can understand it. The
+ * following CFI hint macros are for this type of code.
+ *
+ * These macros provide hints to objtool about the state of the stack at each
+ * instruction. Objtool starts from the hints and follows the code flow,
+ * making automatic CFI adjustments when it sees pushes and pops, filling out
+ * the debuginfo as necessary. It will also warn if it sees any
+ * inconsistencies.
+ */
+.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.Lunwind_hint_ip_\@:
+ .pushsection .discard.unwind_hints
+ /* struct unwind_hint */
+ .long .Lunwind_hint_ip_\@ - .
+ .short \sp_offset
+ .byte \sp_reg
+ .byte \type
+ .byte \end
+ .balign 4
+ .popsection
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#else /* !CONFIG_STACK_VALIDATION */
+
+#ifndef __ASSEMBLY__
+
+#define UNWIND_HINT(sp_reg, sp_offset, type, end) \
+ "\n\t"
+#define STACK_FRAME_NON_STANDARD(func)
+#else
+#define ANNOTATE_INTRA_FUNCTION_CALL
+.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.endm
+#endif
+
+#endif /* CONFIG_STACK_VALIDATION */
+
+#endif /* _LINUX_OBJTOOL_H */
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
new file mode 100644
index 000000000000..89135bb35bf7
--- /dev/null
+++ b/tools/include/linux/static_call_types.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _STATIC_CALL_TYPES_H
+#define _STATIC_CALL_TYPES_H
+
+#include <linux/types.h>
+#include <linux/stringify.h>
+
+#define STATIC_CALL_KEY_PREFIX __SCK__
+#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX)
+#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1)
+#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name)
+
+#define STATIC_CALL_TRAMP_PREFIX __SCT__
+#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX)
+#define STATIC_CALL_TRAMP_PREFIX_LEN (sizeof(STATIC_CALL_TRAMP_PREFIX_STR) - 1)
+#define STATIC_CALL_TRAMP(name) __PASTE(STATIC_CALL_TRAMP_PREFIX, name)
+#define STATIC_CALL_TRAMP_STR(name) __stringify(STATIC_CALL_TRAMP(name))
+
+/*
+ * Flags in the low bits of static_call_site::key.
+ */
+#define STATIC_CALL_SITE_TAIL 1UL /* tail call */
+#define STATIC_CALL_SITE_INIT 2UL /* init section */
+#define STATIC_CALL_SITE_FLAGS 3UL
+
+/*
+ * The static call site table needs to be created by external tooling (objtool
+ * or a compiler plugin).
+ */
+struct static_call_site {
+ s32 addr;
+ s32 key;
+};
+
+#endif /* _STATIC_CALL_TYPES_H */
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h
index c8c189a5f0a6..f2b5d72a46c2 100644
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -140,7 +140,7 @@ __SYSCALL(__NR_renameat, sys_renameat)
#define __NR_umount2 39
__SYSCALL(__NR_umount2, sys_umount)
#define __NR_mount 40
-__SC_COMP(__NR_mount, sys_mount, compat_sys_mount)
+__SYSCALL(__NR_mount, sys_mount)
#define __NR_pivot_root 41
__SYSCALL(__NR_pivot_root, sys_pivot_root)
@@ -207,9 +207,9 @@ __SYSCALL(__NR_read, sys_read)
#define __NR_write 64
__SYSCALL(__NR_write, sys_write)
#define __NR_readv 65
-__SC_COMP(__NR_readv, sys_readv, compat_sys_readv)
+__SC_COMP(__NR_readv, sys_readv, sys_readv)
#define __NR_writev 66
-__SC_COMP(__NR_writev, sys_writev, compat_sys_writev)
+__SC_COMP(__NR_writev, sys_writev, sys_writev)
#define __NR_pread64 67
__SC_COMP(__NR_pread64, sys_pread64, compat_sys_pread64)
#define __NR_pwrite64 68
@@ -237,7 +237,7 @@ __SC_COMP(__NR_signalfd4, sys_signalfd4, compat_sys_signalfd4)
/* fs/splice.c */
#define __NR_vmsplice 75
-__SC_COMP(__NR_vmsplice, sys_vmsplice, compat_sys_vmsplice)
+__SYSCALL(__NR_vmsplice, sys_vmsplice)
#define __NR_splice 76
__SYSCALL(__NR_splice, sys_splice)
#define __NR_tee 77
@@ -727,11 +727,9 @@ __SYSCALL(__NR_setns, sys_setns)
#define __NR_sendmmsg 269
__SC_COMP(__NR_sendmmsg, sys_sendmmsg, compat_sys_sendmmsg)
#define __NR_process_vm_readv 270
-__SC_COMP(__NR_process_vm_readv, sys_process_vm_readv, \
- compat_sys_process_vm_readv)
+__SYSCALL(__NR_process_vm_readv, sys_process_vm_readv)
#define __NR_process_vm_writev 271
-__SC_COMP(__NR_process_vm_writev, sys_process_vm_writev, \
- compat_sys_process_vm_writev)
+__SYSCALL(__NR_process_vm_writev, sys_process_vm_writev)
#define __NR_kcmp 272
__SYSCALL(__NR_kcmp, sys_kcmp)
#define __NR_finit_module 273
@@ -850,6 +848,8 @@ __SYSCALL(__NR_pidfd_open, sys_pidfd_open)
#define __NR_clone3 435
__SYSCALL(__NR_clone3, sys_clone3)
#endif
+#define __NR_close_range 436
+__SYSCALL(__NR_close_range, sys_close_range)
#define __NR_openat2 437
__SYSCALL(__NR_openat2, sys_openat2)
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 14b67cd6b54b..00546062e023 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -55,7 +55,7 @@ extern "C" {
* cause the related events to not be seen.
*
* I915_RESET_UEVENT - Event is generated just before an attempt to reset the
- * the GPU. The value supplied with the event is always 1. NOTE: Disable
+ * GPU. The value supplied with the event is always 1. NOTE: Disable
* reset via module parameter will cause this event to not be seen.
*/
#define I915_L3_PARITY_UEVENT "L3_PARITY_ERROR"
@@ -1934,7 +1934,7 @@ enum drm_i915_perf_property_id {
/**
* The value specifies which set of OA unit metrics should be
- * be configured, defining the contents of any OA unit reports.
+ * configured, defining the contents of any OA unit reports.
*
* This property is available in perf revision 1.
*/
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b134e679e9db..b6238b2209b7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -81,6 +81,12 @@ struct bpf_cgroup_storage_key {
__u32 attach_type; /* program attach type */
};
+union bpf_iter_link_info {
+ struct {
+ __u32 map_fd;
+ } map;
+};
+
/* BPF syscall commands, see bpf(2) man-page for details. */
enum bpf_cmd {
BPF_MAP_CREATE,
@@ -249,13 +255,6 @@ enum bpf_link_type {
MAX_BPF_LINK_TYPE,
};
-enum bpf_iter_link_info {
- BPF_ITER_LINK_UNSPEC = 0,
- BPF_ITER_LINK_MAP_FD = 1,
-
- MAX_BPF_ITER_LINK_INFO,
-};
-
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
*
* NONE(default): No further bpf programs allowed in the subtree.
@@ -623,6 +622,8 @@ union bpf_attr {
};
__u32 attach_type; /* attach type */
__u32 flags; /* extra flags */
+ __aligned_u64 iter_info; /* extra bpf_iter_link_info */
+ __u32 iter_info_len; /* iter_info length */
} link_create;
struct { /* struct used by BPF_LINK_UPDATE command */
@@ -766,7 +767,7 @@ union bpf_attr {
*
* Also, note that **bpf_trace_printk**\ () is slow, and should
* only be used for debugging purposes. For this reason, a notice
- * bloc (spanning several lines) is printed to kernel logs and
+ * block (spanning several lines) is printed to kernel logs and
* states that the helper should not be used "for production use"
* the first time this helper is used (or more precisely, when
* **trace_printk**\ () buffers are allocated). For passing values
@@ -1032,14 +1033,14 @@ union bpf_attr {
*
* int ret;
* struct bpf_tunnel_key key = {};
- *
+ *
* ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
* if (ret < 0)
* return TC_ACT_SHOT; // drop packet
- *
+ *
* if (key.remote_ipv4 != 0x0a000001)
* return TC_ACT_SHOT; // drop packet
- *
+ *
* return TC_ACT_OK; // accept packet
*
* This interface can also be used with all encapsulation devices
@@ -1146,7 +1147,7 @@ union bpf_attr {
* Description
* Retrieve the realm or the route, that is to say the
* **tclassid** field of the destination for the *skb*. The
- * indentifier retrieved is a user-provided tag, similar to the
+ * identifier retrieved is a user-provided tag, similar to the
* one used with the net_cls cgroup (see description for
* **bpf_get_cgroup_classid**\ () helper), but here this tag is
* held by a route (a destination entry), not by a task.
diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h
index 8533bf07450f..7d6687618d80 100644
--- a/tools/include/uapi/linux/in.h
+++ b/tools/include/uapi/linux/in.h
@@ -123,6 +123,7 @@ struct in_addr {
#define IP_CHECKSUM 23
#define IP_BIND_ADDRESS_NO_PORT 24
#define IP_RECVFRAGSIZE 25
+#define IP_RECVERR_RFC4884 26
/* IP_MTU_DISCOVER values */
#define IP_PMTUDISC_DONT 0 /* Never send DF frames */
@@ -134,7 +135,7 @@ struct in_addr {
* this socket to prevent accepting spoofed ones.
*/
#define IP_PMTUDISC_INTERFACE 4
-/* weaker version of IP_PMTUDISC_INTERFACE, which allos packets to get
+/* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get
* fragmented if they exeed the interface mtu
*/
#define IP_PMTUDISC_OMIT 5
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 4fdf30316582..7d8eced6f459 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -289,6 +289,7 @@ struct kvm_run {
/* KVM_EXIT_FAIL_ENTRY */
struct {
__u64 hardware_entry_failure_reason;
+ __u32 cpu;
} fail_entry;
/* KVM_EXIT_EXCEPTION */
struct {
@@ -789,9 +790,10 @@ struct kvm_ppc_resize_hpt {
#define KVM_VM_PPC_HV 1
#define KVM_VM_PPC_PR 2
-/* on MIPS, 0 forces trap & emulate, 1 forces VZ ASE */
-#define KVM_VM_MIPS_TE 0
+/* on MIPS, 0 indicates auto, 1 forces VZ ASE, 2 forces trap & emulate */
+#define KVM_VM_MIPS_AUTO 0
#define KVM_VM_MIPS_VZ 1
+#define KVM_VM_MIPS_TE 2
#define KVM_S390_SIE_PAGE_OFFSET 1
@@ -1031,6 +1033,10 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_PPC_SECURE_GUEST 181
#define KVM_CAP_HALT_POLL 182
#define KVM_CAP_ASYNC_PF_INT 183
+#define KVM_CAP_LAST_CPU 184
+#define KVM_CAP_SMALLER_MAXPHYADDR 185
+#define KVM_CAP_S390_DIAG318 186
+#define KVM_CAP_STEAL_TIME 187
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 21a1edd08cbe..3e5dcdd48a49 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -383,7 +383,8 @@ struct perf_event_attr {
bpf_event : 1, /* include bpf events */
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
- __reserved_1 : 31;
+ text_poke : 1, /* include text poke events */
+ __reserved_1 : 30;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -1041,12 +1042,35 @@ enum perf_event_type {
*/
PERF_RECORD_CGROUP = 19,
+ /*
+ * Records changes to kernel text i.e. self-modified code. 'old_len' is
+ * the number of old bytes, 'new_len' is the number of new bytes. Either
+ * 'old_len' or 'new_len' may be zero to indicate, for example, the
+ * addition or removal of a trampoline. 'bytes' contains the old bytes
+ * followed immediately by the new bytes.
+ *
+ * struct {
+ * struct perf_event_header header;
+ * u64 addr;
+ * u16 old_len;
+ * u16 new_len;
+ * u8 bytes[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_TEXT_POKE = 20,
+
PERF_RECORD_MAX, /* non-ABI */
};
enum perf_record_ksymbol_type {
PERF_RECORD_KSYMBOL_TYPE_UNKNOWN = 0,
PERF_RECORD_KSYMBOL_TYPE_BPF = 1,
+ /*
+ * Out of line code such as kprobe-replaced instructions or optimized
+ * kprobes or ftrace trampolines.
+ */
+ PERF_RECORD_KSYMBOL_TYPE_OOL = 2,
PERF_RECORD_KSYMBOL_TYPE_MAX /* non-ABI */
};
@@ -1172,7 +1196,7 @@ union perf_mem_data_src {
#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
/* 1 free */
-#define PERF_MEM_SNOOPX_SHIFT 37
+#define PERF_MEM_SNOOPX_SHIFT 38
/* locked instruction */
#define PERF_MEM_LOCK_NA 0x01 /* not available */
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 0c2349612e77..75232185324a 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -91,6 +91,8 @@
/* Use message type V2 */
#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
+/* IOTLB can accept batching hints */
+#define VHOST_BACKEND_F_IOTLB_BATCH 0x2
#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
diff --git a/tools/io_uring/io_uring-bench.c b/tools/io_uring/io_uring-bench.c
index 0f257139b003..7703f0118385 100644
--- a/tools/io_uring/io_uring-bench.c
+++ b/tools/io_uring/io_uring-bench.c
@@ -130,7 +130,7 @@ static int io_uring_register_files(struct submitter *s)
s->nr_files);
}
-static int gettid(void)
+static int lk_gettid(void)
{
return syscall(__NR_gettid);
}
@@ -281,7 +281,7 @@ static void *submitter_fn(void *data)
struct io_sq_ring *ring = &s->sq_ring;
int ret, prepped;
- printf("submitter=%d\n", gettid());
+ printf("submitter=%d\n", lk_gettid());
srand48_r(pthread_self(), &s->rand);
diff --git a/tools/lib/api/fd/array.c b/tools/lib/api/fd/array.c
index 58d44d5eee31..5e6cb9debe37 100644
--- a/tools/lib/api/fd/array.c
+++ b/tools/lib/api/fd/array.c
@@ -8,6 +8,7 @@
#include <poll.h>
#include <stdlib.h>
#include <unistd.h>
+#include <string.h>
void fdarray__init(struct fdarray *fda, int nr_autogrow)
{
@@ -19,7 +20,7 @@ void fdarray__init(struct fdarray *fda, int nr_autogrow)
int fdarray__grow(struct fdarray *fda, int nr)
{
- void *priv;
+ struct priv *priv;
int nr_alloc = fda->nr_alloc + nr;
size_t psize = sizeof(fda->priv[0]) * nr_alloc;
size_t size = sizeof(struct pollfd) * nr_alloc;
@@ -34,6 +35,9 @@ int fdarray__grow(struct fdarray *fda, int nr)
return -ENOMEM;
}
+ memset(&entries[fda->nr_alloc], 0, sizeof(struct pollfd) * nr);
+ memset(&priv[fda->nr_alloc], 0, sizeof(fda->priv[0]) * nr);
+
fda->nr_alloc = nr_alloc;
fda->entries = entries;
fda->priv = priv;
@@ -69,7 +73,7 @@ void fdarray__delete(struct fdarray *fda)
free(fda);
}
-int fdarray__add(struct fdarray *fda, int fd, short revents)
+int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags)
{
int pos = fda->nr;
@@ -79,6 +83,7 @@ int fdarray__add(struct fdarray *fda, int fd, short revents)
fda->entries[fda->nr].fd = fd;
fda->entries[fda->nr].events = revents;
+ fda->priv[fda->nr].flags = flags;
fda->nr++;
return pos;
}
@@ -93,22 +98,22 @@ int fdarray__filter(struct fdarray *fda, short revents,
return 0;
for (fd = 0; fd < fda->nr; ++fd) {
+ if (!fda->entries[fd].events)
+ continue;
+
if (fda->entries[fd].revents & revents) {
if (entry_destructor)
entry_destructor(fda, fd, arg);
+ fda->entries[fd].revents = fda->entries[fd].events = 0;
continue;
}
- if (fd != nr) {
- fda->entries[nr] = fda->entries[fd];
- fda->priv[nr] = fda->priv[fd];
- }
-
- ++nr;
+ if (!(fda->priv[fd].flags & fdarray_flag__nonfilterable))
+ ++nr;
}
- return fda->nr = nr;
+ return nr;
}
int fdarray__poll(struct fdarray *fda, int timeout)
diff --git a/tools/lib/api/fd/array.h b/tools/lib/api/fd/array.h
index b39557d1a88f..7fcf21a33c0c 100644
--- a/tools/lib/api/fd/array.h
+++ b/tools/lib/api/fd/array.h
@@ -21,19 +21,27 @@ struct fdarray {
int nr_alloc;
int nr_autogrow;
struct pollfd *entries;
- union {
- int idx;
- void *ptr;
+ struct priv {
+ union {
+ int idx;
+ void *ptr;
+ };
+ unsigned int flags;
} *priv;
};
+enum fdarray_flags {
+ fdarray_flag__default = 0x00000000,
+ fdarray_flag__nonfilterable = 0x00000001
+};
+
void fdarray__init(struct fdarray *fda, int nr_autogrow);
void fdarray__exit(struct fdarray *fda);
struct fdarray *fdarray__new(int nr_alloc, int nr_autogrow);
void fdarray__delete(struct fdarray *fda);
-int fdarray__add(struct fdarray *fda, int fd, short revents);
+int fdarray__add(struct fdarray *fda, int fd, short revents, enum fdarray_flags flags);
int fdarray__poll(struct fdarray *fda, int timeout);
int fdarray__filter(struct fdarray *fda, short revents,
void (*entry_destructor)(struct fdarray *fda, int fd, void *arg),
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index bf8ed134cb8a..9ae8f4ef0aac 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -59,7 +59,7 @@ FEATURE_USER = .libbpf
FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray
FEATURE_DISPLAY = libelf zlib bpf
-INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi
+INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/include/uapi
FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
check_feat := 1
@@ -152,6 +152,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
sort -u | wc -l)
VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
@@ -219,6 +220,7 @@ check_abi: $(OUTPUT)libbpf.so
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
+ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
diff -u $(OUTPUT)libbpf_global_syms.tmp \
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index eab14c97c15d..0750681057c2 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -599,6 +599,9 @@ int bpf_link_create(int prog_fd, int target_fd,
attr.link_create.target_fd = target_fd;
attr.link_create.attach_type = attach_type;
attr.link_create.flags = OPTS_GET(opts, flags, 0);
+ attr.link_create.iter_info =
+ ptr_to_u64(OPTS_GET(opts, iter_info, (void *)0));
+ attr.link_create.iter_info_len = OPTS_GET(opts, iter_info_len, 0);
return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 28855fd5b5f4..015d13f25fcc 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -168,11 +168,14 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);
+union bpf_iter_link_info; /* defined in up-to-date linux/bpf.h */
struct bpf_link_create_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
__u32 flags;
+ union bpf_iter_link_info *iter_info;
+ __u32 iter_info_len;
};
-#define bpf_link_create_opts__last_field flags
+#define bpf_link_create_opts__last_field iter_info_len
LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
enum bpf_attach_type attach_type,
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index bc14db706b88..e9a4ecddb7a5 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -40,7 +40,7 @@
* Helper macro to manipulate data structures
*/
#ifndef offsetof
-#define offsetof(TYPE, MEMBER) __builtin_offsetof(TYPE, MEMBER)
+#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER)
#endif
#ifndef container_of
#define container_of(ptr, type, member) \
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 856b09a04563..6bdbc389b493 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -41,6 +41,7 @@ struct btf {
__u32 types_size;
__u32 data_size;
int fd;
+ int ptr_sz;
};
static inline __u64 ptr_to_u64(const void *ptr)
@@ -221,6 +222,70 @@ const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id)
return btf->types[type_id];
}
+static int determine_ptr_size(const struct btf *btf)
+{
+ const struct btf_type *t;
+ const char *name;
+ int i;
+
+ for (i = 1; i <= btf->nr_types; i++) {
+ t = btf__type_by_id(btf, i);
+ if (!btf_is_int(t))
+ continue;
+
+ name = btf__name_by_offset(btf, t->name_off);
+ if (!name)
+ continue;
+
+ if (strcmp(name, "long int") == 0 ||
+ strcmp(name, "long unsigned int") == 0) {
+ if (t->size != 4 && t->size != 8)
+ continue;
+ return t->size;
+ }
+ }
+
+ return -1;
+}
+
+static size_t btf_ptr_sz(const struct btf *btf)
+{
+ if (!btf->ptr_sz)
+ ((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
+ return btf->ptr_sz < 0 ? sizeof(void *) : btf->ptr_sz;
+}
+
+/* Return pointer size this BTF instance assumes. The size is heuristically
+ * determined by looking for 'long' or 'unsigned long' integer type and
+ * recording its size in bytes. If BTF type information doesn't have any such
+ * type, this function returns 0. In the latter case, native architecture's
+ * pointer size is assumed, so will be either 4 or 8, depending on
+ * architecture that libbpf was compiled for. It's possible to override
+ * guessed value by using btf__set_pointer_size() API.
+ */
+size_t btf__pointer_size(const struct btf *btf)
+{
+ if (!btf->ptr_sz)
+ ((struct btf *)btf)->ptr_sz = determine_ptr_size(btf);
+
+ if (btf->ptr_sz < 0)
+ /* not enough BTF type info to guess */
+ return 0;
+
+ return btf->ptr_sz;
+}
+
+/* Override or set pointer size in bytes. Only values of 4 and 8 are
+ * supported.
+ */
+int btf__set_pointer_size(struct btf *btf, size_t ptr_sz)
+{
+ if (ptr_sz != 4 && ptr_sz != 8)
+ return -EINVAL;
+ btf->ptr_sz = ptr_sz;
+ return 0;
+}
+
static bool btf_type_is_void(const struct btf_type *t)
{
return t == &btf_void || btf_is_fwd(t);
@@ -253,7 +318,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
size = t->size;
goto done;
case BTF_KIND_PTR:
- size = sizeof(void *);
+ size = btf_ptr_sz(btf);
goto done;
case BTF_KIND_TYPEDEF:
case BTF_KIND_VOLATILE:
@@ -293,9 +358,9 @@ int btf__align_of(const struct btf *btf, __u32 id)
switch (kind) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
- return min(sizeof(void *), (size_t)t->size);
+ return min(btf_ptr_sz(btf), (size_t)t->size);
case BTF_KIND_PTR:
- return sizeof(void *);
+ return btf_ptr_sz(btf);
case BTF_KIND_TYPEDEF:
case BTF_KIND_VOLATILE:
case BTF_KIND_CONST:
@@ -533,6 +598,18 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)
if (IS_ERR(btf))
goto done;
+ switch (gelf_getclass(elf)) {
+ case ELFCLASS32:
+ btf__set_pointer_size(btf, 4);
+ break;
+ case ELFCLASS64:
+ btf__set_pointer_size(btf, 8);
+ break;
+ default:
+ pr_warn("failed to get ELF class (bitness) for %s\n", path);
+ break;
+ }
+
if (btf_ext && btf_ext_data) {
*btf_ext = btf_ext__new(btf_ext_data->d_buf,
btf_ext_data->d_size);
@@ -564,8 +641,8 @@ done:
struct btf *btf__parse_raw(const char *path)
{
+ struct btf *btf = NULL;
void *data = NULL;
- struct btf *btf;
FILE *f = NULL;
__u16 magic;
int err = 0;
@@ -582,6 +659,12 @@ struct btf *btf__parse_raw(const char *path)
err = -EIO;
goto err_out;
}
+ if (magic == __bswap_16(BTF_MAGIC)) {
+ /* non-native endian raw BTF */
+ pr_warn("non-native BTF endianness is not supported\n");
+ err = -LIBBPF_ERRNO__ENDIAN;
+ goto err_out;
+ }
if (magic != BTF_MAGIC) {
/* definitely not a raw BTF */
err = -EPROTO;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index f4a1a1d2b9a3..1ca14448df4c 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -76,6 +76,8 @@ LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf,
LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);
LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,
__u32 id);
+LIBBPF_API size_t btf__pointer_size(const struct btf *btf);
+LIBBPF_API int btf__set_pointer_size(struct btf *btf, size_t ptr_sz);
LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id);
LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id);
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index cf711168d34a..57c00fa63932 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -13,6 +13,7 @@
#include <errno.h>
#include <linux/err.h>
#include <linux/btf.h>
+#include <linux/kernel.h>
#include "btf.h"
#include "hashmap.h"
#include "libbpf.h"
@@ -60,6 +61,7 @@ struct btf_dump {
const struct btf_ext *btf_ext;
btf_dump_printf_fn_t printf_fn;
struct btf_dump_opts opts;
+ int ptr_sz;
bool strip_mods;
/* per-type auxiliary state */
@@ -138,6 +140,7 @@ struct btf_dump *btf_dump__new(const struct btf *btf,
d->btf_ext = btf_ext;
d->printf_fn = printf_fn;
d->opts.ctx = opts ? opts->ctx : NULL;
+ d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *);
d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL);
if (IS_ERR(d->type_names)) {
@@ -549,6 +552,9 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
}
}
+static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
+ const struct btf_type *t);
+
static void btf_dump_emit_struct_fwd(struct btf_dump *d, __u32 id,
const struct btf_type *t);
static void btf_dump_emit_struct_def(struct btf_dump *d, __u32 id,
@@ -671,6 +677,9 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
switch (kind) {
case BTF_KIND_INT:
+ /* Emit type alias definitions if necessary */
+ btf_dump_emit_missing_aliases(d, id, t);
+
tstate->emit_state = EMITTED;
break;
case BTF_KIND_ENUM:
@@ -797,7 +806,7 @@ static void btf_dump_emit_bit_padding(const struct btf_dump *d,
int align, int lvl)
{
int off_diff = m_off - cur_off;
- int ptr_bits = sizeof(void *) * 8;
+ int ptr_bits = d->ptr_sz * 8;
if (off_diff <= 0)
/* no gap */
@@ -870,7 +879,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
btf_dump_printf(d, ": %d", m_sz);
off = m_off + m_sz;
} else {
- m_sz = max(0, btf__resolve_size(d->btf, m->type));
+ m_sz = max((__s64)0, btf__resolve_size(d->btf, m->type));
off = m_off + m_sz * 8;
}
btf_dump_printf(d, ";");
@@ -890,6 +899,32 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,
btf_dump_printf(d, " __attribute__((packed))");
}
+static const char *missing_base_types[][2] = {
+ /*
+ * GCC emits typedefs to its internal __PolyX_t types when compiling Arm
+ * SIMD intrinsics. Alias them to standard base types.
+ */
+ { "__Poly8_t", "unsigned char" },
+ { "__Poly16_t", "unsigned short" },
+ { "__Poly64_t", "unsigned long long" },
+ { "__Poly128_t", "unsigned __int128" },
+};
+
+static void btf_dump_emit_missing_aliases(struct btf_dump *d, __u32 id,
+ const struct btf_type *t)
+{
+ const char *name = btf_dump_type_name(d, id);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(missing_base_types); i++) {
+ if (strcmp(name, missing_base_types[i][0]) == 0) {
+ btf_dump_printf(d, "typedef %s %s;\n\n",
+ missing_base_types[i][1], name);
+ break;
+ }
+ }
+}
+
static void btf_dump_emit_enum_fwd(struct btf_dump *d, __u32 id,
const struct btf_type *t)
{
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7be04e45d29c..e493d6048143 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2264,7 +2264,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
data = elf_getdata(scn, NULL);
if (!scn || !data) {
pr_warn("failed to get Elf_Data from map section %d (%s)\n",
- obj->efile.maps_shndx, MAPS_ELF_SEC);
+ obj->efile.btf_maps_shndx, MAPS_ELF_SEC);
return -EINVAL;
}
@@ -2434,6 +2434,8 @@ static int bpf_object__init_btf(struct bpf_object *obj,
BTF_ELF_SEC, err);
goto out;
}
+ /* enforce 8-byte pointers for BPF-targeted BTFs */
+ btf__set_pointer_size(obj->btf, 8);
err = 0;
}
if (btf_ext_data) {
@@ -2542,6 +2544,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
if (IS_ERR(kern_btf))
return PTR_ERR(kern_btf);
+ /* enforce 8-byte pointers for BPF-targeted BTFs */
+ btf__set_pointer_size(obj->btf, 8);
bpf_object__sanitize_btf(obj, kern_btf);
}
@@ -3478,10 +3482,11 @@ bpf_object__probe_global_data(struct bpf_object *obj)
map = bpf_create_map_xattr(&map_attr);
if (map < 0) {
- cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ ret = -errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n",
- __func__, cp, errno);
- return -errno;
+ __func__, cp, -ret);
+ return ret;
}
insns[0].imm = map;
@@ -5194,11 +5199,12 @@ static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
static int bpf_object__collect_map_relos(struct bpf_object *obj,
GElf_Shdr *shdr, Elf_Data *data)
{
- int i, j, nrels, new_sz, ptr_sz = sizeof(void *);
+ const int bpf_ptr_sz = 8, host_ptr_sz = sizeof(void *);
+ int i, j, nrels, new_sz;
const struct btf_var_secinfo *vi = NULL;
const struct btf_type *sec, *var, *def;
+ struct bpf_map *map = NULL, *targ_map;
const struct btf_member *member;
- struct bpf_map *map, *targ_map;
const char *name, *mname;
Elf_Data *symbols;
unsigned int moff;
@@ -5243,7 +5249,7 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
vi = btf_var_secinfos(sec) + map->btf_var_idx;
if (vi->offset <= rel.r_offset &&
- rel.r_offset + sizeof(void *) <= vi->offset + vi->size)
+ rel.r_offset + bpf_ptr_sz <= vi->offset + vi->size)
break;
}
if (j == obj->nr_maps) {
@@ -5279,17 +5285,20 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
return -EINVAL;
moff = rel.r_offset - vi->offset - moff;
- if (moff % ptr_sz)
+ /* here we use BPF pointer size, which is always 64 bit, as we
+ * are parsing ELF that was built for BPF target
+ */
+ if (moff % bpf_ptr_sz)
return -EINVAL;
- moff /= ptr_sz;
+ moff /= bpf_ptr_sz;
if (moff >= map->init_slots_sz) {
new_sz = moff + 1;
- tmp = realloc(map->init_slots, new_sz * ptr_sz);
+ tmp = realloc(map->init_slots, new_sz * host_ptr_sz);
if (!tmp)
return -ENOMEM;
map->init_slots = tmp;
memset(map->init_slots + map->init_slots_sz, 0,
- (new_sz - map->init_slots_sz) * ptr_sz);
+ (new_sz - map->init_slots_sz) * host_ptr_sz);
map->init_slots_sz = new_sz;
}
map->init_slots[moff] = targ_map;
@@ -6012,9 +6021,10 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,
}
if (bpf_obj_pin(prog->instances.fds[instance], path)) {
- cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
+ err = -errno;
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
pr_warn("failed to pin program: %s\n", cp);
- return -errno;
+ return err;
}
pr_debug("pinned program '%s'\n", path);
@@ -6915,7 +6925,7 @@ static const struct bpf_sec_def section_defs[] = {
BPF_XDP_DEVMAP),
BPF_EAPROG_SEC("xdp_cpumap/", BPF_PROG_TYPE_XDP,
BPF_XDP_CPUMAP),
- BPF_EAPROG_SEC("xdp", BPF_PROG_TYPE_XDP,
+ BPF_APROG_SEC("xdp", BPF_PROG_TYPE_XDP,
BPF_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@@ -8306,10 +8316,8 @@ bpf_program__attach_iter(struct bpf_program *prog,
if (!OPTS_VALID(opts, bpf_iter_attach_opts))
return ERR_PTR(-EINVAL);
- if (OPTS_HAS(opts, map_fd)) {
- target_fd = opts->map_fd;
- link_create_opts.flags = BPF_ITER_LINK_MAP_FD;
- }
+ link_create_opts.iter_info = OPTS_GET(opts, link_info, (void *)0);
+ link_create_opts.iter_info_len = OPTS_GET(opts, link_info_len, 0);
prog_fd = bpf_program__fd(prog);
if (prog_fd < 0) {
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3ed1399bfbbc..5ecb4069a9f0 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -267,9 +267,10 @@ LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
struct bpf_iter_attach_opts {
size_t sz; /* size of this struct for forward/backward compatibility */
- __u32 map_fd;
+ union bpf_iter_link_info *link_info;
+ __u32 link_info_len;
};
-#define bpf_iter_attach_opts__last_field map_fd
+#define bpf_iter_attach_opts__last_field link_info_len
LIBBPF_API struct bpf_link *
bpf_program__attach_iter(struct bpf_program *prog,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 0c4722bfdd0a..e35bd6cdbdbf 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -295,5 +295,7 @@ LIBBPF_0.1.0 {
bpf_program__set_sk_lookup;
btf__parse;
btf__parse_raw;
+ btf__pointer_size;
btf__set_fd;
+ btf__set_pointer_size;
} LIBBPF_0.0.9;
diff --git a/tools/lib/perf/Documentation/libperf-counting.txt b/tools/lib/perf/Documentation/libperf-counting.txt
index cae9757f49c1..8b75efcd67ce 100644
--- a/tools/lib/perf/Documentation/libperf-counting.txt
+++ b/tools/lib/perf/Documentation/libperf-counting.txt
@@ -7,13 +7,13 @@ libperf-counting - counting interface
DESCRIPTION
-----------
-The counting interface provides API to meassure and get count for specific perf events.
+The counting interface provides API to measure and get count for specific perf events.
The following test tries to explain count on `counting.c` example.
It is by no means complete guide to counting, but shows libperf basic API for counting.
-The `counting.c` comes with libbperf package and can be compiled and run like:
+The `counting.c` comes with libperf package and can be compiled and run like:
[source,bash]
--
@@ -26,7 +26,8 @@ count 176242, enabled 176242, run 176242
It requires root access, because of the `PERF_COUNT_SW_CPU_CLOCK` event,
which is available only for root.
-The `counting.c` example monitors two events on the current process and displays their count, in a nutshel it:
+The `counting.c` example monitors two events on the current process and displays
+their count, in a nutshell it:
* creates events
* adds them to the event list
@@ -152,7 +153,7 @@ Configure event list with the thread map and open events:
--
Both events are created as disabled (note the `disabled = 1` assignment above),
-so we need to enable the whole list explicitely (both events).
+so we need to enable the whole list explicitly (both events).
From this moment events are counting and we can do our workload.
@@ -167,7 +168,8 @@ When we are done we disable the events list.
79 perf_evlist__disable(evlist);
--
-Now we need to get the counts from events, following code iterates throught the events list and read counts:
+Now we need to get the counts from events, following code iterates through the
+events list and read counts:
[source,c]
--
@@ -178,7 +180,7 @@ Now we need to get the counts from events, following code iterates throught the
85 }
--
-And finaly cleanup.
+And finally cleanup.
We close the whole events list (both events) and remove it together with the threads map:
diff --git a/tools/lib/perf/Documentation/libperf-sampling.txt b/tools/lib/perf/Documentation/libperf-sampling.txt
index d71a7b4fcf5f..d6ca24f6ef78 100644
--- a/tools/lib/perf/Documentation/libperf-sampling.txt
+++ b/tools/lib/perf/Documentation/libperf-sampling.txt
@@ -8,13 +8,13 @@ libperf-sampling - sampling interface
DESCRIPTION
-----------
-The sampling interface provides API to meassure and get count for specific perf events.
+The sampling interface provides API to measure and get count for specific perf events.
The following test tries to explain count on `sampling.c` example.
It is by no means complete guide to sampling, but shows libperf basic API for sampling.
-The `sampling.c` comes with libbperf package and can be compiled and run like:
+The `sampling.c` comes with libperf package and can be compiled and run like:
[source,bash]
--
@@ -33,7 +33,8 @@ cpu 0, pid 4465, tid 4470, ip 7f84fe0ebebf, period 176
It requires root access, because it uses hardware cycles event.
-The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a nutshel it:
+The `sampling.c` example profiles/samples all CPUs with hardware cycles, in a
+nutshell it:
- creates events
- adds them to the event list
@@ -90,7 +91,7 @@ Once the setup is complete we start by defining cycles event using the `struct p
36 };
--
-Next step is to prepare cpus map.
+Next step is to prepare CPUs map.
In this case we will monitor all the available CPUs:
@@ -152,7 +153,7 @@ Once the events list is open, we can create memory maps AKA perf ring buffers:
--
The event is created as disabled (note the `disabled = 1` assignment above),
-so we need to enable the events list explicitely.
+so we need to enable the events list explicitly.
From this moment the cycles event is sampling.
@@ -212,7 +213,7 @@ Each sample needs to get parsed:
106 cpu, pid, tid, ip, period);
--
-And finaly cleanup.
+And finally cleanup.
We close the whole events list (both events) and remove it together with the threads map:
diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
index 5a6bb512789d..0c74c30ed23a 100644
--- a/tools/lib/perf/Documentation/libperf.txt
+++ b/tools/lib/perf/Documentation/libperf.txt
@@ -29,7 +29,7 @@ SYNOPSIS
void libperf_init(libperf_print_fn_t fn);
--
-*API to handle cpu maps:*
+*API to handle CPU maps:*
[source,c]
--
@@ -217,7 +217,7 @@ Following objects are key to the libperf interface:
[horizontal]
-struct perf_cpu_map:: Provides a cpu list abstraction.
+struct perf_cpu_map:: Provides a CPU list abstraction.
struct perf_thread_map:: Provides a thread list abstraction.
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 6a875a0f01bb..2208444ecb44 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -305,9 +305,9 @@ int perf_evlist__alloc_pollfd(struct perf_evlist *evlist)
}
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
- void *ptr, short revent)
+ void *ptr, short revent, enum fdarray_flags flags)
{
- int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP);
+ int pos = fdarray__add(&evlist->pollfd, fd, revent | POLLERR | POLLHUP, flags);
if (pos >= 0) {
evlist->pollfd.priv[pos].ptr = ptr;
@@ -488,7 +488,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,
revent = !overwrite ? POLLIN : 0;
if (!evsel->system_wide &&
- perf_evlist__add_pollfd(evlist, fd, map, revent) < 0) {
+ perf_evlist__add_pollfd(evlist, fd, map, revent, fdarray_flag__default) < 0) {
perf_mmap__put(map);
return -1;
}
diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h
index 74dc8c3f0b66..2d0fa02b036f 100644
--- a/tools/lib/perf/include/internal/evlist.h
+++ b/tools/lib/perf/include/internal/evlist.h
@@ -45,7 +45,7 @@ struct perf_evlist_mmap_ops {
int perf_evlist__alloc_pollfd(struct perf_evlist *evlist);
int perf_evlist__add_pollfd(struct perf_evlist *evlist, int fd,
- void *ptr, short revent);
+ void *ptr, short revent, enum fdarray_flags flags);
int perf_evlist__mmap_ops(struct perf_evlist *evlist,
struct perf_evlist_mmap_ops *ops,
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 69b44d2cc0f5..842028858d66 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -111,6 +111,14 @@ struct perf_record_cgroup {
char path[PATH_MAX];
};
+struct perf_record_text_poke_event {
+ struct perf_event_header header;
+ __u64 addr;
+ __u16 old_len;
+ __u16 new_len;
+ __u8 bytes[];
+};
+
struct perf_record_sample {
struct perf_event_header header;
__u64 array[];
@@ -367,6 +375,7 @@ union perf_event {
struct perf_record_sample sample;
struct perf_record_bpf_event bpf;
struct perf_record_ksymbol ksymbol;
+ struct perf_record_text_poke_event text_poke;
struct perf_record_header_attr attr;
struct perf_record_event_update event_update;
struct perf_record_header_event_type event_type;
diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c
index 06ac7bd2144b..727396de6be5 100644
--- a/tools/lib/rbtree.c
+++ b/tools/lib/rbtree.c
@@ -13,7 +13,7 @@
#include <linux/export.h>
/*
- * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree
+ * red-black trees properties: https://en.wikipedia.org/wiki/Rbtree
*
* 1) A node is either red or black
* 2) The root is black
diff --git a/tools/lib/subcmd/help.c b/tools/lib/subcmd/help.c
index 2859f107abc8..bf02d62a3b2b 100644
--- a/tools/lib/subcmd/help.c
+++ b/tools/lib/subcmd/help.c
@@ -65,12 +65,14 @@ void exclude_cmds(struct cmdnames *cmds, struct cmdnames *excludes)
ci = cj = ei = 0;
while (ci < cmds->cnt && ei < excludes->cnt) {
cmp = strcmp(cmds->names[ci]->name, excludes->names[ei]->name);
- if (cmp < 0)
+ if (cmp < 0) {
cmds->names[cj++] = cmds->names[ci++];
- else if (cmp == 0)
- ci++, ei++;
- else if (cmp > 0)
+ } else if (cmp == 0) {
+ ci++;
ei++;
+ } else if (cmp > 0) {
+ ei++;
+ }
}
while (ci < cmds->cnt)
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
index 596032ade31f..4d6394397d92 100644
--- a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
+++ b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
@@ -3,7 +3,7 @@ libtraceevent(3)
NAME
----
-tep_load_plugins, tep_unload_plugins - Load / unload traceevent plugins.
+tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins.
SYNOPSIS
--------
@@ -13,6 +13,12 @@ SYNOPSIS
struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
+void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_,
+ void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep,
+ const char pass:[*]path,
+ const char pass:[*]name,
+ void pass:[*]data),
+ void pass:[*]_data_);
--
DESCRIPTION
@@ -22,11 +28,13 @@ directories. The _tep_ argument is trace event parser context.
The plugin directories are :
[verse]
--
+ - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST
- System's plugin directory, defined at the library compile time. It
depends on the library installation prefix and usually is
_(install_preffix)/lib/traceevent/plugins_
- Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
- User's plugin directory, located at _~/.local/lib/traceevent/plugins_
+ - Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST
--
Loading of plugins can be controlled by the _tep_flags_, using the
_tep_set_flag()_ API:
@@ -44,6 +52,12 @@ _tep_load_plugins()_. The _tep_ argument is trace event parser context. The
_plugin_list_ is the list of loaded plugins, returned by
the _tep_load_plugins()_ function.
+The _tep_load_plugins_hook_ function walks through all directories with plugins
+and calls user specified _load_plugin()_ hook for each plugin file. Only files
+with given _suffix_ are considered to be plugins. The _data_ is a user specified
+context, passed to _load_plugin()_. Directories and the walk order are the same
+as in _tep_load_plugins()_ API.
+
RETURN VALUE
------------
The _tep_load_plugins()_ function returns a list of successfully loaded plugins,
@@ -63,6 +77,15 @@ if (plugins == NULL) {
}
...
tep_unload_plugins(plugins, tep);
+...
+void print_plugin(struct tep_handle *tep, const char *path,
+ const char *name, void *data)
+{
+ pritnf("Found libtraceevent plugin %s/%s\n", path, name);
+}
+...
+tep_load_plugins_hook(tep, ".so", print_plugin, NULL);
+...
--
FILES
diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h
index cee469803a34..d805a920af6f 100644
--- a/tools/lib/traceevent/event-parse-local.h
+++ b/tools/lib/traceevent/event-parse-local.h
@@ -13,6 +13,7 @@ struct func_map;
struct func_list;
struct event_handler;
struct func_resolver;
+struct tep_plugins_dir;
struct tep_handle {
int ref_count;
@@ -47,7 +48,6 @@ struct tep_handle {
struct printk_list *printklist;
unsigned int printk_count;
-
struct tep_event **events;
int nr_events;
struct tep_event **sort_events;
@@ -81,10 +81,30 @@ struct tep_handle {
/* cache */
struct tep_event *last_event;
+
+ struct tep_plugins_dir *plugins_dir;
+};
+
+enum tep_print_parse_type {
+ PRINT_FMT_STRING,
+ PRINT_FMT_ARG_DIGIT,
+ PRINT_FMT_ARG_POINTER,
+ PRINT_FMT_ARG_STRING,
+};
+
+struct tep_print_parse {
+ struct tep_print_parse *next;
+
+ char *format;
+ int ls;
+ enum tep_print_parse_type type;
+ struct tep_print_arg *arg;
+ struct tep_print_arg *len_as_arg;
};
void tep_free_event(struct tep_event *event);
void tep_free_format_field(struct tep_format_field *field);
+void tep_free_plugin_paths(struct tep_handle *tep);
unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data);
unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data);
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index ba4f33804af1..5acc18b32606 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -4565,43 +4565,93 @@ get_bprint_format(void *data, int size __maybe_unused,
return format;
}
-static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,
- struct tep_event *event, struct tep_print_arg *arg)
+static int print_mac_arg(struct trace_seq *s, const char *format,
+ void *data, int size, struct tep_event *event,
+ struct tep_print_arg *arg)
{
- unsigned char *buf;
const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
+ bool reverse = false;
+ unsigned char *buf;
+ int ret = 0;
if (arg->type == TEP_PRINT_FUNC) {
process_defined_func(s, data, size, event, arg);
- return;
+ return 0;
}
if (arg->type != TEP_PRINT_FIELD) {
trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d",
arg->type);
- return;
+ return 0;
}
- if (mac == 'm')
+ if (format[0] == 'm') {
fmt = "%.2x%.2x%.2x%.2x%.2x%.2x";
+ } else if (format[0] == 'M' && format[1] == 'F') {
+ fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x";
+ ret++;
+ }
+ if (format[1] == 'R') {
+ reverse = true;
+ ret++;
+ }
+
if (!arg->field.field) {
arg->field.field =
tep_find_any_field(event, arg->field.name);
if (!arg->field.field) {
do_warning_event(event, "%s: field %s not found",
__func__, arg->field.name);
- return;
+ return ret;
}
}
if (arg->field.field->size != 6) {
trace_seq_printf(s, "INVALIDMAC");
- return;
+ return ret;
}
+
buf = data + arg->field.field->offset;
- trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
+ if (reverse)
+ trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]);
+ else
+ trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
+
+ return ret;
}
-static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf)
+static int parse_ip4_print_args(struct tep_handle *tep,
+ const char *ptr, bool *reverse)
+{
+ int ret = 0;
+
+ *reverse = false;
+
+ /* hnbl */
+ switch (*ptr) {
+ case 'h':
+ if (tep->file_bigendian)
+ *reverse = false;
+ else
+ *reverse = true;
+ ret++;
+ break;
+ case 'l':
+ *reverse = true;
+ ret++;
+ break;
+ case 'n':
+ case 'b':
+ ret++;
+ /* fall through */
+ default:
+ *reverse = false;
+ break;
+ }
+
+ return ret;
+}
+
+static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf)
{
const char *fmt;
@@ -4610,7 +4660,11 @@ static void print_ip4_addr(struct trace_seq *s, char i, unsigned char *buf)
else
fmt = "%d.%d.%d.%d";
- trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
+ if (reverse)
+ trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]);
+ else
+ trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
+
}
static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
@@ -4693,7 +4747,7 @@ static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr)
if (useIPv4) {
if (needcolon)
trace_seq_printf(s, ":");
- print_ip4_addr(s, 'I', &in6.s6_addr[12]);
+ print_ip4_addr(s, 'I', false, &in6.s6_addr[12]);
}
return;
@@ -4722,16 +4776,20 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
void *data, int size, struct tep_event *event,
struct tep_print_arg *arg)
{
+ bool reverse = false;
unsigned char *buf;
+ int ret;
+
+ ret = parse_ip4_print_args(event->tep, ptr, &reverse);
if (arg->type == TEP_PRINT_FUNC) {
process_defined_func(s, data, size, event, arg);
- return 0;
+ return ret;
}
if (arg->type != TEP_PRINT_FIELD) {
trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
- return 0;
+ return ret;
}
if (!arg->field.field) {
@@ -4740,7 +4798,7 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
if (!arg->field.field) {
do_warning("%s: field %s not found",
__func__, arg->field.name);
- return 0;
+ return ret;
}
}
@@ -4748,11 +4806,12 @@ static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
if (arg->field.field->size != 4) {
trace_seq_printf(s, "INVALIDIPv4");
- return 0;
+ return ret;
}
- print_ip4_addr(s, i, buf);
- return 0;
+ print_ip4_addr(s, i, reverse, buf);
+ return ret;
+
}
static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
@@ -4812,7 +4871,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
char have_c = 0, have_p = 0;
unsigned char *buf;
struct sockaddr_storage *sa;
+ bool reverse = false;
int rc = 0;
+ int ret;
/* pISpc */
if (i == 'I') {
@@ -4827,6 +4888,9 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
rc++;
}
}
+ ret = parse_ip4_print_args(event->tep, ptr, &reverse);
+ ptr += ret;
+ rc += ret;
if (arg->type == TEP_PRINT_FUNC) {
process_defined_func(s, data, size, event, arg);
@@ -4858,7 +4922,7 @@ static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
return rc;
}
- print_ip4_addr(s, i, (unsigned char *) &sa4->sin_addr);
+ print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr);
if (have_p)
trace_seq_printf(s, ":%d", ntohs(sa4->sin_port));
@@ -4892,25 +4956,20 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr,
struct tep_print_arg *arg)
{
char i = *ptr; /* 'i' or 'I' */
- char ver;
- int rc = 0;
+ int rc = 1;
+ /* IP version */
ptr++;
- rc++;
- ver = *ptr;
- ptr++;
- rc++;
-
- switch (ver) {
+ switch (*ptr) {
case '4':
- rc += print_ipv4_arg(s, ptr, i, data, size, event, arg);
+ rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg);
break;
case '6':
- rc += print_ipv6_arg(s, ptr, i, data, size, event, arg);
+ rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg);
break;
case 'S':
- rc += print_ipsa_arg(s, ptr, i, data, size, event, arg);
+ rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg);
break;
default:
return 0;
@@ -4919,6 +4978,133 @@ static int print_ip_arg(struct trace_seq *s, const char *ptr,
return rc;
}
+static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15};
+static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
+
+static int print_uuid_arg(struct trace_seq *s, const char *ptr,
+ void *data, int size, struct tep_event *event,
+ struct tep_print_arg *arg)
+{
+ const int *index = uuid_index;
+ char *format = "%02x";
+ int ret = 0;
+ char *buf;
+ int i;
+
+ switch (*(ptr + 1)) {
+ case 'L':
+ format = "%02X";
+ /* fall through */
+ case 'l':
+ index = guid_index;
+ ret++;
+ break;
+ case 'B':
+ format = "%02X";
+ /* fall through */
+ case 'b':
+ ret++;
+ break;
+ }
+
+ if (arg->type == TEP_PRINT_FUNC) {
+ process_defined_func(s, data, size, event, arg);
+ return ret;
+ }
+
+ if (arg->type != TEP_PRINT_FIELD) {
+ trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+ return ret;
+ }
+
+ if (!arg->field.field) {
+ arg->field.field =
+ tep_find_any_field(event, arg->field.name);
+ if (!arg->field.field) {
+ do_warning("%s: field %s not found",
+ __func__, arg->field.name);
+ return ret;
+ }
+ }
+
+ if (arg->field.field->size != 16) {
+ trace_seq_printf(s, "INVALIDUUID");
+ return ret;
+ }
+
+ buf = data + arg->field.field->offset;
+
+ for (i = 0; i < 16; i++) {
+ trace_seq_printf(s, format, buf[index[i]] & 0xff);
+ switch (i) {
+ case 3:
+ case 5:
+ case 7:
+ case 9:
+ trace_seq_printf(s, "-");
+ break;
+ }
+ }
+
+ return ret;
+}
+
+static int print_raw_buff_arg(struct trace_seq *s, const char *ptr,
+ void *data, int size, struct tep_event *event,
+ struct tep_print_arg *arg, int print_len)
+{
+ int plen = print_len;
+ char *delim = " ";
+ int ret = 0;
+ char *buf;
+ int i;
+ unsigned long offset;
+ int arr_len;
+
+ switch (*(ptr + 1)) {
+ case 'C':
+ delim = ":";
+ ret++;
+ break;
+ case 'D':
+ delim = "-";
+ ret++;
+ break;
+ case 'N':
+ delim = "";
+ ret++;
+ break;
+ }
+
+ if (arg->type == TEP_PRINT_FUNC) {
+ process_defined_func(s, data, size, event, arg);
+ return ret;
+ }
+
+ if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) {
+ trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
+ return ret;
+ }
+
+ offset = tep_read_number(event->tep,
+ data + arg->dynarray.field->offset,
+ arg->dynarray.field->size);
+ arr_len = (unsigned long long)(offset >> 16);
+ buf = data + (offset & 0xffff);
+
+ if (arr_len < plen)
+ plen = arr_len;
+
+ if (plen < 1)
+ return ret;
+
+ trace_seq_printf(s, "%02x", buf[0] & 0xff);
+ for (i = 1; i < plen; i++)
+ trace_seq_printf(s, "%s%02x", delim, buf[i] & 0xff);
+
+ return ret;
+}
+
static int is_printable_array(char *p, unsigned int len)
{
unsigned int i;
@@ -5007,264 +5193,567 @@ void tep_print_fields(struct trace_seq *s, void *data,
}
}
-static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
+static int print_function(struct trace_seq *s, const char *format,
+ void *data, int size, struct tep_event *event,
+ struct tep_print_arg *arg)
{
- struct tep_handle *tep = event->tep;
- struct tep_print_fmt *print_fmt = &event->print_fmt;
- struct tep_print_arg *arg = print_fmt->args;
- struct tep_print_arg *args = NULL;
- const char *ptr = print_fmt->format;
- unsigned long long val;
struct func_map *func;
- const char *saveptr;
- struct trace_seq p;
- char *bprint_fmt = NULL;
- char format[32];
- int show_func;
- int len_as_arg;
- int len_arg = 0;
- int len;
- int ls;
+ unsigned long long val;
- if (event->flags & TEP_EVENT_FL_FAILED) {
- trace_seq_printf(s, "[FAILED TO PARSE]");
- tep_print_fields(s, data, size, event);
- return;
+ val = eval_num_arg(data, size, event, arg);
+ func = find_func(event->tep, val);
+ if (func) {
+ trace_seq_puts(s, func->func);
+ if (*format == 'F' || *format == 'S')
+ trace_seq_printf(s, "+0x%llx", val - func->addr);
+ } else {
+ if (event->tep->long_size == 4)
+ trace_seq_printf(s, "0x%lx", (long)val);
+ else
+ trace_seq_printf(s, "0x%llx", (long long)val);
}
- if (event->flags & TEP_EVENT_FL_ISBPRINT) {
- bprint_fmt = get_bprint_format(data, size, event);
- args = make_bprint_args(bprint_fmt, data, size, event);
- arg = args;
- ptr = bprint_fmt;
+ return 0;
+}
+
+static int print_arg_pointer(struct trace_seq *s, const char *format, int plen,
+ void *data, int size,
+ struct tep_event *event, struct tep_print_arg *arg)
+{
+ unsigned long long val;
+ int ret = 1;
+
+ if (arg->type == TEP_PRINT_BSTRING) {
+ trace_seq_puts(s, arg->string.string);
+ return 0;
+ }
+ while (*format) {
+ if (*format == 'p') {
+ format++;
+ break;
+ }
+ format++;
}
- for (; *ptr; ptr++) {
- ls = 0;
- if (*ptr == '\\') {
- ptr++;
- switch (*ptr) {
+ switch (*format) {
+ case 'F':
+ case 'f':
+ case 'S':
+ case 's':
+ ret += print_function(s, format, data, size, event, arg);
+ break;
+ case 'M':
+ case 'm':
+ ret += print_mac_arg(s, format, data, size, event, arg);
+ break;
+ case 'I':
+ case 'i':
+ ret += print_ip_arg(s, format, data, size, event, arg);
+ break;
+ case 'U':
+ ret += print_uuid_arg(s, format, data, size, event, arg);
+ break;
+ case 'h':
+ ret += print_raw_buff_arg(s, format, data, size, event, arg, plen);
+ break;
+ default:
+ ret = 0;
+ val = eval_num_arg(data, size, event, arg);
+ trace_seq_printf(s, "%p", (void *)(intptr_t)val);
+ break;
+ }
+
+ return ret;
+
+}
+
+static int print_arg_number(struct trace_seq *s, const char *format, int plen,
+ void *data, int size, int ls,
+ struct tep_event *event, struct tep_print_arg *arg)
+{
+ unsigned long long val;
+
+ val = eval_num_arg(data, size, event, arg);
+
+ switch (ls) {
+ case -2:
+ if (plen >= 0)
+ trace_seq_printf(s, format, plen, (char)val);
+ else
+ trace_seq_printf(s, format, (char)val);
+ break;
+ case -1:
+ if (plen >= 0)
+ trace_seq_printf(s, format, plen, (short)val);
+ else
+ trace_seq_printf(s, format, (short)val);
+ break;
+ case 0:
+ if (plen >= 0)
+ trace_seq_printf(s, format, plen, (int)val);
+ else
+ trace_seq_printf(s, format, (int)val);
+ break;
+ case 1:
+ if (plen >= 0)
+ trace_seq_printf(s, format, plen, (long)val);
+ else
+ trace_seq_printf(s, format, (long)val);
+ break;
+ case 2:
+ if (plen >= 0)
+ trace_seq_printf(s, format, plen, (long long)val);
+ else
+ trace_seq_printf(s, format, (long long)val);
+ break;
+ default:
+ do_warning_event(event, "bad count (%d)", ls);
+ event->flags |= TEP_EVENT_FL_FAILED;
+ }
+ return 0;
+}
+
+
+static void print_arg_string(struct trace_seq *s, const char *format, int plen,
+ void *data, int size,
+ struct tep_event *event, struct tep_print_arg *arg)
+{
+ struct trace_seq p;
+
+ /* Use helper trace_seq */
+ trace_seq_init(&p);
+ print_str_arg(&p, data, size, event,
+ format, plen, arg);
+ trace_seq_terminate(&p);
+ trace_seq_puts(s, p.buffer);
+ trace_seq_destroy(&p);
+}
+
+static int parse_arg_format_pointer(const char *format)
+{
+ int ret = 0;
+ int index;
+ int loop;
+
+ switch (*format) {
+ case 'F':
+ case 'S':
+ case 'f':
+ case 's':
+ ret++;
+ break;
+ case 'M':
+ case 'm':
+ /* [mM]R , [mM]F */
+ switch (format[1]) {
+ case 'R':
+ case 'F':
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ case 'I':
+ case 'i':
+ index = 2;
+ loop = 1;
+ switch (format[1]) {
+ case 'S':
+ /*[S][pfs]*/
+ while (loop) {
+ switch (format[index]) {
+ case 'p':
+ case 'f':
+ case 's':
+ ret++;
+ index++;
+ break;
+ default:
+ loop = 0;
+ break;
+ }
+ }
+ /* fall through */
+ case '4':
+ /* [4S][hnbl] */
+ switch (format[index]) {
+ case 'h':
case 'n':
- trace_seq_putc(s, '\n');
- break;
- case 't':
- trace_seq_putc(s, '\t');
- break;
- case 'r':
- trace_seq_putc(s, '\r');
- break;
- case '\\':
- trace_seq_putc(s, '\\');
+ case 'l':
+ case 'b':
+ ret++;
+ index++;
break;
- default:
- trace_seq_putc(s, *ptr);
+ }
+ if (format[1] == '4') {
+ ret++;
break;
}
+ /* fall through */
+ case '6':
+ /* [6S]c */
+ if (format[index] == 'c')
+ ret++;
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ case 'U':
+ switch (format[1]) {
+ case 'L':
+ case 'l':
+ case 'B':
+ case 'b':
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ case 'h':
+ switch (format[1]) {
+ case 'C':
+ case 'D':
+ case 'N':
+ ret++;
+ break;
+ }
+ ret++;
+ break;
+ default:
+ break;
+ }
- } else if (*ptr == '%') {
- saveptr = ptr;
- show_func = 0;
- len_as_arg = 0;
- cont_process:
- ptr++;
- switch (*ptr) {
- case '%':
- trace_seq_putc(s, '%');
- break;
- case '#':
- /* FIXME: need to handle properly */
- goto cont_process;
- case 'h':
- ls--;
- goto cont_process;
- case 'l':
- ls++;
- goto cont_process;
- case 'L':
- ls = 2;
- goto cont_process;
- case '*':
- /* The argument is the length. */
- if (!arg) {
- do_warning_event(event, "no argument match");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
- len_arg = eval_num_arg(data, size, event, arg);
- len_as_arg = 1;
- arg = arg->next;
- goto cont_process;
- case '.':
- case 'z':
- case 'Z':
- case '0' ... '9':
- case '-':
- goto cont_process;
- case 'p':
- if (tep->long_size == 4)
- ls = 1;
- else
- ls = 2;
+ return ret;
+}
- if (isalnum(ptr[1]))
- ptr++;
+static void free_parse_args(struct tep_print_parse *arg)
+{
+ struct tep_print_parse *del;
- if (arg->type == TEP_PRINT_BSTRING) {
- trace_seq_puts(s, arg->string.string);
- arg = arg->next;
- break;
- }
+ while (arg) {
+ del = arg;
+ arg = del->next;
+ free(del->format);
+ free(del);
+ }
+}
- if (*ptr == 'F' || *ptr == 'f' ||
- *ptr == 'S' || *ptr == 's') {
- show_func = *ptr;
- } else if (*ptr == 'M' || *ptr == 'm') {
- print_mac_arg(s, *ptr, data, size, event, arg);
- arg = arg->next;
- break;
- } else if (*ptr == 'I' || *ptr == 'i') {
- int n;
+static int parse_arg_add(struct tep_print_parse **parse, char *format,
+ enum tep_print_parse_type type,
+ struct tep_print_arg *arg,
+ struct tep_print_arg *len_as_arg,
+ int ls)
+{
+ struct tep_print_parse *parg = NULL;
- n = print_ip_arg(s, ptr, data, size, event, arg);
- if (n > 0) {
- ptr += n - 1;
- arg = arg->next;
- break;
- }
- }
+ parg = calloc(1, sizeof(*parg));
+ if (!parg)
+ goto error;
+ parg->format = strdup(format);
+ if (!parg->format)
+ goto error;
+ parg->type = type;
+ parg->arg = arg;
+ parg->len_as_arg = len_as_arg;
+ parg->ls = ls;
+ *parse = parg;
+ return 0;
+error:
+ if (parg) {
+ free(parg->format);
+ free(parg);
+ }
+ return -1;
+}
- /* fall through */
- case 'd':
- case 'u':
- case 'i':
- case 'x':
- case 'X':
- case 'o':
- if (!arg) {
- do_warning_event(event, "no argument match");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
+static int parse_arg_format(struct tep_print_parse **parse,
+ struct tep_event *event,
+ const char *format, struct tep_print_arg **arg)
+{
+ struct tep_print_arg *len_arg = NULL;
+ char print_format[32];
+ const char *start = format;
+ int ret = 0;
+ int ls = 0;
+ int res;
+ int len;
- len = ((unsigned long)ptr + 1) -
- (unsigned long)saveptr;
+ format++;
+ ret++;
+ for (; *format; format++) {
+ switch (*format) {
+ case '#':
+ /* FIXME: need to handle properly */
+ break;
+ case 'h':
+ ls--;
+ break;
+ case 'l':
+ ls++;
+ break;
+ case 'L':
+ ls = 2;
+ break;
+ case '.':
+ case 'z':
+ case 'Z':
+ case '0' ... '9':
+ case '-':
+ break;
+ case '*':
+ /* The argument is the length. */
+ if (!*arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
+ if (len_arg) {
+ do_warning_event(event, "argument already matched");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
+ len_arg = *arg;
+ *arg = (*arg)->next;
+ break;
+ case 'p':
+ if (!*arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
+ res = parse_arg_format_pointer(format + 1);
+ if (res > 0) {
+ format += res;
+ ret += res;
+ }
+ len = ((unsigned long)format + 1) -
+ (unsigned long)start;
+ /* should never happen */
+ if (len > 31) {
+ do_warning_event(event, "bad format!");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ len = 31;
+ }
+ memcpy(print_format, start, len);
+ print_format[len] = 0;
- /* should never happen */
- if (len > 31) {
- do_warning_event(event, "bad format!");
- event->flags |= TEP_EVENT_FL_FAILED;
- len = 31;
- }
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_ARG_POINTER, *arg, len_arg, ls);
+ *arg = (*arg)->next;
+ ret++;
+ return ret;
+ case 'd':
+ case 'u':
+ case 'i':
+ case 'x':
+ case 'X':
+ case 'o':
+ if (!*arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
- memcpy(format, saveptr, len);
- format[len] = 0;
+ len = ((unsigned long)format + 1) -
+ (unsigned long)start;
- val = eval_num_arg(data, size, event, arg);
- arg = arg->next;
+ /* should never happen */
+ if (len > 30) {
+ do_warning_event(event, "bad format!");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ len = 31;
+ }
+ memcpy(print_format, start, len);
+ print_format[len] = 0;
- if (show_func) {
- func = find_func(tep, val);
- if (func) {
- trace_seq_puts(s, func->func);
- if (show_func == 'F')
- trace_seq_printf(s,
- "+0x%llx",
- val - func->addr);
- break;
- }
- }
- if (tep->long_size == 8 && ls == 1 &&
- sizeof(long) != 8) {
- char *p;
-
- /* make %l into %ll */
- if (ls == 1 && (p = strchr(format, 'l')))
- memmove(p+1, p, strlen(p)+1);
- else if (strcmp(format, "%p") == 0)
- strcpy(format, "0x%llx");
- ls = 2;
- }
- switch (ls) {
- case -2:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (char)val);
- else
- trace_seq_printf(s, format, (char)val);
- break;
- case -1:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (short)val);
- else
- trace_seq_printf(s, format, (short)val);
- break;
- case 0:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (int)val);
- else
- trace_seq_printf(s, format, (int)val);
- break;
- case 1:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg, (long)val);
- else
- trace_seq_printf(s, format, (long)val);
- break;
- case 2:
- if (len_as_arg)
- trace_seq_printf(s, format, len_arg,
- (long long)val);
- else
- trace_seq_printf(s, format, (long long)val);
- break;
- default:
- do_warning_event(event, "bad count (%d)", ls);
- event->flags |= TEP_EVENT_FL_FAILED;
- }
- break;
- case 's':
- if (!arg) {
- do_warning_event(event, "no matching argument");
- event->flags |= TEP_EVENT_FL_FAILED;
- goto out_failed;
- }
+ if (event->tep->long_size == 8 && ls == 1 &&
+ sizeof(long) != 8) {
+ char *p;
+
+ /* make %l into %ll */
+ if (ls == 1 && (p = strchr(print_format, 'l')))
+ memmove(p+1, p, strlen(p)+1);
+ ls = 2;
+ }
+ if (ls < -2 || ls > 2) {
+ do_warning_event(event, "bad count (%d)", ls);
+ event->flags |= TEP_EVENT_FL_FAILED;
+ }
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls);
+ *arg = (*arg)->next;
+ ret++;
+ return ret;
+ case 's':
+ if (!*arg) {
+ do_warning_event(event, "no matching argument");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ goto out_failed;
+ }
- len = ((unsigned long)ptr + 1) -
- (unsigned long)saveptr;
+ len = ((unsigned long)format + 1) -
+ (unsigned long)start;
- /* should never happen */
- if (len > 31) {
- do_warning_event(event, "bad format!");
- event->flags |= TEP_EVENT_FL_FAILED;
- len = 31;
- }
+ /* should never happen */
+ if (len > 31) {
+ do_warning_event(event, "bad format!");
+ event->flags |= TEP_EVENT_FL_FAILED;
+ len = 31;
+ }
+
+ memcpy(print_format, start, len);
+ print_format[len] = 0;
+
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_ARG_STRING, *arg, len_arg, 0);
+ *arg = (*arg)->next;
+ ret++;
+ return ret;
+ default:
+ snprintf(print_format, 32, ">%c<", *format);
+ parse_arg_add(parse, print_format,
+ PRINT_FMT_STRING, NULL, NULL, 0);
+ ret++;
+ return ret;
+ }
+ ret++;
+ }
+
+out_failed:
+ return ret;
- memcpy(format, saveptr, len);
- format[len] = 0;
- if (!len_as_arg)
- len_arg = -1;
- /* Use helper trace_seq */
- trace_seq_init(&p);
- print_str_arg(&p, data, size, event,
- format, len_arg, arg);
- trace_seq_terminate(&p);
- trace_seq_puts(s, p.buffer);
- trace_seq_destroy(&p);
- arg = arg->next;
+}
+
+static int parse_arg_string(struct tep_print_parse **parse, const char *format)
+{
+ struct trace_seq s;
+ int ret = 0;
+
+ trace_seq_init(&s);
+ for (; *format; format++) {
+ if (*format == '\\') {
+ format++;
+ ret++;
+ switch (*format) {
+ case 'n':
+ trace_seq_putc(&s, '\n');
+ break;
+ case 't':
+ trace_seq_putc(&s, '\t');
+ break;
+ case 'r':
+ trace_seq_putc(&s, '\r');
+ break;
+ case '\\':
+ trace_seq_putc(&s, '\\');
break;
default:
- trace_seq_printf(s, ">%c<", *ptr);
-
+ trace_seq_putc(&s, *format);
+ break;
}
+ } else if (*format == '%') {
+ if (*(format + 1) == '%') {
+ trace_seq_putc(&s, '%');
+ format++;
+ ret++;
+ } else
+ break;
} else
- trace_seq_putc(s, *ptr);
+ trace_seq_putc(&s, *format);
+
+ ret++;
+ }
+ trace_seq_terminate(&s);
+ parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0);
+ trace_seq_destroy(&s);
+
+ return ret;
+}
+
+static struct tep_print_parse *
+parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg)
+{
+ struct tep_print_parse *parse_ret = NULL;
+ struct tep_print_parse **parse = NULL;
+ int ret;
+ int len;
+
+ len = strlen(format);
+ while (*format) {
+ if (!parse_ret)
+ parse = &parse_ret;
+ if (*format == '%' && *(format + 1) != '%')
+ ret = parse_arg_format(parse, event, format, &arg);
+ else
+ ret = parse_arg_string(parse, format);
+ if (*parse)
+ parse = &((*parse)->next);
+
+ len -= ret;
+ if (len > 0)
+ format += ret;
+ else
+ break;
+ }
+ return parse_ret;
+}
+
+static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s,
+ void *data, int size, struct tep_event *event)
+{
+ int len_arg;
+
+ while (parse) {
+ if (parse->len_as_arg)
+ len_arg = eval_num_arg(data, size, event, parse->len_as_arg);
+ switch (parse->type) {
+ case PRINT_FMT_ARG_DIGIT:
+ print_arg_number(s, parse->format,
+ parse->len_as_arg ? len_arg : -1, data,
+ size, parse->ls, event, parse->arg);
+ break;
+ case PRINT_FMT_ARG_POINTER:
+ print_arg_pointer(s, parse->format,
+ parse->len_as_arg ? len_arg : 1,
+ data, size, event, parse->arg);
+ break;
+ case PRINT_FMT_ARG_STRING:
+ print_arg_string(s, parse->format,
+ parse->len_as_arg ? len_arg : -1,
+ data, size, event, parse->arg);
+ break;
+ case PRINT_FMT_STRING:
+ default:
+ trace_seq_printf(s, "%s", parse->format);
+ break;
+ }
+ parse = parse->next;
}
+}
+
+static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
+{
+ struct tep_print_parse *parse = event->print_fmt.print_cache;
+ struct tep_print_arg *args = NULL;
+ char *bprint_fmt = NULL;
if (event->flags & TEP_EVENT_FL_FAILED) {
-out_failed:
trace_seq_printf(s, "[FAILED TO PARSE]");
+ tep_print_fields(s, data, size, event);
+ return;
}
- if (args) {
+ if (event->flags & TEP_EVENT_FL_ISBPRINT) {
+ bprint_fmt = get_bprint_format(data, size, event);
+ args = make_bprint_args(bprint_fmt, data, size, event);
+ parse = parse_args(event, bprint_fmt, args);
+ }
+
+ print_event_cache(parse, s, data, size, event);
+
+ if (event->flags & TEP_EVENT_FL_ISBPRINT) {
+ free_parse_args(parse);
free_args(args);
free(bprint_fmt);
}
@@ -6363,9 +6852,13 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp,
*list = arg;
list = &arg->next;
}
- return 0;
}
+ if (!(event->flags & TEP_EVENT_FL_ISBPRINT))
+ event->print_fmt.print_cache = parse_args(event,
+ event->print_fmt.format,
+ event->print_fmt.args);
+
return 0;
event_parse_failed:
@@ -7032,7 +7525,7 @@ void tep_free_event(struct tep_event *event)
free(event->print_fmt.format);
free_args(event->print_fmt.args);
-
+ free_parse_args(event->print_fmt.print_cache);
free(event);
}
@@ -7120,6 +7613,7 @@ void tep_free(struct tep_handle *tep)
free(tep->events);
free(tep->sort_events);
free(tep->func_resolver);
+ tep_free_plugin_paths(tep);
free(tep);
}
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index b77837f75a0d..c29b693e31ee 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -1,21 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
/*
* Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
*
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#ifndef _PARSE_EVENTS_H
#define _PARSE_EVENTS_H
@@ -272,9 +258,12 @@ struct tep_print_arg {
};
};
+struct tep_print_parse;
+
struct tep_print_fmt {
char *format;
struct tep_print_arg *args;
+ struct tep_print_parse *print_cache;
};
struct tep_event {
@@ -379,7 +368,7 @@ enum tep_errno {
* errno since SUS requires the errno has distinct positive values.
* See 'Issue 6' in the link below.
*
- * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+ * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
*/
__TEP_ERRNO__START = -100000,
@@ -393,14 +382,29 @@ struct tep_plugin_list;
#define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1))
+enum tep_plugin_load_priority {
+ TEP_PLUGIN_FIRST,
+ TEP_PLUGIN_LAST,
+};
+
+int tep_add_plugin_path(struct tep_handle *tep, char *path,
+ enum tep_plugin_load_priority prio);
struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep);
void tep_unload_plugins(struct tep_plugin_list *plugin_list,
struct tep_handle *tep);
+void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
+ void (*load_plugin)(struct tep_handle *tep,
+ const char *path,
+ const char *name,
+ void *data),
+ void *data);
char **tep_plugin_list_options(void);
void tep_plugin_free_options_list(char **list);
int tep_plugin_add_options(const char *name,
struct tep_plugin_option *options);
+int tep_plugin_add_option(const char *name, const char *val);
void tep_plugin_remove_options(struct tep_plugin_option *options);
+void tep_plugin_print_options(struct trace_seq *s);
void tep_print_plugins(struct trace_seq *s,
const char *prefix, const char *suffix,
const struct tep_plugin_list *list);
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
index e1f7ddd5a6cf..e7c2acb8680f 100644
--- a/tools/lib/traceevent/event-plugin.c
+++ b/tools/lib/traceevent/event-plugin.c
@@ -13,6 +13,7 @@
#include <sys/stat.h>
#include <unistd.h>
#include <dirent.h>
+#include <errno.h>
#include "event-parse.h"
#include "event-parse-local.h"
#include "event-utils.h"
@@ -38,6 +39,12 @@ struct tep_plugin_list {
void *handle;
};
+struct tep_plugins_dir {
+ struct tep_plugins_dir *next;
+ char *path;
+ enum tep_plugin_load_priority prio;
+};
+
static void lower_case(char *str)
{
if (!str)
@@ -247,6 +254,170 @@ void tep_plugin_remove_options(struct tep_plugin_option *options)
}
}
+static int parse_option_name(char **option, char **plugin)
+{
+ char *p;
+
+ *plugin = NULL;
+
+ if ((p = strstr(*option, ":"))) {
+ *plugin = *option;
+ *p = '\0';
+ *option = strdup(p + 1);
+ if (!*option)
+ return -1;
+ }
+ return 0;
+}
+
+static struct tep_plugin_option *
+find_registered_option(const char *plugin, const char *option)
+{
+ struct registered_plugin_options *reg;
+ struct tep_plugin_option *op;
+ const char *op_plugin;
+
+ for (reg = registered_options; reg; reg = reg->next) {
+ for (op = reg->options; op->name; op++) {
+ if (op->plugin_alias)
+ op_plugin = op->plugin_alias;
+ else
+ op_plugin = op->file;
+
+ if (plugin && strcmp(plugin, op_plugin) != 0)
+ continue;
+ if (strcmp(option, op->name) != 0)
+ continue;
+
+ return op;
+ }
+ }
+
+ return NULL;
+}
+
+static int process_option(const char *plugin, const char *option, const char *val)
+{
+ struct tep_plugin_option *op;
+
+ op = find_registered_option(plugin, option);
+ if (!op)
+ return 0;
+
+ return update_option_value(op, val);
+}
+
+/**
+ * tep_plugin_add_option - add an option/val pair to set plugin options
+ * @name: The name of the option (format: <plugin>:<option> or just <option>)
+ * @val: (optional) the value for the option
+ *
+ * Modify a plugin option. If @val is given than the value of the option
+ * is set (note, some options just take a boolean, so @val must be either
+ * "1" or "0" or "true" or "false").
+ */
+int tep_plugin_add_option(const char *name, const char *val)
+{
+ struct trace_plugin_options *op;
+ char *option_str;
+ char *plugin;
+
+ option_str = strdup(name);
+ if (!option_str)
+ return -ENOMEM;
+
+ if (parse_option_name(&option_str, &plugin) < 0)
+ return -ENOMEM;
+
+ /* If the option exists, update the val */
+ for (op = trace_plugin_options; op; op = op->next) {
+ /* Both must be NULL or not NULL */
+ if ((!plugin || !op->plugin) && plugin != op->plugin)
+ continue;
+ if (plugin && strcmp(plugin, op->plugin) != 0)
+ continue;
+ if (strcmp(op->option, option_str) != 0)
+ continue;
+
+ /* update option */
+ free(op->value);
+ if (val) {
+ op->value = strdup(val);
+ if (!op->value)
+ goto out_free;
+ } else
+ op->value = NULL;
+
+ /* plugin and option_str don't get freed at the end */
+ free(plugin);
+ free(option_str);
+
+ plugin = op->plugin;
+ option_str = op->option;
+ break;
+ }
+
+ /* If not found, create */
+ if (!op) {
+ op = malloc(sizeof(*op));
+ if (!op)
+ goto out_free;
+ memset(op, 0, sizeof(*op));
+ op->plugin = plugin;
+ op->option = option_str;
+ if (val) {
+ op->value = strdup(val);
+ if (!op->value) {
+ free(op);
+ goto out_free;
+ }
+ }
+ op->next = trace_plugin_options;
+ trace_plugin_options = op;
+ }
+
+ return process_option(plugin, option_str, val);
+
+out_free:
+ free(plugin);
+ free(option_str);
+ return -ENOMEM;
+}
+
+static void print_op_data(struct trace_seq *s, const char *name,
+ const char *op)
+{
+ if (op)
+ trace_seq_printf(s, "%8s:\t%s\n", name, op);
+}
+
+/**
+ * tep_plugin_print_options - print out the registered plugin options
+ * @s: The trace_seq descriptor to write the plugin options into
+ *
+ * Writes a list of options into trace_seq @s.
+ */
+void tep_plugin_print_options(struct trace_seq *s)
+{
+ struct registered_plugin_options *reg;
+ struct tep_plugin_option *op;
+
+ for (reg = registered_options; reg; reg = reg->next) {
+ if (reg != registered_options)
+ trace_seq_printf(s, "============\n");
+ for (op = reg->options; op->name; op++) {
+ if (op != reg->options)
+ trace_seq_printf(s, "------------\n");
+ print_op_data(s, "file", op->file);
+ print_op_data(s, "plugin", op->plugin_alias);
+ print_op_data(s, "option", op->name);
+ print_op_data(s, "desc", op->description);
+ print_op_data(s, "value", op->value);
+ trace_seq_printf(s, "%8s:\t%d\n", "set", op->set);
+ }
+ }
+}
+
/**
* tep_print_plugins - print out the list of plugins loaded
* @s: the trace_seq descripter to write to
@@ -273,6 +444,7 @@ load_plugin(struct tep_handle *tep, const char *path,
const char *file, void *data)
{
struct tep_plugin_list **plugin_list = data;
+ struct tep_plugin_option *options;
tep_plugin_load_func func;
struct tep_plugin_list *list;
const char *alias;
@@ -297,6 +469,16 @@ load_plugin(struct tep_handle *tep, const char *path,
if (!alias)
alias = file;
+ options = dlsym(handle, TEP_PLUGIN_OPTIONS_NAME);
+ if (options) {
+ while (options->name) {
+ ret = update_option(alias, options);
+ if (ret < 0)
+ goto out_free;
+ options++;
+ }
+ }
+
func = dlsym(handle, TEP_PLUGIN_LOADER_NAME);
if (!func) {
warning("could not find func '%s' in plugin '%s'\n%s\n",
@@ -365,28 +547,53 @@ load_plugins_dir(struct tep_handle *tep, const char *suffix,
closedir(dir);
}
-static void
-load_plugins(struct tep_handle *tep, const char *suffix,
- void (*load_plugin)(struct tep_handle *tep,
- const char *path,
- const char *name,
- void *data),
- void *data)
+/**
+ * tep_load_plugins_hook - call a user specified callback to load a plugin
+ * @tep: handler to traceevent context
+ * @suffix: filter only plugin files with given suffix
+ * @load_plugin: user specified callback, called for each plugin file
+ * @data: custom context, passed to @load_plugin
+ *
+ * Searches for traceevent plugin files and calls @load_plugin for each
+ * The order of plugins search is:
+ * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_FIRST
+ * - Directory, specified at compile time with PLUGIN_TRACEEVENT_DIR
+ * - Directory, specified by environment variable TRACEEVENT_PLUGIN_DIR
+ * - In user's home: ~/.local/lib/traceevent/plugins/
+ * - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_LAST
+ *
+ */
+void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
+ void (*load_plugin)(struct tep_handle *tep,
+ const char *path,
+ const char *name,
+ void *data),
+ void *data)
{
+ struct tep_plugins_dir *dir = NULL;
char *home;
char *path;
char *envdir;
int ret;
- if (tep->flags & TEP_DISABLE_PLUGINS)
+ if (tep && tep->flags & TEP_DISABLE_PLUGINS)
return;
+ if (tep)
+ dir = tep->plugins_dir;
+ while (dir) {
+ if (dir->prio == TEP_PLUGIN_FIRST)
+ load_plugins_dir(tep, suffix, dir->path,
+ load_plugin, data);
+ dir = dir->next;
+ }
+
/*
* If a system plugin directory was defined,
* check that first.
*/
#ifdef PLUGIN_DIR
- if (!(tep->flags & TEP_DISABLE_SYS_PLUGINS))
+ if (!tep || !(tep->flags & TEP_DISABLE_SYS_PLUGINS))
load_plugins_dir(tep, suffix, PLUGIN_DIR,
load_plugin, data);
#endif
@@ -415,6 +622,15 @@ load_plugins(struct tep_handle *tep, const char *suffix,
load_plugins_dir(tep, suffix, path, load_plugin, data);
+ if (tep)
+ dir = tep->plugins_dir;
+ while (dir) {
+ if (dir->prio == TEP_PLUGIN_LAST)
+ load_plugins_dir(tep, suffix, dir->path,
+ load_plugin, data);
+ dir = dir->next;
+ }
+
free(path);
}
@@ -423,10 +639,59 @@ tep_load_plugins(struct tep_handle *tep)
{
struct tep_plugin_list *list = NULL;
- load_plugins(tep, ".so", load_plugin, &list);
+ tep_load_plugins_hook(tep, ".so", load_plugin, &list);
return list;
}
+/**
+ * tep_add_plugin_path - Add a new plugin directory.
+ * @tep: Trace event handler.
+ * @path: Path to a directory. All plugin files in that
+ * directory will be loaded.
+ *@prio: Load priority of the plugins in that directory.
+ *
+ * Returns -1 in case of an error, 0 otherwise.
+ */
+int tep_add_plugin_path(struct tep_handle *tep, char *path,
+ enum tep_plugin_load_priority prio)
+{
+ struct tep_plugins_dir *dir;
+
+ if (!tep || !path)
+ return -1;
+
+ dir = calloc(1, sizeof(*dir));
+ if (!dir)
+ return -1;
+
+ dir->path = strdup(path);
+ if (!dir->path) {
+ free(dir);
+ return -1;
+ }
+ dir->prio = prio;
+ dir->next = tep->plugins_dir;
+ tep->plugins_dir = dir;
+
+ return 0;
+}
+
+void tep_free_plugin_paths(struct tep_handle *tep)
+{
+ struct tep_plugins_dir *dir;
+
+ if (!tep)
+ return;
+
+ dir = tep->plugins_dir;
+ while (dir) {
+ tep->plugins_dir = tep->plugins_dir->next;
+ free(dir->path);
+ free(dir);
+ dir = tep->plugins_dir;
+ }
+}
+
void
tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep)
{
diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h
index 5fa8292e341b..a2b522093cfd 100644
--- a/tools/lib/traceevent/kbuffer.h
+++ b/tools/lib/traceevent/kbuffer.h
@@ -1,22 +1,7 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
/*
* Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
*
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#ifndef _KBUFFER_H
#define _KBUFFER_H
diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build
index 210d26910613..dd4da823c38f 100644
--- a/tools/lib/traceevent/plugins/Build
+++ b/tools/lib/traceevent/plugins/Build
@@ -5,6 +5,8 @@ plugin_kvm-y += plugin_kvm.o
plugin_mac80211-y += plugin_mac80211.o
plugin_sched_switch-y += plugin_sched_switch.o
plugin_function-y += plugin_function.o
+plugin_futex-y += plugin_futex.o
plugin_xen-y += plugin_xen.o
plugin_scsi-y += plugin_scsi.o
plugin_cfg80211-y += plugin_cfg80211.o
+plugin_tlb-y += plugin_tlb.o \ No newline at end of file
diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile
index 680d883efe05..47e802553250 100644
--- a/tools/lib/traceevent/plugins/Makefile
+++ b/tools/lib/traceevent/plugins/Makefile
@@ -134,9 +134,11 @@ PLUGINS += plugin_kvm.so
PLUGINS += plugin_mac80211.so
PLUGINS += plugin_sched_switch.so
PLUGINS += plugin_function.so
+PLUGINS += plugin_futex.so
PLUGINS += plugin_xen.so
PLUGINS += plugin_scsi.so
PLUGINS += plugin_cfg80211.so
+PLUGINS += plugin_tlb.so
PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS))
PLUGINS_IN := $(PLUGINS:.so=-in.o)
diff --git a/tools/lib/traceevent/plugins/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c
index 7770fcb78e0f..807b16e1bf0f 100644
--- a/tools/lib/traceevent/plugins/plugin_function.c
+++ b/tools/lib/traceevent/plugins/plugin_function.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
@@ -50,12 +35,20 @@ struct tep_plugin_option plugin_options[] =
.set = 1,
},
{
+ .name = "offset",
+ .plugin_alias = "ftrace",
+ .description =
+ "Show function names as well as their offsets",
+ .set = 0,
+ },
+ {
.name = NULL,
}
};
static struct tep_plugin_option *ftrace_parent = &plugin_options[0];
static struct tep_plugin_option *ftrace_indent = &plugin_options[1];
+static struct tep_plugin_option *ftrace_offset = &plugin_options[2];
static void add_child(struct func_stack *stack, const char *child, int pos)
{
@@ -123,6 +116,18 @@ static int add_and_get_index(const char *parent, const char *child, int cpu)
return 0;
}
+static void show_function(struct trace_seq *s, struct tep_handle *tep,
+ const char *func, unsigned long long function)
+{
+ unsigned long long offset;
+
+ trace_seq_printf(s, "%s", func);
+ if (ftrace_offset->set) {
+ offset = tep_find_function_address(tep, function);
+ trace_seq_printf(s, "+0x%x ", (int)(function - offset));
+ }
+}
+
static int function_handler(struct trace_seq *s, struct tep_record *record,
struct tep_event *event, void *context)
{
@@ -149,14 +154,14 @@ static int function_handler(struct trace_seq *s, struct tep_record *record,
trace_seq_printf(s, "%*s", index*3, "");
if (func)
- trace_seq_printf(s, "%s", func);
+ show_function(s, tep, func, function);
else
trace_seq_printf(s, "0x%llx", function);
if (ftrace_parent->set) {
trace_seq_printf(s, " <-- ");
if (parent)
- trace_seq_printf(s, "%s", parent);
+ show_function(s, tep, parent, pfunction);
else
trace_seq_printf(s, "0x%llx", pfunction);
}
@@ -164,11 +169,93 @@ static int function_handler(struct trace_seq *s, struct tep_record *record,
return 0;
}
+static int
+trace_stack_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ struct tep_format_field *field;
+ unsigned long long addr;
+ const char *func;
+ int long_size;
+ void *data = record->data;
+
+ field = tep_find_any_field(event, "caller");
+ if (!field) {
+ trace_seq_printf(s, "<CANT FIND FIELD %s>", "caller");
+ return 0;
+ }
+
+ trace_seq_puts(s, "<stack trace >\n");
+
+ long_size = tep_get_long_size(event->tep);
+
+ for (data += field->offset; data < record->data + record->size;
+ data += long_size) {
+ addr = tep_read_number(event->tep, data, long_size);
+
+ if ((long_size == 8 && addr == (unsigned long long)-1) ||
+ ((int)addr == -1))
+ break;
+
+ func = tep_find_function(event->tep, addr);
+ if (func)
+ trace_seq_printf(s, "=> %s (%llx)\n", func, addr);
+ else
+ trace_seq_printf(s, "=> %llx\n", addr);
+ }
+
+ return 0;
+}
+
+static int
+trace_raw_data_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ struct tep_format_field *field;
+ unsigned long long id;
+ int long_size;
+ void *data = record->data;
+
+ if (tep_get_field_val(s, event, "id", record, &id, 1))
+ return trace_seq_putc(s, '!');
+
+ trace_seq_printf(s, "# %llx", id);
+
+ field = tep_find_any_field(event, "buf");
+ if (!field) {
+ trace_seq_printf(s, "<CANT FIND FIELD %s>", "buf");
+ return 0;
+ }
+
+ long_size = tep_get_long_size(event->tep);
+
+ for (data += field->offset; data < record->data + record->size;
+ data += long_size) {
+ int size = sizeof(long);
+ int left = (record->data + record->size) - data;
+ int i;
+
+ if (size > left)
+ size = left;
+
+ for (i = 0; i < size; i++)
+ trace_seq_printf(s, " %02x", *(unsigned char *)(data + i));
+ }
+
+ return 0;
+}
+
int TEP_PLUGIN_LOADER(struct tep_handle *tep)
{
tep_register_event_handler(tep, -1, "ftrace", "function",
function_handler, NULL);
+ tep_register_event_handler(tep, -1, "ftrace", "kernel_stack",
+ trace_stack_handler, NULL);
+
+ tep_register_event_handler(tep, -1, "ftrace", "raw_data",
+ trace_raw_data_handler, NULL);
+
tep_plugin_add_options("ftrace", plugin_options);
return 0;
diff --git a/tools/lib/traceevent/plugins/plugin_futex.c b/tools/lib/traceevent/plugins/plugin_futex.c
new file mode 100644
index 000000000000..eb7c9f8a850a
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_futex.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2017 National Instruments Corp.
+ *
+ * Author: Julia Cartwright <julia@ni.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/futex.h>
+
+#include "event-parse.h"
+
+#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0]))
+
+struct futex_args {
+ unsigned long long uaddr;
+ unsigned long long op;
+ unsigned long long val;
+ unsigned long long utime; /* or val2 */
+ unsigned long long uaddr2;
+ unsigned long long val3;
+};
+
+struct futex_op {
+ const char *name;
+ const char *fmt_val;
+ const char *fmt_utime;
+ const char *fmt_uaddr2;
+ const char *fmt_val3;
+};
+
+static const struct futex_op futex_op_tbl[] = {
+ { "FUTEX_WAIT", " val=0x%08llx", " utime=0x%08llx", NULL, NULL },
+ { "FUTEX_WAKE", " val=%llu", NULL, NULL, NULL },
+ { "FUTEX_FD", " val=%llu", NULL, NULL, NULL },
+ { "FUTEX_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", NULL },
+ { "FUTEX_CMP_REQUEUE", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+ { "FUTEX_WAKE_OP", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+ { "FUTEX_LOCK_PI", NULL, " utime=0x%08llx", NULL, NULL },
+ { "FUTEX_UNLOCK_PI", NULL, NULL, NULL, NULL },
+ { "FUTEX_TRYLOCK_PI", NULL, NULL, NULL, NULL },
+ { "FUTEX_WAIT_BITSET", " val=0x%08llx", " utime=0x%08llx", NULL, " val3=0x%08llx" },
+ { "FUTEX_WAKE_BITSET", " val=%llu", NULL, NULL, " val3=0x%08llx" },
+ { "FUTEX_WAIT_REQUEUE_PI", " val=0x%08llx", " utime=0x%08llx", " uaddr2=0x%08llx", " val3=0x%08llx" },
+ { "FUTEX_CMP_REQUEUE_PI", " val=%llu", " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
+};
+
+
+static void futex_print(struct trace_seq *s, const struct futex_args *args,
+ const struct futex_op *fop)
+{
+ trace_seq_printf(s, " uaddr=0x%08llx", args->uaddr);
+
+ if (fop->fmt_val)
+ trace_seq_printf(s, fop->fmt_val, args->val);
+
+ if (fop->fmt_utime)
+ trace_seq_printf(s,fop->fmt_utime, args->utime);
+
+ if (fop->fmt_uaddr2)
+ trace_seq_printf(s, fop->fmt_uaddr2, args->uaddr2);
+
+ if (fop->fmt_val3)
+ trace_seq_printf(s, fop->fmt_val3, args->val3);
+}
+
+static int futex_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ const struct futex_op *fop;
+ struct futex_args args;
+ unsigned long long cmd;
+
+ if (tep_get_field_val(s, event, "uaddr", record, &args.uaddr, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "op", record, &args.op, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "val", record, &args.val, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "utime", record, &args.utime, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "uaddr2", record, &args.uaddr2, 1))
+ return 1;
+
+ if (tep_get_field_val(s, event, "val3", record, &args.val3, 1))
+ return 1;
+
+ cmd = args.op & FUTEX_CMD_MASK;
+ if (cmd >= ARRAY_SIZE(futex_op_tbl))
+ return 1;
+
+ fop = &futex_op_tbl[cmd];
+
+ trace_seq_printf(s, "op=%s", fop->name);
+
+ if (args.op & FUTEX_PRIVATE_FLAG)
+ trace_seq_puts(s, "|FUTEX_PRIVATE_FLAG");
+
+ if (args.op & FUTEX_CLOCK_REALTIME)
+ trace_seq_puts(s, "|FUTEX_CLOCK_REALTIME");
+
+ futex_print(s, &args, fop);
+ return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+ tep_register_event_handler(tep, -1, "syscalls", "sys_enter_futex",
+ futex_handler, NULL);
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+ tep_unregister_event_handler(tep, -1, "syscalls", "sys_enter_futex",
+ futex_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugins/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c
index bb434e0ed03a..d98466788f14 100644
--- a/tools/lib/traceevent/plugins/plugin_hrtimer.c
+++ b/tools/lib/traceevent/plugins/plugin_hrtimer.c
@@ -1,22 +1,7 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
* Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c
index 04fc125f38cb..69111a68d3cf 100644
--- a/tools/lib/traceevent/plugins/plugin_jbd2.c
+++ b/tools/lib/traceevent/plugins/plugin_jbd2.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c
index edaec5d962c3..4b4f7f9616e3 100644
--- a/tools/lib/traceevent/plugins/plugin_kmem.c
+++ b/tools/lib/traceevent/plugins/plugin_kmem.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c
index c8e623065a7e..51ceeb9147eb 100644
--- a/tools/lib/traceevent/plugins/plugin_kvm.c
+++ b/tools/lib/traceevent/plugins/plugin_kvm.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
@@ -155,7 +140,23 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
_ER(EXIT_WRITE_DR5, 0x035) \
_ER(EXIT_WRITE_DR6, 0x036) \
_ER(EXIT_WRITE_DR7, 0x037) \
- _ER(EXIT_EXCP_BASE, 0x040) \
+ _ER(EXIT_EXCP_DE, 0x040) \
+ _ER(EXIT_EXCP_DB, 0x041) \
+ _ER(EXIT_EXCP_BP, 0x043) \
+ _ER(EXIT_EXCP_OF, 0x044) \
+ _ER(EXIT_EXCP_BR, 0x045) \
+ _ER(EXIT_EXCP_UD, 0x046) \
+ _ER(EXIT_EXCP_NM, 0x047) \
+ _ER(EXIT_EXCP_DF, 0x048) \
+ _ER(EXIT_EXCP_TS, 0x04a) \
+ _ER(EXIT_EXCP_NP, 0x04b) \
+ _ER(EXIT_EXCP_SS, 0x04c) \
+ _ER(EXIT_EXCP_GP, 0x04d) \
+ _ER(EXIT_EXCP_PF, 0x04e) \
+ _ER(EXIT_EXCP_MF, 0x050) \
+ _ER(EXIT_EXCP_AC, 0x051) \
+ _ER(EXIT_EXCP_MC, 0x052) \
+ _ER(EXIT_EXCP_XF, 0x053) \
_ER(EXIT_INTR, 0x060) \
_ER(EXIT_NMI, 0x061) \
_ER(EXIT_SMI, 0x062) \
@@ -201,7 +202,10 @@ static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
_ER(EXIT_MONITOR, 0x08a) \
_ER(EXIT_MWAIT, 0x08b) \
_ER(EXIT_MWAIT_COND, 0x08c) \
- _ER(EXIT_NPF, 0x400) \
+ _ER(EXIT_XSETBV, 0x08d) \
+ _ER(EXIT_NPF, 0x400) \
+ _ER(EXIT_AVIC_INCOMPLETE_IPI, 0x401) \
+ _ER(EXIT_AVIC_UNACCELERATED_ACCESS, 0x402) \
_ER(EXIT_ERR, -1)
#define _ER(reason, val) { #reason, val },
@@ -241,7 +245,7 @@ static const char *find_exit_reason(unsigned isa, int val)
}
if (!strings)
return "UNKNOWN-ISA";
- for (i = 0; strings[i].val >= 0; i++)
+ for (i = 0; strings[i].str; i++)
if (strings[i].val == val)
break;
diff --git a/tools/lib/traceevent/plugins/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c
index 884303c26b5c..f48071e3cfb8 100644
--- a/tools/lib/traceevent/plugins/plugin_mac80211.c
+++ b/tools/lib/traceevent/plugins/plugin_mac80211.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c
index 957389a0ff7a..e12fa103820a 100644
--- a/tools/lib/traceevent/plugins/plugin_sched_switch.c
+++ b/tools/lib/traceevent/plugins/plugin_sched_switch.c
@@ -1,21 +1,6 @@
+// SPDX-License-Identifier: LGPL-2.1
/*
* Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation;
- * version 2.1 of the License (not later!)
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this program; if not, see <http://www.gnu.org/licenses>
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/lib/traceevent/plugins/plugin_tlb.c b/tools/lib/traceevent/plugins/plugin_tlb.c
new file mode 100644
index 000000000000..43657fb60504
--- /dev/null
+++ b/tools/lib/traceevent/plugins/plugin_tlb.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: LGPL-2.1
+/*
+ * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+enum tlb_flush_reason {
+ TLB_FLUSH_ON_TASK_SWITCH,
+ TLB_REMOTE_SHOOTDOWN,
+ TLB_LOCAL_SHOOTDOWN,
+ TLB_LOCAL_MM_SHOOTDOWN,
+ NR_TLB_FLUSH_REASONS,
+};
+
+static int tlb_flush_handler(struct trace_seq *s, struct tep_record *record,
+ struct tep_event *event, void *context)
+{
+ unsigned long long val;
+
+ trace_seq_printf(s, "pages=");
+
+ tep_print_num_field(s, "%ld", event, "pages", record, 1);
+
+ if (tep_get_field_val(s, event, "reason", record, &val, 1) < 0)
+ return -1;
+
+ trace_seq_puts(s, " reason=");
+
+ switch (val) {
+ case TLB_FLUSH_ON_TASK_SWITCH:
+ trace_seq_puts(s, "flush on task switch");
+ break;
+ case TLB_REMOTE_SHOOTDOWN:
+ trace_seq_puts(s, "remote shootdown");
+ break;
+ case TLB_LOCAL_SHOOTDOWN:
+ trace_seq_puts(s, "local shootdown");
+ break;
+ case TLB_LOCAL_MM_SHOOTDOWN:
+ trace_seq_puts(s, "local mm shootdown");
+ break;
+ }
+
+ trace_seq_printf(s, " (%lld)", val);
+
+ return 0;
+}
+
+int TEP_PLUGIN_LOADER(struct tep_handle *tep)
+{
+ tep_register_event_handler(tep, -1, "tlb", "tlb_flush",
+ tlb_flush_handler, NULL);
+
+ return 0;
+}
+
+void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
+{
+ tep_unregister_event_handler(tep, -1,
+ "tlb", "tlb_flush",
+ tlb_flush_handler, NULL);
+}
diff --git a/tools/memory-model/Documentation/cheatsheet.txt b/tools/memory-model/Documentation/cheatsheet.txt
index 33ba98d72b16..99d00870b160 100644
--- a/tools/memory-model/Documentation/cheatsheet.txt
+++ b/tools/memory-model/Documentation/cheatsheet.txt
@@ -3,9 +3,9 @@
C Self R W RMW Self R W DR DW RMW SV
-- ---- - - --- ---- - - -- -- --- --
-Store, e.g., WRITE_ONCE() Y Y
-Load, e.g., READ_ONCE() Y Y Y Y
-Unsuccessful RMW operation Y Y Y Y
+Relaxed store Y Y
+Relaxed load Y Y Y Y
+Relaxed RMW operation Y Y Y Y
rcu_dereference() Y Y Y Y
Successful *_acquire() R Y Y Y Y Y Y
Successful *_release() C Y Y Y W Y
@@ -17,14 +17,19 @@ smp_mb__before_atomic() CP Y Y Y a a a a Y
smp_mb__after_atomic() CP a a Y Y Y Y Y Y
-Key: C: Ordering is cumulative
- P: Ordering propagates
- R: Read, for example, READ_ONCE(), or read portion of RMW
- W: Write, for example, WRITE_ONCE(), or write portion of RMW
- Y: Provides ordering
- a: Provides ordering given intervening RMW atomic operation
- DR: Dependent read (address dependency)
- DW: Dependent write (address, data, or control dependency)
- RMW: Atomic read-modify-write operation
- SELF: Orders self, as opposed to accesses before and/or after
- SV: Orders later accesses to the same variable
+Key: Relaxed: A relaxed operation is either READ_ONCE(), WRITE_ONCE(),
+ a *_relaxed() RMW operation, an unsuccessful RMW
+ operation, a non-value-returning RMW operation such
+ as atomic_inc(), or one of the atomic*_read() and
+ atomic*_set() family of operations.
+ C: Ordering is cumulative
+ P: Ordering propagates
+ R: Read, for example, READ_ONCE(), or read portion of RMW
+ W: Write, for example, WRITE_ONCE(), or write portion of RMW
+ Y: Provides ordering
+ a: Provides ordering given intervening RMW atomic operation
+ DR: Dependent read (address dependency)
+ DW: Dependent write (address, data, or control dependency)
+ RMW: Atomic read-modify-write operation
+ SELF: Orders self, as opposed to accesses before and/or after
+ SV: Orders later accesses to the same variable
diff --git a/tools/memory-model/Documentation/litmus-tests.txt b/tools/memory-model/Documentation/litmus-tests.txt
new file mode 100644
index 000000000000..2f840dcd15cf
--- /dev/null
+++ b/tools/memory-model/Documentation/litmus-tests.txt
@@ -0,0 +1,1074 @@
+Linux-Kernel Memory Model Litmus Tests
+======================================
+
+This file describes the LKMM litmus-test format by example, describes
+some tricks and traps, and finally outlines LKMM's limitations. Earlier
+versions of this material appeared in a number of LWN articles, including:
+
+https://lwn.net/Articles/720550/
+ A formal kernel memory-ordering model (part 2)
+https://lwn.net/Articles/608550/
+ Axiomatic validation of memory barriers and atomic instructions
+https://lwn.net/Articles/470681/
+ Validating Memory Barriers and Atomic Instructions
+
+This document presents information in decreasing order of applicability,
+so that, where possible, the information that has proven more commonly
+useful is shown near the beginning.
+
+For information on installing LKMM, including the underlying "herd7"
+tool, please see tools/memory-model/README.
+
+
+Copy-Pasta
+==========
+
+As with other software, it is often better (if less macho) to adapt an
+existing litmus test than it is to create one from scratch. A number
+of litmus tests may be found in the kernel source tree:
+
+ tools/memory-model/litmus-tests/
+ Documentation/litmus-tests/
+
+Several thousand more example litmus tests are available on github
+and kernel.org:
+
+ https://github.com/paulmckrcu/litmus
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
+
+The -l and -L arguments to "git grep" can be quite helpful in identifying
+existing litmus tests that are similar to the one you need. But even if
+you start with an existing litmus test, it is still helpful to have a
+good understanding of the litmus-test format.
+
+
+Examples and Format
+===================
+
+This section describes the overall format of litmus tests, starting
+with a small example of the message-passing pattern and moving on to
+more complex examples that illustrate explicit initialization and LKMM's
+minimalistic set of flow-control statements.
+
+
+Message-Passing Example
+-----------------------
+
+This section gives an overview of the format of a litmus test using an
+example based on the common message-passing use case. This use case
+appears often in the Linux kernel. For example, a flag (modeled by "y"
+below) indicates that a buffer (modeled by "x" below) is now completely
+filled in and ready for use. It would be very bad if the consumer saw the
+flag set, but, due to memory misordering, saw old values in the buffer.
+
+This example asks whether smp_store_release() and smp_load_acquire()
+suffices to avoid this bad outcome:
+
+ 1 C MP+pooncerelease+poacquireonce
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 WRITE_ONCE(*x, 1);
+ 8 smp_store_release(y, 1);
+ 9 }
+10
+11 P1(int *x, int *y)
+12 {
+13 int r0;
+14 int r1;
+15
+16 r0 = smp_load_acquire(y);
+17 r1 = READ_ONCE(*x);
+18 }
+19
+20 exists (1:r0=1 /\ 1:r1=0)
+
+Line 1 starts with "C", which identifies this file as being in the
+LKMM C-language format (which, as we will see, is a small fragment
+of the full C language). The remainder of line 1 is the name of
+the test, which by convention is the filename with the ".litmus"
+suffix stripped. In this case, the actual test may be found in
+tools/memory-model/litmus-tests/MP+pooncerelease+poacquireonce.litmus
+in the Linux-kernel source tree.
+
+Mechanically generated litmus tests will often have an optional
+double-quoted comment string on the second line. Such strings are ignored
+when running the test. Yes, you can add your own comments to litmus
+tests, but this is a bit involved due to the use of multiple parsers.
+For now, you can use C-language comments in the C code, and these comments
+may be in either the "/* */" or the "//" style. A later section will
+cover the full litmus-test commenting story.
+
+Line 3 is the initialization section. Because the default initialization
+to zero suffices for this test, the "{}" syntax is used, which mean the
+initialization section is empty. Litmus tests requiring non-default
+initialization must have non-empty initialization sections, as in the
+example that will be presented later in this document.
+
+Lines 5-9 show the first process and lines 11-18 the second process. Each
+process corresponds to a Linux-kernel task (or kthread, workqueue, thread,
+and so on; LKMM discussions often use these terms interchangeably).
+The name of the first process is "P0" and that of the second "P1".
+You can name your processes anything you like as long as the names consist
+of a single "P" followed by a number, and as long as the numbers are
+consecutive starting with zero. This can actually be quite helpful,
+for example, a .litmus file matching "^P1(" but not matching "^P2("
+must contain a two-process litmus test.
+
+The argument list for each function are pointers to the global variables
+used by that function. Unlike normal C-language function parameters, the
+names are significant. The fact that both P0() and P1() have a formal
+parameter named "x" means that these two processes are working with the
+same global variable, also named "x". So the "int *x, int *y" on P0()
+and P1() mean that both processes are working with two shared global
+variables, "x" and "y". Global variables are always passed to processes
+by reference, hence "P0(int *x, int *y)", but *never* "P0(int x, int y)".
+
+P0() has no local variables, but P1() has two of them named "r0" and "r1".
+These names may be freely chosen, but for historical reasons stemming from
+other litmus-test formats, it is conventional to use names consisting of
+"r" followed by a number as shown here. A common bug in litmus tests
+is forgetting to add a global variable to a process's parameter list.
+This will sometimes result in an error message, but can also cause the
+intended global to instead be silently treated as an undeclared local
+variable.
+
+Each process's code is similar to Linux-kernel C, as can be seen on lines
+7-8 and 13-17. This code may use many of the Linux kernel's atomic
+operations, some of its exclusive-lock functions, and some of its RCU
+and SRCU functions. An approximate list of the currently supported
+functions may be found in the linux-kernel.def file.
+
+The P0() process does "WRITE_ONCE(*x, 1)" on line 7. Because "x" is a
+pointer in P0()'s parameter list, this does an unordered store to global
+variable "x". Line 8 does "smp_store_release(y, 1)", and because "y"
+is also in P0()'s parameter list, this does a release store to global
+variable "y".
+
+The P1() process declares two local variables on lines 13 and 14.
+Line 16 does "r0 = smp_load_acquire(y)" which does an acquire load
+from global variable "y" into local variable "r0". Line 17 does a
+"r1 = READ_ONCE(*x)", which does an unordered load from "*x" into local
+variable "r1". Both "x" and "y" are in P1()'s parameter list, so both
+reference the same global variables that are used by P0().
+
+Line 20 is the "exists" assertion expression to evaluate the final state.
+This final state is evaluated after the dust has settled: both processes
+have completed and all of their memory references and memory barriers
+have propagated to all parts of the system. The references to the local
+variables "r0" and "r1" in line 24 must be prefixed with "1:" to specify
+which process they are local to.
+
+Note that the assertion expression is written in the litmus-test
+language rather than in C. For example, single "=" is an equality
+operator rather than an assignment. The "/\" character combination means
+"and". Similarly, "\/" stands for "or". Both of these are ASCII-art
+representations of the corresponding mathematical symbols. Finally,
+"~" stands for "logical not", which is "!" in C, and not to be confused
+with the C-language "~" operator which instead stands for "bitwise not".
+Parentheses may be used to override precedence.
+
+The "exists" assertion on line 20 is satisfied if the consumer sees the
+flag ("y") set but the buffer ("x") as not yet filled in, that is, if P1()
+loaded a value from "x" that was equal to 1 but loaded a value from "y"
+that was still equal to zero.
+
+This example can be checked by running the following command, which
+absolutely must be run from the tools/memory-model directory and from
+this directory only:
+
+herd7 -conf linux-kernel.cfg litmus-tests/MP+pooncerelease+poacquireonce.litmus
+
+The output is the result of something similar to a full state-space
+search, and is as follows:
+
+ 1 Test MP+pooncerelease+poacquireonce Allowed
+ 2 States 3
+ 3 1:r0=0; 1:r1=0;
+ 4 1:r0=0; 1:r1=1;
+ 5 1:r0=1; 1:r1=1;
+ 6 No
+ 7 Witnesses
+ 8 Positive: 0 Negative: 3
+ 9 Condition exists (1:r0=1 /\ 1:r1=0)
+10 Observation MP+pooncerelease+poacquireonce Never 0 3
+11 Time MP+pooncerelease+poacquireonce 0.00
+12 Hash=579aaa14d8c35a39429b02e698241d09
+
+The most pertinent line is line 10, which contains "Never 0 3", which
+indicates that the bad result flagged by the "exists" clause never
+happens. This line might instead say "Sometimes" to indicate that the
+bad result happened in some but not all executions, or it might say
+"Always" to indicate that the bad result happened in all executions.
+(The herd7 tool doesn't judge, so it is only an LKMM convention that the
+"exists" clause indicates a bad result. To see this, invert the "exists"
+clause's condition and run the test.) The numbers ("0 3") at the end
+of this line indicate the number of end states satisfying the "exists"
+clause (0) and the number not not satisfying that clause (3).
+
+Another important part of this output is shown in lines 2-5, repeated here:
+
+ 2 States 3
+ 3 1:r0=0; 1:r1=0;
+ 4 1:r0=0; 1:r1=1;
+ 5 1:r0=1; 1:r1=1;
+
+Line 2 gives the total number of end states, and each of lines 3-5 list
+one of these states, with the first ("1:r0=0; 1:r1=0;") indicating that
+both of P1()'s loads returned the value "0". As expected, given the
+"Never" on line 10, the state flagged by the "exists" clause is not
+listed. This full list of states can be helpful when debugging a new
+litmus test.
+
+The rest of the output is not normally needed, either due to irrelevance
+or due to being redundant with the lines discussed above. However, the
+following paragraph lists them for the benefit of readers possessed of
+an insatiable curiosity. Other readers should feel free to skip ahead.
+
+Line 1 echos the test name, along with the "Test" and "Allowed". Line 6's
+"No" says that the "exists" clause was not satisfied by any execution,
+and as such it has the same meaning as line 10's "Never". Line 7 is a
+lead-in to line 8's "Positive: 0 Negative: 3", which lists the number
+of end states satisfying and not satisfying the "exists" clause, just
+like the two numbers at the end of line 10. Line 9 repeats the "exists"
+clause so that you don't have to look it up in the litmus-test file.
+The number at the end of line 11 (which begins with "Time") gives the
+time in seconds required to analyze the litmus test. Small tests such
+as this one complete in a few milliseconds, so "0.00" is quite common.
+Line 12 gives a hash of the contents for the litmus-test file, and is used
+by tooling that manages litmus tests and their output. This tooling is
+used by people modifying LKMM itself, and among other things lets such
+people know which of the several thousand relevant litmus tests were
+affected by a given change to LKMM.
+
+
+Initialization
+--------------
+
+The previous example relied on the default zero initialization for
+"x" and "y", but a similar litmus test could instead initialize them
+to some other value:
+
+ 1 C MP+pooncerelease+poacquireonce
+ 2
+ 3 {
+ 4 x=42;
+ 5 y=42;
+ 6 }
+ 7
+ 8 P0(int *x, int *y)
+ 9 {
+10 WRITE_ONCE(*x, 1);
+11 smp_store_release(y, 1);
+12 }
+13
+14 P1(int *x, int *y)
+15 {
+16 int r0;
+17 int r1;
+18
+19 r0 = smp_load_acquire(y);
+20 r1 = READ_ONCE(*x);
+21 }
+22
+23 exists (1:r0=1 /\ 1:r1=42)
+
+Lines 3-6 now initialize both "x" and "y" to the value 42. This also
+means that the "exists" clause on line 23 must change "1:r1=0" to
+"1:r1=42".
+
+Running the test gives the same overall result as before, but with the
+value 42 appearing in place of the value zero:
+
+ 1 Test MP+pooncerelease+poacquireonce Allowed
+ 2 States 3
+ 3 1:r0=1; 1:r1=1;
+ 4 1:r0=42; 1:r1=1;
+ 5 1:r0=42; 1:r1=42;
+ 6 No
+ 7 Witnesses
+ 8 Positive: 0 Negative: 3
+ 9 Condition exists (1:r0=1 /\ 1:r1=42)
+10 Observation MP+pooncerelease+poacquireonce Never 0 3
+11 Time MP+pooncerelease+poacquireonce 0.02
+12 Hash=ab9a9b7940a75a792266be279a980156
+
+It is tempting to avoid the open-coded repetitions of the value "42"
+by defining another global variable "initval=42" and replacing all
+occurrences of "42" with "initval". This will not, repeat *not*,
+initialize "x" and "y" to 42, but instead to the address of "initval"
+(try it!). See the section below on linked lists to learn more about
+why this approach to initialization can be useful.
+
+
+Control Structures
+------------------
+
+LKMM supports the C-language "if" statement, which allows modeling of
+conditional branches. In LKMM, conditional branches can affect ordering,
+but only if you are *very* careful (compilers are surprisingly able
+to optimize away conditional branches). The following example shows
+the "load buffering" (LB) use case that is used in the Linux kernel to
+synchronize between ring-buffer producers and consumers. In the example
+below, P0() is one side checking to see if an operation may proceed and
+P1() is the other side completing its update.
+
+ 1 C LB+fencembonceonce+ctrlonceonce
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r0;
+ 8
+ 9 r0 = READ_ONCE(*x);
+10 if (r0)
+11 WRITE_ONCE(*y, 1);
+12 }
+13
+14 P1(int *x, int *y)
+15 {
+16 int r0;
+17
+18 r0 = READ_ONCE(*y);
+19 smp_mb();
+20 WRITE_ONCE(*x, 1);
+21 }
+22
+23 exists (0:r0=1 /\ 1:r0=1)
+
+P1()'s "if" statement on line 10 works as expected, so that line 11 is
+executed only if line 9 loads a non-zero value from "x". Because P1()'s
+write of "1" to "x" happens only after P1()'s read from "y", one would
+hope that the "exists" clause cannot be satisfied. LKMM agrees:
+
+ 1 Test LB+fencembonceonce+ctrlonceonce Allowed
+ 2 States 2
+ 3 0:r0=0; 1:r0=0;
+ 4 0:r0=1; 1:r0=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (0:r0=1 /\ 1:r0=1)
+ 9 Observation LB+fencembonceonce+ctrlonceonce Never 0 2
+10 Time LB+fencembonceonce+ctrlonceonce 0.00
+11 Hash=e5260556f6de495fd39b556d1b831c3b
+
+However, there is no "while" statement due to the fact that full
+state-space search has some difficulty with iteration. However, there
+are tricks that may be used to handle some special cases, which are
+discussed below. In addition, loop-unrolling tricks may be applied,
+albeit sparingly.
+
+
+Tricks and Traps
+================
+
+This section covers extracting debug output from herd7, emulating
+spin loops, handling trivial linked lists, adding comments to litmus tests,
+emulating call_rcu(), and finally tricks to improve herd7 performance
+in order to better handle large litmus tests.
+
+
+Debug Output
+------------
+
+By default, the herd7 state output includes all variables mentioned
+in the "exists" clause. But sometimes debugging efforts are greatly
+aided by the values of other variables. Consider this litmus test
+(tools/memory-order/litmus-tests/SB+rfionceonce-poonceonces.litmus but
+slightly modified), which probes an obscure corner of hardware memory
+ordering:
+
+ 1 C SB+rfionceonce-poonceonces
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r1;
+ 8 int r2;
+ 9
+10 WRITE_ONCE(*x, 1);
+11 r1 = READ_ONCE(*x);
+12 r2 = READ_ONCE(*y);
+13 }
+14
+15 P1(int *x, int *y)
+16 {
+17 int r3;
+18 int r4;
+19
+20 WRITE_ONCE(*y, 1);
+21 r3 = READ_ONCE(*y);
+22 r4 = READ_ONCE(*x);
+23 }
+24
+25 exists (0:r2=0 /\ 1:r4=0)
+
+The herd7 output is as follows:
+
+ 1 Test SB+rfionceonce-poonceonces Allowed
+ 2 States 4
+ 3 0:r2=0; 1:r4=0;
+ 4 0:r2=0; 1:r4=1;
+ 5 0:r2=1; 1:r4=0;
+ 6 0:r2=1; 1:r4=1;
+ 7 Ok
+ 8 Witnesses
+ 9 Positive: 1 Negative: 3
+10 Condition exists (0:r2=0 /\ 1:r4=0)
+11 Observation SB+rfionceonce-poonceonces Sometimes 1 3
+12 Time SB+rfionceonce-poonceonces 0.01
+13 Hash=c7f30fe0faebb7d565405d55b7318ada
+
+(This output indicates that CPUs are permitted to "snoop their own
+store buffers", which all of Linux's CPU families other than s390 will
+happily do. Such snooping results in disagreement among CPUs on the
+order of stores from different CPUs, which is rarely an issue.)
+
+But the herd7 output shows only the two variables mentioned in the
+"exists" clause. Someone modifying this test might wish to know the
+values of "x", "y", "0:r1", and "0:r3" as well. The "locations"
+statement on line 25 shows how to cause herd7 to display additional
+variables:
+
+ 1 C SB+rfionceonce-poonceonces
+ 2
+ 3 {}
+ 4
+ 5 P0(int *x, int *y)
+ 6 {
+ 7 int r1;
+ 8 int r2;
+ 9
+10 WRITE_ONCE(*x, 1);
+11 r1 = READ_ONCE(*x);
+12 r2 = READ_ONCE(*y);
+13 }
+14
+15 P1(int *x, int *y)
+16 {
+17 int r3;
+18 int r4;
+19
+20 WRITE_ONCE(*y, 1);
+21 r3 = READ_ONCE(*y);
+22 r4 = READ_ONCE(*x);
+23 }
+24
+25 locations [0:r1; 1:r3; x; y]
+26 exists (0:r2=0 /\ 1:r4=0)
+
+The herd7 output then displays the values of all the variables:
+
+ 1 Test SB+rfionceonce-poonceonces Allowed
+ 2 States 4
+ 3 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=0; x=1; y=1;
+ 4 0:r1=1; 0:r2=0; 1:r3=1; 1:r4=1; x=1; y=1;
+ 5 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=0; x=1; y=1;
+ 6 0:r1=1; 0:r2=1; 1:r3=1; 1:r4=1; x=1; y=1;
+ 7 Ok
+ 8 Witnesses
+ 9 Positive: 1 Negative: 3
+10 Condition exists (0:r2=0 /\ 1:r4=0)
+11 Observation SB+rfionceonce-poonceonces Sometimes 1 3
+12 Time SB+rfionceonce-poonceonces 0.01
+13 Hash=40de8418c4b395388f6501cafd1ed38d
+
+What if you would like to know the value of a particular global variable
+at some particular point in a given process's execution? One approach
+is to use a READ_ONCE() to load that global variable into a new local
+variable, then add that local variable to the "locations" clause.
+But be careful: In some litmus tests, adding a READ_ONCE() will change
+the outcome! For one example, please see the C-READ_ONCE.litmus and
+C-READ_ONCE-omitted.litmus tests located here:
+
+ https://github.com/paulmckrcu/litmus/blob/master/manual/kernel/
+
+
+Spin Loops
+----------
+
+The analysis carried out by herd7 explores full state space, which is
+at best of exponential time complexity. Adding processes and increasing
+the amount of code in a give process can greatly increase execution time.
+Potentially infinite loops, such as those used to wait for locks to
+become available, are clearly problematic.
+
+Fortunately, it is possible to avoid state-space explosion by specially
+modeling such loops. For example, the following litmus tests emulates
+locking using xchg_acquire(), but instead of enclosing xchg_acquire()
+in a spin loop, it instead excludes executions that fail to acquire the
+lock using a herd7 "filter" clause. Note that for exclusive locking, you
+are better off using the spin_lock() and spin_unlock() that LKMM directly
+models, if for no other reason that these are much faster. However, the
+techniques illustrated in this section can be used for other purposes,
+such as emulating reader-writer locking, which LKMM does not yet model.
+
+ 1 C C-SB+l-o-o-u+l-o-o-u-X
+ 2
+ 3 {
+ 4 }
+ 5
+ 6 P0(int *sl, int *x0, int *x1)
+ 7 {
+ 8 int r2;
+ 9 int r1;
+10
+11 r2 = xchg_acquire(sl, 1);
+12 WRITE_ONCE(*x0, 1);
+13 r1 = READ_ONCE(*x1);
+14 smp_store_release(sl, 0);
+15 }
+16
+17 P1(int *sl, int *x0, int *x1)
+18 {
+19 int r2;
+20 int r1;
+21
+22 r2 = xchg_acquire(sl, 1);
+23 WRITE_ONCE(*x1, 1);
+24 r1 = READ_ONCE(*x0);
+25 smp_store_release(sl, 0);
+26 }
+27
+28 filter (0:r2=0 /\ 1:r2=0)
+29 exists (0:r1=0 /\ 1:r1=0)
+
+This litmus test may be found here:
+
+https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd/C-SB+l-o-o-u+l-o-o-u-X.litmus
+
+This test uses two global variables, "x1" and "x2", and also emulates a
+single global spinlock named "sl". This spinlock is held by whichever
+process changes the value of "sl" from "0" to "1", and is released when
+that process sets "sl" back to "0". P0()'s lock acquisition is emulated
+on line 11 using xchg_acquire(), which unconditionally stores the value
+"1" to "sl" and stores either "0" or "1" to "r2", depending on whether
+the lock acquisition was successful or unsuccessful (due to "sl" already
+having the value "1"), respectively. P1() operates in a similar manner.
+
+Rather unconventionally, execution appears to proceed to the critical
+section on lines 12 and 13 in either case. Line 14 then uses an
+smp_store_release() to store zero to "sl", thus emulating lock release.
+
+The case where xchg_acquire() fails to acquire the lock is handled by
+the "filter" clause on line 28, which tells herd7 to keep only those
+executions in which both "0:r2" and "1:r2" are zero, that is to pay
+attention only to those executions in which both locks are actually
+acquired. Thus, the bogus executions that would execute the critical
+sections are discarded and any effects that they might have had are
+ignored. Note well that the "filter" clause keeps those executions
+for which its expression is satisfied, that is, for which the expression
+evaluates to true. In other words, the "filter" clause says what to
+keep, not what to discard.
+
+The result of running this test is as follows:
+
+ 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed
+ 2 States 2
+ 3 0:r1=0; 1:r1=1;
+ 4 0:r1=1; 1:r1=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (0:r1=0 /\ 1:r1=0)
+ 9 Observation C-SB+l-o-o-u+l-o-o-u-X Never 0 2
+10 Time C-SB+l-o-o-u+l-o-o-u-X 0.03
+
+The "Never" on line 9 indicates that this use of xchg_acquire() and
+smp_store_release() really does correctly emulate locking.
+
+Why doesn't the litmus test take the simpler approach of using a spin loop
+to handle failed spinlock acquisitions, like the kernel does? The key
+insight behind this litmus test is that spin loops have no effect on the
+possible "exists"-clause outcomes of program execution in the absence
+of deadlock. In other words, given a high-quality lock-acquisition
+primitive in a deadlock-free program running on high-quality hardware,
+each lock acquisition will eventually succeed. Because herd7 already
+explores the full state space, the length of time required to actually
+acquire the lock does not matter. After all, herd7 already models all
+possible durations of the xchg_acquire() statements.
+
+Why not just add the "filter" clause to the "exists" clause, thus
+avoiding the "filter" clause entirely? This does work, but is slower.
+The reason that the "filter" clause is faster is that (in the common case)
+herd7 knows to abandon an execution as soon as the "filter" expression
+fails to be satisfied. In contrast, the "exists" clause is evaluated
+only at the end of time, thus requiring herd7 to waste time on bogus
+executions in which both critical sections proceed concurrently. In
+addition, some LKMM users like the separation of concerns provided by
+using the both the "filter" and "exists" clauses.
+
+Readers lacking a pathological interest in odd corner cases should feel
+free to skip the remainder of this section.
+
+But what if the litmus test were to temporarily set "0:r2" to a non-zero
+value? Wouldn't that cause herd7 to abandon the execution prematurely
+due to an early mismatch of the "filter" clause?
+
+Why not just try it? Line 4 of the following modified litmus test
+introduces a new global variable "x2" that is initialized to "1". Line 23
+of P1() reads that variable into "1:r2" to force an early mismatch with
+the "filter" clause. Line 24 does a known-true "if" condition to avoid
+and static analysis that herd7 might do. Finally the "exists" clause
+on line 32 is updated to a condition that is alway satisfied at the end
+of the test.
+
+ 1 C C-SB+l-o-o-u+l-o-o-u-X
+ 2
+ 3 {
+ 4 x2=1;
+ 5 }
+ 6
+ 7 P0(int *sl, int *x0, int *x1)
+ 8 {
+ 9 int r2;
+10 int r1;
+11
+12 r2 = xchg_acquire(sl, 1);
+13 WRITE_ONCE(*x0, 1);
+14 r1 = READ_ONCE(*x1);
+15 smp_store_release(sl, 0);
+16 }
+17
+18 P1(int *sl, int *x0, int *x1, int *x2)
+19 {
+20 int r2;
+21 int r1;
+22
+23 r2 = READ_ONCE(*x2);
+24 if (r2)
+25 r2 = xchg_acquire(sl, 1);
+26 WRITE_ONCE(*x1, 1);
+27 r1 = READ_ONCE(*x0);
+28 smp_store_release(sl, 0);
+29 }
+30
+31 filter (0:r2=0 /\ 1:r2=0)
+32 exists (x1=1)
+
+If the "filter" clause were to check each variable at each point in the
+execution, running this litmus test would display no executions because
+all executions would be filtered out at line 23. However, the output
+is instead as follows:
+
+ 1 Test C-SB+l-o-o-u+l-o-o-u-X Allowed
+ 2 States 1
+ 3 x1=1;
+ 4 Ok
+ 5 Witnesses
+ 6 Positive: 2 Negative: 0
+ 7 Condition exists (x1=1)
+ 8 Observation C-SB+l-o-o-u+l-o-o-u-X Always 2 0
+ 9 Time C-SB+l-o-o-u+l-o-o-u-X 0.04
+10 Hash=080bc508da7f291e122c6de76c0088e3
+
+Line 3 shows that there is one execution that did not get filtered out,
+so the "filter" clause is evaluated only on the last assignment to
+the variables that it checks. In this case, the "filter" clause is a
+disjunction, so it might be evaluated twice, once at the final (and only)
+assignment to "0:r2" and once at the final assignment to "1:r2".
+
+
+Linked Lists
+------------
+
+LKMM can handle linked lists, but only linked lists in which each node
+contains nothing except a pointer to the next node in the list. This is
+of course quite restrictive, but there is nevertheless quite a bit that
+can be done within these confines, as can be seen in the litmus test
+at tools/memory-model/litmus-tests/MP+onceassign+derefonce.litmus:
+
+ 1 C MP+onceassign+derefonce
+ 2
+ 3 {
+ 4 y=z;
+ 5 z=0;
+ 6 }
+ 7
+ 8 P0(int *x, int **y)
+ 9 {
+10 WRITE_ONCE(*x, 1);
+11 rcu_assign_pointer(*y, x);
+12 }
+13
+14 P1(int *x, int **y)
+15 {
+16 int *r0;
+17 int r1;
+18
+19 rcu_read_lock();
+20 r0 = rcu_dereference(*y);
+21 r1 = READ_ONCE(*r0);
+22 rcu_read_unlock();
+23 }
+24
+25 exists (1:r0=x /\ 1:r1=0)
+
+Line 4's "y=z" may seem odd, given that "z" has not yet been initialized.
+But "y=z" does not set the value of "y" to that of "z", but instead
+sets the value of "y" to the *address* of "z". Lines 4 and 5 therefore
+create a simple linked list, with "y" pointing to "z" and "z" having a
+NULL pointer. A much longer linked list could be created if desired,
+and circular singly linked lists can also be created and manipulated.
+
+The "exists" clause works the same way, with the "1:r0=x" comparing P1()'s
+"r0" not to the value of "x", but again to its address. This term of the
+"exists" clause therefore tests whether line 20's load from "y" saw the
+value stored by line 11, which is in fact what is required in this case.
+
+P0()'s line 10 initializes "x" to the value 1 then line 11 links to "x"
+from "y", replacing "z".
+
+P1()'s line 20 loads a pointer from "y", and line 21 dereferences that
+pointer. The RCU read-side critical section spanning lines 19-22 is just
+for show in this example. Note that the address used for line 21's load
+depends on (in this case, "is exactly the same as") the value loaded by
+line 20. This is an example of what is called an "address dependency".
+This particular address dependency extends from the load on line 20 to the
+load on line 21. Address dependencies provide a weak form of ordering.
+
+Running this test results in the following:
+
+ 1 Test MP+onceassign+derefonce Allowed
+ 2 States 2
+ 3 1:r0=x; 1:r1=1;
+ 4 1:r0=z; 1:r1=0;
+ 5 No
+ 6 Witnesses
+ 7 Positive: 0 Negative: 2
+ 8 Condition exists (1:r0=x /\ 1:r1=0)
+ 9 Observation MP+onceassign+derefonce Never 0 2
+10 Time MP+onceassign+derefonce 0.00
+11 Hash=49ef7a741563570102448a256a0c8568
+
+The only possible outcomes feature P1() loading a pointer to "z"
+(which contains zero) on the one hand and P1() loading a pointer to "x"
+(which contains the value one) on the other. This should be reassuring
+because it says that RCU readers cannot see the old preinitialization
+values when accessing a newly inserted list node. This undesirable
+scenario is flagged by the "exists" clause, and would occur if P1()
+loaded a pointer to "x", but obtained the pre-initialization value of
+zero after dereferencing that pointer.
+
+
+Comments
+--------
+
+Different portions of a litmus test are processed by different parsers,
+which has the charming effect of requiring different comment syntax in
+different portions of the litmus test. The C-syntax portions use
+C-language comments (either "/* */" or "//"), while the other portions
+use Ocaml comments "(* *)".
+
+The following litmus test illustrates the comment style corresponding
+to each syntactic unit of the test:
+
+ 1 C MP+onceassign+derefonce (* A *)
+ 2
+ 3 (* B *)
+ 4
+ 5 {
+ 6 y=z; (* C *)
+ 7 z=0;
+ 8 } // D
+ 9
+10 // E
+11
+12 P0(int *x, int **y) // F
+13 {
+14 WRITE_ONCE(*x, 1); // G
+15 rcu_assign_pointer(*y, x);
+16 }
+17
+18 // H
+19
+20 P1(int *x, int **y)
+21 {
+22 int *r0;
+23 int r1;
+24
+25 rcu_read_lock();
+26 r0 = rcu_dereference(*y);
+27 r1 = READ_ONCE(*r0);
+28 rcu_read_unlock();
+29 }
+30
+31 // I
+32
+33 exists (* J *) (1:r0=x /\ (* K *) 1:r1=0) (* L *)
+
+In short, use C-language comments in the C code and Ocaml comments in
+the rest of the litmus test.
+
+On the other hand, if you prefer C-style comments everywhere, the
+C preprocessor is your friend.
+
+
+Asynchronous RCU Grace Periods
+------------------------------
+
+The following litmus test is derived from the example show in
+Documentation/litmus-tests/rcu/RCU+sync+free.litmus, but converted to
+emulate call_rcu():
+
+ 1 C RCU+sync+free
+ 2
+ 3 {
+ 4 int x = 1;
+ 5 int *y = &x;
+ 6 int z = 1;
+ 7 }
+ 8
+ 9 P0(int *x, int *z, int **y)
+10 {
+11 int *r0;
+12 int r1;
+13
+14 rcu_read_lock();
+15 r0 = rcu_dereference(*y);
+16 r1 = READ_ONCE(*r0);
+17 rcu_read_unlock();
+18 }
+19
+20 P1(int *z, int **y, int *c)
+21 {
+22 rcu_assign_pointer(*y, z);
+23 smp_store_release(*c, 1); // Emulate call_rcu().
+24 }
+25
+26 P2(int *x, int *z, int **y, int *c)
+27 {
+28 int r0;
+29
+30 r0 = smp_load_acquire(*c); // Note call_rcu() request.
+31 synchronize_rcu(); // Wait one grace period.
+32 WRITE_ONCE(*x, 0); // Emulate the RCU callback.
+33 }
+34
+35 filter (2:r0=1) (* Reject too-early starts. *)
+36 exists (0:r0=x /\ 0:r1=0)
+
+Lines 4-6 initialize a linked list headed by "y" that initially contains
+"x". In addition, "z" is pre-initialized to prepare for P1(), which
+will replace "x" with "z" in this list.
+
+P0() on lines 9-18 enters an RCU read-side critical section, loads the
+list header "y" and dereferences it, leaving the node in "0:r0" and
+the node's value in "0:r1".
+
+P1() on lines 20-24 updates the list header to instead reference "z",
+then emulates call_rcu() by doing a release store into "c".
+
+P2() on lines 27-33 emulates the behind-the-scenes effect of doing a
+call_rcu(). Line 30 first does an acquire load from "c", then line 31
+waits for an RCU grace period to elapse, and finally line 32 emulates
+the RCU callback, which in turn emulates a call to kfree().
+
+Of course, it is possible for P2() to start too soon, so that the
+value of "2:r0" is zero rather than the required value of "1".
+The "filter" clause on line 35 handles this possibility, rejecting
+all executions in which "2:r0" is not equal to the value "1".
+
+
+Performance
+-----------
+
+LKMM's exploration of the full state-space can be extremely helpful,
+but it does not come for free. The price is exponential computational
+complexity in terms of the number of processes, the average number
+of statements in each process, and the total number of stores in the
+litmus test.
+
+So it is best to start small and then work up. Where possible, break
+your code down into small pieces each representing a core concurrency
+requirement.
+
+That said, herd7 is quite fast. On an unprepossessing x86 laptop, it
+was able to analyze the following 10-process RCU litmus test in about
+six seconds.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R.litmus
+
+One way to make herd7 run faster is to use the "-speedcheck true" option.
+This option prevents herd7 from generating all possible end states,
+instead causing it to focus solely on whether or not the "exists"
+clause can be satisfied. With this option, herd7 evaluates the above
+litmus test in about 300 milliseconds, for more than an order of magnitude
+improvement in performance.
+
+Larger 16-process litmus tests that would normally consume 15 minutes
+of time complete in about 40 seconds with this option. To be fair,
+you do get an extra 65,535 states when you leave off the "-speedcheck
+true" option.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R.litmus
+
+Nevertheless, litmus-test analysis really is of exponential complexity,
+whether with or without "-speedcheck true". Increasing by just three
+processes to a 19-process litmus test requires 2 hours and 40 minutes
+without, and about 8 minutes with "-speedcheck true". Each of these
+results represent roughly an order of magnitude slowdown compared to the
+16-process litmus test. Again, to be fair, the multi-hour run explores
+no fewer than 524,287 additional states compared to the shorter one.
+
+https://github.com/paulmckrcu/litmus/blob/master/auto/C-RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R+RW-R+RW-R+RW-R+RW-G+RW-G+RW-G+RW-G+RW-R+RW-R+RW-R.litmus
+
+If you don't like command-line arguments, you can obtain a similar speedup
+by adding a "filter" clause with exactly the same expression as your
+"exists" clause.
+
+However, please note that seeing the full set of states can be extremely
+helpful when developing and debugging litmus tests.
+
+
+LIMITATIONS
+===========
+
+Limitations of the Linux-kernel memory model (LKMM) include:
+
+1. Compiler optimizations are not accurately modeled. Of course,
+ the use of READ_ONCE() and WRITE_ONCE() limits the compiler's
+ ability to optimize, but under some circumstances it is possible
+ for the compiler to undermine the memory model. For more
+ information, see Documentation/explanation.txt (in particular,
+ the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING"
+ sections).
+
+ Note that this limitation in turn limits LKMM's ability to
+ accurately model address, control, and data dependencies.
+ For example, if the compiler can deduce the value of some variable
+ carrying a dependency, then the compiler can break that dependency
+ by substituting a constant of that value.
+
+2. Multiple access sizes for a single variable are not supported,
+ and neither are misaligned or partially overlapping accesses.
+
+3. Exceptions and interrupts are not modeled. In some cases,
+ this limitation can be overcome by modeling the interrupt or
+ exception with an additional process.
+
+4. I/O such as MMIO or DMA is not supported.
+
+5. Self-modifying code (such as that found in the kernel's
+ alternatives mechanism, function tracer, Berkeley Packet Filter
+ JIT compiler, and module loader) is not supported.
+
+6. Complete modeling of all variants of atomic read-modify-write
+ operations, locking primitives, and RCU is not provided.
+ For example, call_rcu() and rcu_barrier() are not supported.
+ However, a substantial amount of support is provided for these
+ operations, as shown in the linux-kernel.def file.
+
+ Here are specific limitations:
+
+ a. When rcu_assign_pointer() is passed NULL, the Linux
+ kernel provides no ordering, but LKMM models this
+ case as a store release.
+
+ b. The "unless" RMW operations are not currently modeled:
+ atomic_long_add_unless(), atomic_inc_unless_negative(),
+ and atomic_dec_unless_positive(). These can be emulated
+ in litmus tests, for example, by using atomic_cmpxchg().
+
+ One exception of this limitation is atomic_add_unless(),
+ which is provided directly by herd7 (so no corresponding
+ definition in linux-kernel.def). atomic_add_unless() is
+ modeled by herd7 therefore it can be used in litmus tests.
+
+ c. The call_rcu() function is not modeled. As was shown above,
+ it can be emulated in litmus tests by adding another
+ process that invokes synchronize_rcu() and the body of the
+ callback function, with (for example) a release-acquire
+ from the site of the emulated call_rcu() to the beginning
+ of the additional process.
+
+ d. The rcu_barrier() function is not modeled. It can be
+ emulated in litmus tests emulating call_rcu() via
+ (for example) a release-acquire from the end of each
+ additional call_rcu() process to the site of the
+ emulated rcu-barrier().
+
+ e. Although sleepable RCU (SRCU) is now modeled, there
+ are some subtle differences between its semantics and
+ those in the Linux kernel. For example, the kernel
+ might interpret the following sequence as two partially
+ overlapping SRCU read-side critical sections:
+
+ 1 r1 = srcu_read_lock(&my_srcu);
+ 2 do_something_1();
+ 3 r2 = srcu_read_lock(&my_srcu);
+ 4 do_something_2();
+ 5 srcu_read_unlock(&my_srcu, r1);
+ 6 do_something_3();
+ 7 srcu_read_unlock(&my_srcu, r2);
+
+ In contrast, LKMM will interpret this as a nested pair of
+ SRCU read-side critical sections, with the outer critical
+ section spanning lines 1-7 and the inner critical section
+ spanning lines 3-5.
+
+ This difference would be more of a concern had anyone
+ identified a reasonable use case for partially overlapping
+ SRCU read-side critical sections. For more information
+ on the trickiness of such overlapping, please see:
+ https://paulmck.livejournal.com/40593.html
+
+ f. Reader-writer locking is not modeled. It can be
+ emulated in litmus tests using atomic read-modify-write
+ operations.
+
+The fragment of the C language supported by these litmus tests is quite
+limited and in some ways non-standard:
+
+1. There is no automatic C-preprocessor pass. You can of course
+ run it manually, if you choose.
+
+2. There is no way to create functions other than the Pn() functions
+ that model the concurrent processes.
+
+3. The Pn() functions' formal parameters must be pointers to the
+ global shared variables. Nothing can be passed by value into
+ these functions.
+
+4. The only functions that can be invoked are those built directly
+ into herd7 or that are defined in the linux-kernel.def file.
+
+5. The "switch", "do", "for", "while", and "goto" C statements are
+ not supported. The "switch" statement can be emulated by the
+ "if" statement. The "do", "for", and "while" statements can
+ often be emulated by manually unrolling the loop, or perhaps by
+ enlisting the aid of the C preprocessor to minimize the resulting
+ code duplication. Some uses of "goto" can be emulated by "if",
+ and some others by unrolling.
+
+6. Although you can use a wide variety of types in litmus-test
+ variable declarations, and especially in global-variable
+ declarations, the "herd7" tool understands only int and
+ pointer types. There is no support for floating-point types,
+ enumerations, characters, strings, arrays, or structures.
+
+7. Parsing of variable declarations is very loose, with almost no
+ type checking.
+
+8. Initializers differ from their C-language counterparts.
+ For example, when an initializer contains the name of a shared
+ variable, that name denotes a pointer to that variable, not
+ the current value of that variable. For example, "int x = y"
+ is interpreted the way "int x = &y" would be in C.
+
+9. Dynamic memory allocation is not supported, although this can
+ be worked around in some cases by supplying multiple statically
+ allocated variables.
+
+Some of these limitations may be overcome in the future, but others are
+more likely to be addressed by incorporating the Linux-kernel memory model
+into other tools.
+
+Finally, please note that LKMM is subject to change as hardware, use cases,
+and compilers evolve.
diff --git a/tools/memory-model/Documentation/recipes.txt b/tools/memory-model/Documentation/recipes.txt
index 63c4adfed884..03f58b11c252 100644
--- a/tools/memory-model/Documentation/recipes.txt
+++ b/tools/memory-model/Documentation/recipes.txt
@@ -1,7 +1,7 @@
This document provides "recipes", that is, litmus tests for commonly
occurring situations, as well as a few that illustrate subtly broken but
attractive nuisances. Many of these recipes include example code from
-v4.13 of the Linux kernel.
+v5.7 of the Linux kernel.
The first section covers simple special cases, the second section
takes off the training wheels to cover more involved examples,
@@ -278,7 +278,7 @@ is present if the value loaded determines the address of a later access
first place (control dependency). Note that the term "data dependency"
is sometimes casually used to cover both address and data dependencies.
-In lib/prime_numbers.c, the expand_to_next_prime() function invokes
+In lib/math/prime_numbers.c, the expand_to_next_prime() function invokes
rcu_assign_pointer(), and the next_prime_number() function invokes
rcu_dereference(). This combination mediates access to a bit vector
that is expanded as additional primes are needed.
diff --git a/tools/memory-model/Documentation/references.txt b/tools/memory-model/Documentation/references.txt
index ecbbaa5396d4..c5fdfd19df24 100644
--- a/tools/memory-model/Documentation/references.txt
+++ b/tools/memory-model/Documentation/references.txt
@@ -120,7 +120,7 @@ o Jade Alglave, Luc Maranget, and Michael Tautschnig. 2014. "Herding
o Jade Alglave, Patrick Cousot, and Luc Maranget. 2016. "Syntax and
semantics of the weak consistency model specification language
- cat". CoRR abs/1608.07531 (2016). http://arxiv.org/abs/1608.07531
+ cat". CoRR abs/1608.07531 (2016). https://arxiv.org/abs/1608.07531
Memory-model comparisons
diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt
new file mode 100644
index 000000000000..81e1a0ec5342
--- /dev/null
+++ b/tools/memory-model/Documentation/simple.txt
@@ -0,0 +1,271 @@
+This document provides options for those wishing to keep their
+memory-ordering lives simple, as is necessary for those whose domain
+is complex. After all, there are bugs other than memory-ordering bugs,
+and the time spent gaining memory-ordering knowledge is not available
+for gaining domain knowledge. Furthermore Linux-kernel memory model
+(LKMM) is quite complex, with subtle differences in code often having
+dramatic effects on correctness.
+
+The options near the beginning of this list are quite simple. The idea
+is not that kernel hackers don't already know about them, but rather
+that they might need the occasional reminder.
+
+Please note that this is a generic guide, and that specific subsystems
+will often have special requirements or idioms. For example, developers
+of MMIO-based device drivers will often need to use mb(), rmb(), and
+wmb(), and therefore might find smp_mb(), smp_rmb(), and smp_wmb()
+to be more natural than smp_load_acquire() and smp_store_release().
+On the other hand, those coming in from other environments will likely
+be more familiar with these last two.
+
+
+Single-threaded code
+====================
+
+In single-threaded code, there is no reordering, at least assuming
+that your toolchain and hardware are working correctly. In addition,
+it is generally a mistake to assume your code will only run in a single
+threaded context as the kernel can enter the same code path on multiple
+CPUs at the same time. One important exception is a function that makes
+no external data references.
+
+In the general case, you will need to take explicit steps to ensure that
+your code really is executed within a single thread that does not access
+shared variables. A simple way to achieve this is to define a global lock
+that you acquire at the beginning of your code and release at the end,
+taking care to ensure that all references to your code's shared data are
+also carried out under that same lock. Because only one thread can hold
+this lock at a given time, your code will be executed single-threaded.
+This approach is called "code locking".
+
+Code locking can severely limit both performance and scalability, so it
+should be used with caution, and only on code paths that execute rarely.
+After all, a huge amount of effort was required to remove the Linux
+kernel's old "Big Kernel Lock", so let's please be very careful about
+adding new "little kernel locks".
+
+One of the advantages of locking is that, in happy contrast with the
+year 1981, almost all kernel developers are very familiar with locking.
+The Linux kernel's lockdep (CONFIG_PROVE_LOCKING=y) is very helpful with
+the formerly feared deadlock scenarios.
+
+Please use the standard locking primitives provided by the kernel rather
+than rolling your own. For one thing, the standard primitives interact
+properly with lockdep. For another thing, these primitives have been
+tuned to deal better with high contention. And for one final thing, it is
+surprisingly hard to correctly code production-quality lock acquisition
+and release functions. After all, even simple non-production-quality
+locking functions must carefully prevent both the CPU and the compiler
+from moving code in either direction across the locking function.
+
+Despite the scalability limitations of single-threaded code, RCU
+takes this approach for much of its grace-period processing and also
+for early-boot operation. The reason RCU is able to scale despite
+single-threaded grace-period processing is use of batching, where all
+updates that accumulated during one grace period are handled by the
+next one. In other words, slowing down grace-period processing makes
+it more efficient. Nor is RCU unique: Similar batching optimizations
+are used in many I/O operations.
+
+
+Packaged code
+=============
+
+Even if performance and scalability concerns prevent your code from
+being completely single-threaded, it is often possible to use library
+functions that handle the concurrency nearly or entirely on their own.
+This approach delegates any LKMM worries to the library maintainer.
+
+In the kernel, what is the "library"? Quite a bit. It includes the
+contents of the lib/ directory, much of the include/linux/ directory along
+with a lot of other heavily used APIs. But heavily used examples include
+the list macros (for example, include/linux/{,rcu}list.h), workqueues,
+smp_call_function(), and the various hash tables and search trees.
+
+
+Data locking
+============
+
+With code locking, we use single-threaded code execution to guarantee
+serialized access to the data that the code is accessing. However,
+we can also achieve this by instead associating the lock with specific
+instances of the data structures. This creates a "critical section"
+in the code execution that will execute as though it is single threaded.
+By placing all the accesses and modifications to a shared data structure
+inside a critical section, we ensure that the execution context that
+holds the lock has exclusive access to the shared data.
+
+The poster boy for this approach is the hash table, where placing a lock
+in each hash bucket allows operations on different buckets to proceed
+concurrently. This works because the buckets do not overlap with each
+other, so that an operation on one bucket does not interfere with any
+other bucket.
+
+As the number of buckets increases, data locking scales naturally.
+In particular, if the amount of data increases with the number of CPUs,
+increasing the number of buckets as the number of CPUs increase results
+in a naturally scalable data structure.
+
+
+Per-CPU processing
+==================
+
+Partitioning processing and data over CPUs allows each CPU to take
+a single-threaded approach while providing excellent performance and
+scalability. Of course, there is no free lunch: The dark side of this
+excellence is substantially increased memory footprint.
+
+In addition, it is sometimes necessary to occasionally update some global
+view of this processing and data, in which case something like locking
+must be used to protect this global view. This is the approach taken
+by the percpu_counter infrastructure. In many cases, there are already
+generic/library variants of commonly used per-cpu constructs available.
+Please use them rather than rolling your own.
+
+RCU uses DEFINE_PER_CPU*() declaration to create a number of per-CPU
+data sets. For example, each CPU does private quiescent-state processing
+within its instance of the per-CPU rcu_data structure, and then uses data
+locking to report quiescent states up the grace-period combining tree.
+
+
+Packaged primitives: Sequence locking
+=====================================
+
+Lockless programming is considered by many to be more difficult than
+lock-based programming, but there are a few lockless design patterns that
+have been built out into an API. One of these APIs is sequence locking.
+Although this APIs can be used in extremely complex ways, there are simple
+and effective ways of using it that avoid the need to pay attention to
+memory ordering.
+
+The basic keep-things-simple rule for sequence locking is "do not write
+in read-side code". Yes, you can do writes from within sequence-locking
+readers, but it won't be so simple. For example, such writes will be
+lockless and should be idempotent.
+
+For more sophisticated use cases, LKMM can guide you, including use
+cases involving combining sequence locking with other synchronization
+primitives. (LKMM does not yet know about sequence locking, so it is
+currently necessary to open-code it in your litmus tests.)
+
+Additional information may be found in include/linux/seqlock.h.
+
+Packaged primitives: RCU
+========================
+
+Another lockless design pattern that has been baked into an API
+is RCU. The Linux kernel makes sophisticated use of RCU, but the
+keep-things-simple rules for RCU are "do not write in read-side code"
+and "do not update anything that is visible to and accessed by readers",
+and "protect updates with locking".
+
+These rules are illustrated by the functions foo_update_a() and
+foo_get_a() shown in Documentation/RCU/whatisRCU.rst. Additional
+RCU usage patterns maybe found in Documentation/RCU and in the
+source code.
+
+
+Packaged primitives: Atomic operations
+======================================
+
+Back in the day, the Linux kernel had three types of atomic operations:
+
+1. Initialization and read-out, such as atomic_set() and atomic_read().
+
+2. Operations that did not return a value and provided no ordering,
+ such as atomic_inc() and atomic_dec().
+
+3. Operations that returned a value and provided full ordering, such as
+ atomic_add_return() and atomic_dec_and_test(). Note that some
+ value-returning operations provide full ordering only conditionally.
+ For example, cmpxchg() provides ordering only upon success.
+
+More recent kernels have operations that return a value but do not
+provide full ordering. These are flagged with either a _relaxed()
+suffix (providing no ordering), or an _acquire() or _release() suffix
+(providing limited ordering).
+
+Additional information may be found in these files:
+
+Documentation/atomic_t.txt
+Documentation/atomic_bitops.txt
+Documentation/core-api/atomic_ops.rst
+Documentation/core-api/refcount-vs-atomic.rst
+
+Reading code using these primitives is often also quite helpful.
+
+
+Lockless, fully ordered
+=======================
+
+When using locking, there often comes a time when it is necessary
+to access some variable or another without holding the data lock
+that serializes access to that variable.
+
+If you want to keep things simple, use the initialization and read-out
+operations from the previous section only when there are no racing
+accesses. Otherwise, use only fully ordered operations when accessing
+or modifying the variable. This approach guarantees that code prior
+to a given access to that variable will be seen by all CPUs has having
+happened before any code following any later access to that same variable.
+
+Please note that per-CPU functions are not atomic operations and
+hence they do not provide any ordering guarantees at all.
+
+If the lockless accesses are frequently executed reads that are used
+only for heuristics, or if they are frequently executed writes that
+are used only for statistics, please see the next section.
+
+
+Lockless statistics and heuristics
+==================================
+
+Unordered primitives such as atomic_read(), atomic_set(), READ_ONCE(), and
+WRITE_ONCE() can safely be used in some cases. These primitives provide
+no ordering, but they do prevent the compiler from carrying out a number
+of destructive optimizations (for which please see the next section).
+One example use for these primitives is statistics, such as per-CPU
+counters exemplified by the rt_cache_stat structure's routing-cache
+statistics counters. Another example use case is heuristics, such as
+the jiffies_till_first_fqs and jiffies_till_next_fqs kernel parameters
+controlling how often RCU scans for idle CPUs.
+
+But be careful. "Unordered" really does mean "unordered". It is all
+too easy to assume ordering, and this assumption must be avoided when
+using these primitives.
+
+
+Don't let the compiler trip you up
+==================================
+
+It can be quite tempting to use plain C-language accesses for lockless
+loads from and stores to shared variables. Although this is both
+possible and quite common in the Linux kernel, it does require a
+surprising amount of analysis, care, and knowledge about the compiler.
+Yes, some decades ago it was not unfair to consider a C compiler to be
+an assembler with added syntax and better portability, but the advent of
+sophisticated optimizing compilers mean that those days are long gone.
+Today's optimizing compilers can profoundly rewrite your code during the
+translation process, and have long been ready, willing, and able to do so.
+
+Therefore, if you really need to use C-language assignments instead of
+READ_ONCE(), WRITE_ONCE(), and so on, you will need to have a very good
+understanding of both the C standard and your compiler. Here are some
+introductory references and some tooling to start you on this noble quest:
+
+Who's afraid of a big bad optimizing compiler?
+ https://lwn.net/Articles/793253/
+Calibrating your fear of big bad optimizing compilers
+ https://lwn.net/Articles/799218/
+Concurrency bugs should fear the big bad data-race detector (part 1)
+ https://lwn.net/Articles/816850/
+Concurrency bugs should fear the big bad data-race detector (part 2)
+ https://lwn.net/Articles/816854/
+
+
+More complex use cases
+======================
+
+If the alternatives above do not do what you need, please look at the
+recipes-pairs.txt file to peel off the next layer of the memory-ordering
+onion.
diff --git a/tools/memory-model/README b/tools/memory-model/README
index ecb7385376bf..c8144d4aafa0 100644
--- a/tools/memory-model/README
+++ b/tools/memory-model/README
@@ -63,10 +63,32 @@ BASIC USAGE: HERD7
==================
The memory model is used, in conjunction with "herd7", to exhaustively
-explore the state space of small litmus tests.
+explore the state space of small litmus tests. Documentation describing
+the format, features, capabilities and limitations of these litmus
+tests is available in tools/memory-model/Documentation/litmus-tests.txt.
-For example, to run SB+fencembonceonces.litmus against the memory model:
+Example litmus tests may be found in the Linux-kernel source tree:
+ tools/memory-model/litmus-tests/
+ Documentation/litmus-tests/
+
+Several thousand more example litmus tests are available here:
+
+ https://github.com/paulmckrcu/litmus
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/herd
+ https://git.kernel.org/pub/scm/linux/kernel/git/paulmck/perfbook.git/tree/CodeSamples/formal/litmus
+
+Documentation describing litmus tests and now to use them may be found
+here:
+
+ tools/memory-model/Documentation/litmus-tests.txt
+
+The remainder of this section uses the SB+fencembonceonces.litmus test
+located in the tools/memory-model directory.
+
+To run SB+fencembonceonces.litmus against the memory model:
+
+ $ cd $LINUX_SOURCE_TREE/tools/memory-model
$ herd7 -conf linux-kernel.cfg litmus-tests/SB+fencembonceonces.litmus
Here is the corresponding output:
@@ -87,7 +109,11 @@ Here is the corresponding output:
The "Positive: 0 Negative: 3" and the "Never 0 3" each indicate that
this litmus test's "exists" clause can not be satisfied.
-See "herd7 -help" or "herdtools7/doc/" for more information.
+See "herd7 -help" or "herdtools7/doc/" for more information on running the
+tool itself, but please be aware that this documentation is intended for
+people who work on the memory model itself, that is, people making changes
+to the tools/memory-model/linux-kernel.* files. It is not intended for
+people focusing on writing, understanding, and running LKMM litmus tests.
=====================
@@ -124,7 +150,11 @@ that during two million trials, the state specified in this litmus
test's "exists" clause was not reached.
And, as with "herd7", please see "klitmus7 -help" or "herdtools7/doc/"
-for more information.
+for more information. And again, please be aware that this documentation
+is intended for people who work on the memory model itself, that is,
+people making changes to the tools/memory-model/linux-kernel.* files.
+It is not intended for people focusing on writing, understanding, and
+running LKMM litmus tests.
====================
@@ -137,12 +167,21 @@ Documentation/cheatsheet.txt
Documentation/explanation.txt
Describes the memory model in detail.
+Documentation/litmus-tests.txt
+ Describes the format, features, capabilities, and limitations
+ of the litmus tests that LKMM can evaluate.
+
Documentation/recipes.txt
Lists common memory-ordering patterns.
Documentation/references.txt
Provides background reading.
+Documentation/simple.txt
+ Starting point for someone new to Linux-kernel concurrency.
+ And also for those needing a reminder of the simpler approaches
+ to concurrency!
+
linux-kernel.bell
Categorizes the relevant instructions, including memory
references, memory barriers, atomic read-modify-write operations,
@@ -187,116 +226,3 @@ README
This file.
scripts Various scripts, see scripts/README.
-
-
-===========
-LIMITATIONS
-===========
-
-The Linux-kernel memory model (LKMM) has the following limitations:
-
-1. Compiler optimizations are not accurately modeled. Of course,
- the use of READ_ONCE() and WRITE_ONCE() limits the compiler's
- ability to optimize, but under some circumstances it is possible
- for the compiler to undermine the memory model. For more
- information, see Documentation/explanation.txt (in particular,
- the "THE PROGRAM ORDER RELATION: po AND po-loc" and "A WARNING"
- sections).
-
- Note that this limitation in turn limits LKMM's ability to
- accurately model address, control, and data dependencies.
- For example, if the compiler can deduce the value of some variable
- carrying a dependency, then the compiler can break that dependency
- by substituting a constant of that value.
-
-2. Multiple access sizes for a single variable are not supported,
- and neither are misaligned or partially overlapping accesses.
-
-3. Exceptions and interrupts are not modeled. In some cases,
- this limitation can be overcome by modeling the interrupt or
- exception with an additional process.
-
-4. I/O such as MMIO or DMA is not supported.
-
-5. Self-modifying code (such as that found in the kernel's
- alternatives mechanism, function tracer, Berkeley Packet Filter
- JIT compiler, and module loader) is not supported.
-
-6. Complete modeling of all variants of atomic read-modify-write
- operations, locking primitives, and RCU is not provided.
- For example, call_rcu() and rcu_barrier() are not supported.
- However, a substantial amount of support is provided for these
- operations, as shown in the linux-kernel.def file.
-
- a. When rcu_assign_pointer() is passed NULL, the Linux
- kernel provides no ordering, but LKMM models this
- case as a store release.
-
- b. The "unless" RMW operations are not currently modeled:
- atomic_long_add_unless(), atomic_inc_unless_negative(),
- and atomic_dec_unless_positive(). These can be emulated
- in litmus tests, for example, by using atomic_cmpxchg().
-
- One exception of this limitation is atomic_add_unless(),
- which is provided directly by herd7 (so no corresponding
- definition in linux-kernel.def). atomic_add_unless() is
- modeled by herd7 therefore it can be used in litmus tests.
-
- c. The call_rcu() function is not modeled. It can be
- emulated in litmus tests by adding another process that
- invokes synchronize_rcu() and the body of the callback
- function, with (for example) a release-acquire from
- the site of the emulated call_rcu() to the beginning
- of the additional process.
-
- d. The rcu_barrier() function is not modeled. It can be
- emulated in litmus tests emulating call_rcu() via
- (for example) a release-acquire from the end of each
- additional call_rcu() process to the site of the
- emulated rcu-barrier().
-
- e. Although sleepable RCU (SRCU) is now modeled, there
- are some subtle differences between its semantics and
- those in the Linux kernel. For example, the kernel
- might interpret the following sequence as two partially
- overlapping SRCU read-side critical sections:
-
- 1 r1 = srcu_read_lock(&my_srcu);
- 2 do_something_1();
- 3 r2 = srcu_read_lock(&my_srcu);
- 4 do_something_2();
- 5 srcu_read_unlock(&my_srcu, r1);
- 6 do_something_3();
- 7 srcu_read_unlock(&my_srcu, r2);
-
- In contrast, LKMM will interpret this as a nested pair of
- SRCU read-side critical sections, with the outer critical
- section spanning lines 1-7 and the inner critical section
- spanning lines 3-5.
-
- This difference would be more of a concern had anyone
- identified a reasonable use case for partially overlapping
- SRCU read-side critical sections. For more information,
- please see: https://paulmck.livejournal.com/40593.html
-
- f. Reader-writer locking is not modeled. It can be
- emulated in litmus tests using atomic read-modify-write
- operations.
-
-The "herd7" tool has some additional limitations of its own, apart from
-the memory model:
-
-1. Non-trivial data structures such as arrays or structures are
- not supported. However, pointers are supported, allowing trivial
- linked lists to be constructed.
-
-2. Dynamic memory allocation is not supported, although this can
- be worked around in some cases by supplying multiple statically
- allocated variables.
-
-Some of these limitations may be overcome in the future, but others are
-more likely to be addressed by incorporating the Linux-kernel memory model
-into other tools.
-
-Finally, please note that LKMM is subject to change as hardware, use cases,
-and compilers evolve.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index 7770edcda3a0..4ea9a833dde7 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -37,7 +37,7 @@ INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
-I$(srctree)/tools/arch/$(SRCARCH)/include \
-I$(srctree)/tools/objtool/arch/$(SRCARCH)/include
-WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
+WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
@@ -55,6 +55,10 @@ ifeq ($(SRCARCH),x86)
SUBCMD_ORC := y
endif
+ifeq ($(SUBCMD_ORC),y)
+ CFLAGS += -DINSN_USE_ORC
+endif
+
export SUBCMD_CHECK SUBCMD_ORC
export srctree OUTPUT CFLAGS SRCARCH AWK
include $(srctree)/tools/build/Makefile.include
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index 2e2ce089b0e9..4a84c3081b8e 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -11,7 +11,9 @@
#include "objtool.h"
#include "cfi.h"
+#ifdef INSN_USE_ORC
#include <asm/orc_types.h>
+#endif
enum insn_type {
INSN_JUMP_CONDITIONAL,
@@ -86,4 +88,6 @@ unsigned long arch_dest_reloc_offset(int addend);
const char *arch_nop_insn(int len);
+int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
+
#endif /* _ARCH_H */
diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
index 7c5004008e97..9f7869b5c5e0 100644
--- a/tools/objtool/arch/x86/Build
+++ b/tools/objtool/arch/x86/Build
@@ -1,3 +1,4 @@
+objtool-y += special.o
objtool-y += decode.o
inat_tables_script = ../arch/x86/tools/gen-insn-attr-x86.awk
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 1967370440b3..cde9c36e40ae 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -15,6 +15,7 @@
#include "../../elf.h"
#include "../../arch.h"
#include "../../warn.h"
+#include <asm/orc_types.h>
static unsigned char op_to_cfi_reg[][2] = {
{CFI_AX, CFI_R8},
@@ -583,3 +584,39 @@ const char *arch_nop_insn(int len)
return nops[len-1];
}
+
+int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
+{
+ struct cfi_reg *cfa = &insn->cfi.cfa;
+
+ switch (sp_reg) {
+ case ORC_REG_UNDEFINED:
+ cfa->base = CFI_UNDEFINED;
+ break;
+ case ORC_REG_SP:
+ cfa->base = CFI_SP;
+ break;
+ case ORC_REG_BP:
+ cfa->base = CFI_BP;
+ break;
+ case ORC_REG_SP_INDIRECT:
+ cfa->base = CFI_SP_INDIRECT;
+ break;
+ case ORC_REG_R10:
+ cfa->base = CFI_R10;
+ break;
+ case ORC_REG_R13:
+ cfa->base = CFI_R13;
+ break;
+ case ORC_REG_DI:
+ cfa->base = CFI_DI;
+ break;
+ case ORC_REG_DX:
+ cfa->base = CFI_DX;
+ break;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/tools/objtool/arch/x86/include/arch_special.h b/tools/objtool/arch/x86/include/arch_special.h
new file mode 100644
index 000000000000..d818b2bffa02
--- /dev/null
+++ b/tools/objtool/arch/x86/include/arch_special.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _X86_ARCH_SPECIAL_H
+#define _X86_ARCH_SPECIAL_H
+
+#define EX_ENTRY_SIZE 12
+#define EX_ORIG_OFFSET 0
+#define EX_NEW_OFFSET 4
+
+#define JUMP_ENTRY_SIZE 16
+#define JUMP_ORIG_OFFSET 0
+#define JUMP_NEW_OFFSET 4
+
+#define ALT_ENTRY_SIZE 13
+#define ALT_ORIG_OFFSET 0
+#define ALT_NEW_OFFSET 4
+#define ALT_FEATURE_OFFSET 8
+#define ALT_ORIG_LEN_OFFSET 10
+#define ALT_NEW_LEN_OFFSET 11
+
+#endif /* _X86_ARCH_SPECIAL_H */
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
new file mode 100644
index 000000000000..fd4af88c0ea5
--- /dev/null
+++ b/tools/objtool/arch/x86/special.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <string.h>
+
+#include "../../special.h"
+#include "../../builtin.h"
+
+#define X86_FEATURE_POPCNT (4 * 32 + 23)
+#define X86_FEATURE_SMAP (9 * 32 + 20)
+
+void arch_handle_alternative(unsigned short feature, struct special_alt *alt)
+{
+ switch (feature) {
+ case X86_FEATURE_SMAP:
+ /*
+ * If UACCESS validation is enabled; force that alternative;
+ * otherwise force it the other way.
+ *
+ * What we want to avoid is having both the original and the
+ * alternative code flow at the same time, in that case we can
+ * find paths that see the STAC but take the NOP instead of
+ * CLAC and the other way around.
+ */
+ if (uaccess)
+ alt->skip_orig = true;
+ else
+ alt->skip_alt = true;
+ break;
+ case X86_FEATURE_POPCNT:
+ /*
+ * It has been requested that we don't validate the !POPCNT
+ * feature path which is a "very very small percentage of
+ * machines".
+ */
+ alt->skip_orig = true;
+ break;
+ default:
+ break;
+ }
+}
+
+bool arch_support_alt_relocation(struct special_alt *special_alt,
+ struct instruction *insn,
+ struct reloc *reloc)
+{
+ /*
+ * The x86 alternatives code adjusts the offsets only when it
+ * encounters a branch instruction at the very beginning of the
+ * replacement group.
+ */
+ return insn->offset == special_alt->new_off &&
+ (insn->type == INSN_CALL || is_static_jump(insn));
+}
+
+/*
+ * There are 3 basic jump table patterns:
+ *
+ * 1. jmpq *[rodata addr](,%reg,8)
+ *
+ * This is the most common case by far. It jumps to an address in a simple
+ * jump table which is stored in .rodata.
+ *
+ * 2. jmpq *[rodata addr](%rip)
+ *
+ * This is caused by a rare GCC quirk, currently only seen in three driver
+ * functions in the kernel, only with certain obscure non-distro configs.
+ *
+ * As part of an optimization, GCC makes a copy of an existing switch jump
+ * table, modifies it, and then hard-codes the jump (albeit with an indirect
+ * jump) to use a single entry in the table. The rest of the jump table and
+ * some of its jump targets remain as dead code.
+ *
+ * In such a case we can just crudely ignore all unreachable instruction
+ * warnings for the entire object file. Ideally we would just ignore them
+ * for the function, but that would require redesigning the code quite a
+ * bit. And honestly that's just not worth doing: unreachable instruction
+ * warnings are of questionable value anyway, and this is such a rare issue.
+ *
+ * 3. mov [rodata addr],%reg1
+ * ... some instructions ...
+ * jmpq *(%reg1,%reg2,8)
+ *
+ * This is a fairly uncommon pattern which is new for GCC 6. As of this
+ * writing, there are 11 occurrences of it in the allmodconfig kernel.
+ *
+ * As of GCC 7 there are quite a few more of these and the 'in between' code
+ * is significant. Esp. with KASAN enabled some of the code between the mov
+ * and jmpq uses .rodata itself, which can confuse things.
+ *
+ * TODO: Once we have DWARF CFI and smarter instruction decoding logic,
+ * ensure the same register is used in the mov and jump instructions.
+ *
+ * NOTE: RETPOLINE made it harder still to decode dynamic jumps.
+ */
+struct reloc *arch_find_switch_table(struct objtool_file *file,
+ struct instruction *insn)
+{
+ struct reloc *text_reloc, *rodata_reloc;
+ struct section *table_sec;
+ unsigned long table_offset;
+
+ /* look for a relocation which references .rodata */
+ text_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+ insn->offset, insn->len);
+ if (!text_reloc || text_reloc->sym->type != STT_SECTION ||
+ !text_reloc->sym->sec->rodata)
+ return NULL;
+
+ table_offset = text_reloc->addend;
+ table_sec = text_reloc->sym->sec;
+
+ if (text_reloc->type == R_X86_64_PC32)
+ table_offset += 4;
+
+ /*
+ * Make sure the .rodata address isn't associated with a
+ * symbol. GCC jump tables are anonymous data.
+ *
+ * Also support C jump tables which are in the same format as
+ * switch jump tables. For objtool to recognize them, they
+ * need to be placed in the C_JUMP_TABLE_SECTION section. They
+ * have symbols associated with them.
+ */
+ if (find_symbol_containing(table_sec, table_offset) &&
+ strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
+ return NULL;
+
+ /*
+ * Each table entry has a rela associated with it. The rela
+ * should reference text in the same function as the original
+ * instruction.
+ */
+ rodata_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset);
+ if (!rodata_reloc)
+ return NULL;
+
+ /*
+ * Use of RIP-relative switch jumps is quite rare, and
+ * indicates a rare GCC quirk/bug which can leave dead
+ * code behind.
+ */
+ if (text_reloc->type == R_X86_64_PC32)
+ file->ignore_unreachables = true;
+
+ return rodata_reloc;
+}
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 7a44174967b5..c6d199bfd0ae 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -41,6 +41,8 @@ const struct option check_options[] = {
int cmd_check(int argc, const char **argv)
{
const char *objname, *s;
+ struct objtool_file *file;
+ int ret;
argc = parse_options(argc, argv, check_options, check_usage, 0);
@@ -53,5 +55,16 @@ int cmd_check(int argc, const char **argv)
if (s && !s[9])
vmlinux = true;
- return check(objname, false);
+ file = objtool_open_read(objname);
+ if (!file)
+ return 1;
+
+ ret = check(file);
+ if (ret)
+ return ret;
+
+ if (file->elf->changed)
+ return elf_write(file->elf);
+
+ return 0;
}
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index b1dfe2007962..7b31121fa60b 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -31,13 +31,38 @@ int cmd_orc(int argc, const char **argv)
usage_with_options(orc_usage, check_options);
if (!strncmp(argv[0], "gen", 3)) {
+ struct objtool_file *file;
+ int ret;
+
argc = parse_options(argc, argv, check_options, orc_usage, 0);
if (argc != 1)
usage_with_options(orc_usage, check_options);
objname = argv[0];
- return check(objname, true);
+ file = objtool_open_read(objname);
+ if (!file)
+ return 1;
+
+ ret = check(file);
+ if (ret)
+ return ret;
+
+ if (list_empty(&file->insn_list))
+ return 0;
+
+ ret = create_orc(file);
+ if (ret)
+ return ret;
+
+ ret = create_orc_sections(file);
+ if (ret)
+ return ret;
+
+ if (!file->elf->changed)
+ return 0;
+
+ return elf_write(file->elf);
}
if (!strcmp(argv[0], "dump")) {
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index e034a8f24f46..c6ab44543c92 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -14,20 +14,19 @@
#include "warn.h"
#include "arch_elf.h"
+#include <linux/objtool.h>
#include <linux/hashtable.h>
#include <linux/kernel.h>
+#include <linux/static_call_types.h>
#define FAKE_JUMP_OFFSET -1
-#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
-
struct alternative {
struct list_head list;
struct instruction *insn;
bool skip_orig;
};
-const char *objname;
struct cfi_init_state initial_func_cfi;
struct instruction *find_insn(struct objtool_file *file,
@@ -110,12 +109,6 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
for (insn = next_insn_same_sec(file, insn); insn; \
insn = next_insn_same_sec(file, insn))
-static bool is_static_jump(struct instruction *insn)
-{
- return insn->type == INSN_JUMP_CONDITIONAL ||
- insn->type == INSN_JUMP_UNCONDITIONAL;
-}
-
static bool is_sibling_call(struct instruction *insn)
{
/* An indirect jump is either a sibling call or a jump to a table. */
@@ -433,6 +426,103 @@ reachable:
return 0;
}
+static int create_static_call_sections(struct objtool_file *file)
+{
+ struct section *sec, *reloc_sec;
+ struct reloc *reloc;
+ struct static_call_site *site;
+ struct instruction *insn;
+ struct symbol *key_sym;
+ char *key_name, *tmp;
+ int idx;
+
+ sec = find_section_by_name(file->elf, ".static_call_sites");
+ if (sec) {
+ INIT_LIST_HEAD(&file->static_call_list);
+ WARN("file already has .static_call_sites section, skipping");
+ return 0;
+ }
+
+ if (list_empty(&file->static_call_list))
+ return 0;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->static_call_list, static_call_node)
+ idx++;
+
+ sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
+ sizeof(struct static_call_site), idx);
+ if (!sec)
+ return -1;
+
+ reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+ if (!reloc_sec)
+ return -1;
+
+ idx = 0;
+ list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+
+ site = (struct static_call_site *)sec->data->d_buf + idx;
+ memset(site, 0, sizeof(struct static_call_site));
+
+ /* populate reloc for 'addr' */
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+ reloc->sym = insn->sec->sym;
+ reloc->addend = insn->offset;
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = idx * sizeof(struct static_call_site);
+ reloc->sec = reloc_sec;
+ elf_add_reloc(file->elf, reloc);
+
+ /* find key symbol */
+ key_name = strdup(insn->call_dest->name);
+ if (!key_name) {
+ perror("strdup");
+ return -1;
+ }
+ if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR,
+ STATIC_CALL_TRAMP_PREFIX_LEN)) {
+ WARN("static_call: trampoline name malformed: %s", key_name);
+ return -1;
+ }
+ tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN;
+ memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN);
+
+ key_sym = find_symbol_by_name(file->elf, tmp);
+ if (!key_sym) {
+ WARN("static_call: can't find static_call_key symbol: %s", tmp);
+ return -1;
+ }
+ free(key_name);
+
+ /* populate reloc for 'key' */
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+ reloc->sym = key_sym;
+ reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
+ reloc->type = R_X86_64_PC32;
+ reloc->offset = idx * sizeof(struct static_call_site) + 4;
+ reloc->sec = reloc_sec;
+ elf_add_reloc(file->elf, reloc);
+
+ idx++;
+ }
+
+ if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+ return -1;
+
+ return 0;
+}
+
/*
* Warnings shouldn't be reported for ignored functions.
*/
@@ -493,6 +583,8 @@ static const char *uaccess_safe_builtin[] = {
"__asan_store4_noabort",
"__asan_store8_noabort",
"__asan_store16_noabort",
+ "__kasan_check_read",
+ "__kasan_check_write",
/* KASAN in-line */
"__asan_report_load_n_noabort",
"__asan_report_load1_noabort",
@@ -528,6 +620,61 @@ static const char *uaccess_safe_builtin[] = {
"__tsan_write4",
"__tsan_write8",
"__tsan_write16",
+ "__tsan_read_write1",
+ "__tsan_read_write2",
+ "__tsan_read_write4",
+ "__tsan_read_write8",
+ "__tsan_read_write16",
+ "__tsan_atomic8_load",
+ "__tsan_atomic16_load",
+ "__tsan_atomic32_load",
+ "__tsan_atomic64_load",
+ "__tsan_atomic8_store",
+ "__tsan_atomic16_store",
+ "__tsan_atomic32_store",
+ "__tsan_atomic64_store",
+ "__tsan_atomic8_exchange",
+ "__tsan_atomic16_exchange",
+ "__tsan_atomic32_exchange",
+ "__tsan_atomic64_exchange",
+ "__tsan_atomic8_fetch_add",
+ "__tsan_atomic16_fetch_add",
+ "__tsan_atomic32_fetch_add",
+ "__tsan_atomic64_fetch_add",
+ "__tsan_atomic8_fetch_sub",
+ "__tsan_atomic16_fetch_sub",
+ "__tsan_atomic32_fetch_sub",
+ "__tsan_atomic64_fetch_sub",
+ "__tsan_atomic8_fetch_and",
+ "__tsan_atomic16_fetch_and",
+ "__tsan_atomic32_fetch_and",
+ "__tsan_atomic64_fetch_and",
+ "__tsan_atomic8_fetch_or",
+ "__tsan_atomic16_fetch_or",
+ "__tsan_atomic32_fetch_or",
+ "__tsan_atomic64_fetch_or",
+ "__tsan_atomic8_fetch_xor",
+ "__tsan_atomic16_fetch_xor",
+ "__tsan_atomic32_fetch_xor",
+ "__tsan_atomic64_fetch_xor",
+ "__tsan_atomic8_fetch_nand",
+ "__tsan_atomic16_fetch_nand",
+ "__tsan_atomic32_fetch_nand",
+ "__tsan_atomic64_fetch_nand",
+ "__tsan_atomic8_compare_exchange_strong",
+ "__tsan_atomic16_compare_exchange_strong",
+ "__tsan_atomic32_compare_exchange_strong",
+ "__tsan_atomic64_compare_exchange_strong",
+ "__tsan_atomic8_compare_exchange_weak",
+ "__tsan_atomic16_compare_exchange_weak",
+ "__tsan_atomic32_compare_exchange_weak",
+ "__tsan_atomic64_compare_exchange_weak",
+ "__tsan_atomic8_compare_exchange_val",
+ "__tsan_atomic16_compare_exchange_val",
+ "__tsan_atomic32_compare_exchange_val",
+ "__tsan_atomic64_compare_exchange_val",
+ "__tsan_atomic_thread_fence",
+ "__tsan_atomic_signal_fence",
/* KCOV */
"write_comp_data",
"check_kcov_mode",
@@ -548,8 +695,9 @@ static const char *uaccess_safe_builtin[] = {
"__ubsan_handle_shift_out_of_bounds",
/* misc */
"csum_partial_copy_generic",
- "__memcpy_mcsafe",
- "mcsafe_handle_tail",
+ "copy_mc_fragile",
+ "copy_mc_fragile_handle_tail",
+ "copy_mc_enhanced_fast_string",
"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
NULL
};
@@ -619,7 +767,7 @@ static int add_jump_destinations(struct objtool_file *file)
if (!is_static_jump(insn))
continue;
- if (insn->ignore || insn->offset == FAKE_JUMP_OFFSET)
+ if (insn->offset == FAKE_JUMP_OFFSET)
continue;
reloc = find_reloc_by_dest_range(file->elf, insn->sec,
@@ -649,6 +797,10 @@ static int add_jump_destinations(struct objtool_file *file)
} else {
/* external sibling call */
insn->call_dest = reloc->sym;
+ if (insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
continue;
}
@@ -700,6 +852,10 @@ static int add_jump_destinations(struct objtool_file *file)
/* internal sibling call */
insn->call_dest = insn->jump_dest->func;
+ if (insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
}
}
}
@@ -717,6 +873,17 @@ static void remove_insn_ops(struct instruction *insn)
}
}
+static struct symbol *find_call_destination(struct section *sec, unsigned long offset)
+{
+ struct symbol *call_dest;
+
+ call_dest = find_func_by_offset(sec, offset);
+ if (!call_dest)
+ call_dest = find_symbol_by_offset(sec, offset);
+
+ return call_dest;
+}
+
/*
* Find the destination instructions for all calls.
*/
@@ -734,9 +901,7 @@ static int add_call_destinations(struct objtool_file *file)
insn->offset, insn->len);
if (!reloc) {
dest_off = arch_jump_destination(insn);
- insn->call_dest = find_func_by_offset(insn->sec, dest_off);
- if (!insn->call_dest)
- insn->call_dest = find_symbol_by_offset(insn->sec, dest_off);
+ insn->call_dest = find_call_destination(insn->sec, dest_off);
if (insn->ignore)
continue;
@@ -754,8 +919,8 @@ static int add_call_destinations(struct objtool_file *file)
} else if (reloc->sym->type == STT_SECTION) {
dest_off = arch_dest_reloc_offset(reloc->addend);
- insn->call_dest = find_func_by_offset(reloc->sym->sec,
- dest_off);
+ insn->call_dest = find_call_destination(reloc->sym->sec,
+ dest_off);
if (!insn->call_dest) {
WARN_FUNC("can't find call dest symbol at %s+0x%lx",
insn->sec, insn->offset,
@@ -867,6 +1032,8 @@ static int handle_group_alt(struct objtool_file *file,
alt_group = alt_group_next_index++;
insn = *new_insn;
sec_for_each_insn_from(file, insn) {
+ struct reloc *alt_reloc;
+
if (insn->offset >= special_alt->new_off + special_alt->new_len)
break;
@@ -883,14 +1050,11 @@ static int handle_group_alt(struct objtool_file *file,
* .altinstr_replacement section, unless the arch's
* alternatives code can adjust the relative offsets
* accordingly.
- *
- * The x86 alternatives code adjusts the offsets only when it
- * encounters a branch instruction at the very beginning of the
- * replacement group.
*/
- if ((insn->offset != special_alt->new_off ||
- (insn->type != INSN_CALL && !is_static_jump(insn))) &&
- find_reloc_by_dest_range(file->elf, insn->sec, insn->offset, insn->len)) {
+ alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+ insn->offset, insn->len);
+ if (alt_reloc &&
+ !arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
WARN_FUNC("unsupported relocation in alternatives section",
insn->sec, insn->offset);
@@ -1092,56 +1256,15 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
}
/*
- * find_jump_table() - Given a dynamic jump, find the switch jump table in
- * .rodata associated with it.
- *
- * There are 3 basic patterns:
- *
- * 1. jmpq *[rodata addr](,%reg,8)
- *
- * This is the most common case by far. It jumps to an address in a simple
- * jump table which is stored in .rodata.
- *
- * 2. jmpq *[rodata addr](%rip)
- *
- * This is caused by a rare GCC quirk, currently only seen in three driver
- * functions in the kernel, only with certain obscure non-distro configs.
- *
- * As part of an optimization, GCC makes a copy of an existing switch jump
- * table, modifies it, and then hard-codes the jump (albeit with an indirect
- * jump) to use a single entry in the table. The rest of the jump table and
- * some of its jump targets remain as dead code.
- *
- * In such a case we can just crudely ignore all unreachable instruction
- * warnings for the entire object file. Ideally we would just ignore them
- * for the function, but that would require redesigning the code quite a
- * bit. And honestly that's just not worth doing: unreachable instruction
- * warnings are of questionable value anyway, and this is such a rare issue.
- *
- * 3. mov [rodata addr],%reg1
- * ... some instructions ...
- * jmpq *(%reg1,%reg2,8)
- *
- * This is a fairly uncommon pattern which is new for GCC 6. As of this
- * writing, there are 11 occurrences of it in the allmodconfig kernel.
- *
- * As of GCC 7 there are quite a few more of these and the 'in between' code
- * is significant. Esp. with KASAN enabled some of the code between the mov
- * and jmpq uses .rodata itself, which can confuse things.
- *
- * TODO: Once we have DWARF CFI and smarter instruction decoding logic,
- * ensure the same register is used in the mov and jump instructions.
- *
- * NOTE: RETPOLINE made it harder still to decode dynamic jumps.
+ * find_jump_table() - Given a dynamic jump, find the switch jump table
+ * associated with it.
*/
static struct reloc *find_jump_table(struct objtool_file *file,
struct symbol *func,
struct instruction *insn)
{
- struct reloc *text_reloc, *table_reloc;
+ struct reloc *table_reloc;
struct instruction *dest_insn, *orig_insn = insn;
- struct section *table_sec;
- unsigned long table_offset;
/*
* Backward search using the @first_jump_src links, these help avoid
@@ -1162,52 +1285,13 @@ static struct reloc *find_jump_table(struct objtool_file *file,
insn->jump_dest->offset > orig_insn->offset))
break;
- /* look for a relocation which references .rodata */
- text_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
- insn->offset, insn->len);
- if (!text_reloc || text_reloc->sym->type != STT_SECTION ||
- !text_reloc->sym->sec->rodata)
- continue;
-
- table_offset = text_reloc->addend;
- table_sec = text_reloc->sym->sec;
-
- if (text_reloc->type == R_X86_64_PC32)
- table_offset += 4;
-
- /*
- * Make sure the .rodata address isn't associated with a
- * symbol. GCC jump tables are anonymous data.
- *
- * Also support C jump tables which are in the same format as
- * switch jump tables. For objtool to recognize them, they
- * need to be placed in the C_JUMP_TABLE_SECTION section. They
- * have symbols associated with them.
- */
- if (find_symbol_containing(table_sec, table_offset) &&
- strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
- continue;
-
- /*
- * Each table entry has a reloc associated with it. The reloc
- * should reference text in the same function as the original
- * instruction.
- */
- table_reloc = find_reloc_by_dest(file->elf, table_sec, table_offset);
+ table_reloc = arch_find_switch_table(file, insn);
if (!table_reloc)
continue;
dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend);
if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func)
continue;
- /*
- * Use of RIP-relative switch jumps is quite rare, and
- * indicates a rare GCC quirk/bug which can leave dead code
- * behind.
- */
- if (text_reloc->type == R_X86_64_PC32)
- file->ignore_unreachables = true;
-
return table_reloc;
}
@@ -1350,32 +1434,7 @@ static int read_unwind_hints(struct objtool_file *file)
insn->hint = true;
- switch (hint->sp_reg) {
- case ORC_REG_UNDEFINED:
- cfa->base = CFI_UNDEFINED;
- break;
- case ORC_REG_SP:
- cfa->base = CFI_SP;
- break;
- case ORC_REG_BP:
- cfa->base = CFI_BP;
- break;
- case ORC_REG_SP_INDIRECT:
- cfa->base = CFI_SP_INDIRECT;
- break;
- case ORC_REG_R10:
- cfa->base = CFI_R10;
- break;
- case ORC_REG_R13:
- cfa->base = CFI_R13;
- break;
- case ORC_REG_DI:
- cfa->base = CFI_DI;
- break;
- case ORC_REG_DX:
- cfa->base = CFI_DX;
- break;
- default:
+ if (arch_decode_hint_reg(insn, hint->sp_reg)) {
WARN_FUNC("unsupported unwind_hint sp base reg %d",
insn->sec, insn->offset, hint->sp_reg);
return -1;
@@ -1522,6 +1581,23 @@ static int read_intra_function_calls(struct objtool_file *file)
return 0;
}
+static int read_static_call_tramps(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+
+ for_each_sec(file, sec) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->bind == STB_GLOBAL &&
+ !strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
+ strlen(STATIC_CALL_TRAMP_PREFIX_STR)))
+ func->static_call_tramp = true;
+ }
+ }
+
+ return 0;
+}
+
static void mark_rodata(struct objtool_file *file)
{
struct section *sec;
@@ -1569,6 +1645,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ ret = read_static_call_tramps(file);
+ if (ret)
+ return ret;
+
ret = add_jump_destinations(file);
if (ret)
return ret;
@@ -1768,7 +1848,8 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
return 0;
}
- if (cfi->type == ORC_TYPE_REGS || cfi->type == ORC_TYPE_REGS_IRET)
+ if (cfi->type == UNWIND_HINT_TYPE_REGS ||
+ cfi->type == UNWIND_HINT_TYPE_REGS_PARTIAL)
return update_cfi_state_regs(insn, cfi, op);
switch (op->dest.type) {
@@ -2016,7 +2097,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
/* drap: push %rbp */
cfi->stack_size = 0;
- } else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+ } else {
/* drap: push %reg */
save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size);
@@ -2045,9 +2126,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
/* save drap offset so we know when to restore it */
cfi->drap_offset = op->dest.offset;
- }
-
- else if (regs[op->src.reg].base == CFI_UNDEFINED) {
+ } else {
/* drap: mov reg, disp(%rbp) */
save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset);
@@ -2432,6 +2511,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (dead_end_function(file, insn->call_dest))
return 0;
+ if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->static_call_node,
+ &file->static_call_list);
+ }
+
break;
case INSN_JUMP_CONDITIONAL:
@@ -2612,9 +2696,10 @@ static bool is_ubsan_insn(struct instruction *insn)
"__ubsan_handle_builtin_unreachable"));
}
-static bool ignore_unreachable_insn(struct instruction *insn)
+static bool ignore_unreachable_insn(struct objtool_file *file, struct instruction *insn)
{
int i;
+ struct instruction *prev_insn;
if (insn->ignore || insn->type == INSN_NOP)
return true;
@@ -2631,6 +2716,9 @@ static bool ignore_unreachable_insn(struct instruction *insn)
!strcmp(insn->sec->name, ".altinstr_aux"))
return true;
+ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET)
+ return true;
+
if (!insn->func)
return false;
@@ -2639,8 +2727,11 @@ static bool ignore_unreachable_insn(struct instruction *insn)
* __builtin_unreachable(). The BUG() macro has an unreachable() after
* the UD2, which causes GCC's undefined trap logic to emit another UD2
* (or occasionally a JMP to UD2).
+ *
+ * It may also insert a UD2 after calling a __noreturn function.
*/
- if (list_prev_entry(insn, list)->dead_end &&
+ prev_insn = list_prev_entry(insn, list);
+ if ((prev_insn->dead_end || dead_end_function(file, prev_insn->call_dest)) &&
(insn->type == INSN_BUG ||
(insn->type == INSN_JUMP_UNCONDITIONAL &&
insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
@@ -2767,7 +2858,7 @@ static int validate_reachable_instructions(struct objtool_file *file)
return 0;
for_each_insn(file, insn) {
- if (insn->visited || ignore_unreachable_insn(insn))
+ if (insn->visited || ignore_unreachable_insn(file, insn))
continue;
WARN_FUNC("unreachable instruction", insn->sec, insn->offset);
@@ -2777,36 +2868,22 @@ static int validate_reachable_instructions(struct objtool_file *file)
return 0;
}
-static struct objtool_file file;
-
-int check(const char *_objname, bool orc)
+int check(struct objtool_file *file)
{
int ret, warnings = 0;
- objname = _objname;
-
- file.elf = elf_open_read(objname, O_RDWR);
- if (!file.elf)
- return 1;
-
- INIT_LIST_HEAD(&file.insn_list);
- hash_init(file.insn_hash);
- file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
- file.ignore_unreachables = no_unreachable;
- file.hints = false;
-
arch_initial_func_cfi_state(&initial_func_cfi);
- ret = decode_sections(&file);
+ ret = decode_sections(file);
if (ret < 0)
goto out;
warnings += ret;
- if (list_empty(&file.insn_list))
+ if (list_empty(&file->insn_list))
goto out;
if (vmlinux && !validate_dup) {
- ret = validate_vmlinux_functions(&file);
+ ret = validate_vmlinux_functions(file);
if (ret < 0)
goto out;
@@ -2815,44 +2892,33 @@ int check(const char *_objname, bool orc)
}
if (retpoline) {
- ret = validate_retpoline(&file);
+ ret = validate_retpoline(file);
if (ret < 0)
return ret;
warnings += ret;
}
- ret = validate_functions(&file);
+ ret = validate_functions(file);
if (ret < 0)
goto out;
warnings += ret;
- ret = validate_unwind_hints(&file, NULL);
+ ret = validate_unwind_hints(file, NULL);
if (ret < 0)
goto out;
warnings += ret;
if (!warnings) {
- ret = validate_reachable_instructions(&file);
+ ret = validate_reachable_instructions(file);
if (ret < 0)
goto out;
warnings += ret;
}
- if (orc) {
- ret = create_orc(&file);
- if (ret < 0)
- goto out;
-
- ret = create_orc_sections(&file);
- if (ret < 0)
- goto out;
- }
-
- if (file.elf->changed) {
- ret = elf_write(file.elf);
- if (ret < 0)
- goto out;
- }
+ ret = create_static_call_sections(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
out:
if (ret < 0) {
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 061aa96e15d3..5ec00a4b891b 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -22,6 +22,7 @@ struct insn_state {
struct instruction {
struct list_head list;
struct hlist_node hash;
+ struct list_head static_call_node;
struct section *sec;
unsigned long offset;
unsigned int len;
@@ -42,9 +43,17 @@ struct instruction {
struct symbol *func;
struct list_head stack_ops;
struct cfi_state cfi;
+#ifdef INSN_USE_ORC
struct orc_entry orc;
+#endif
};
+static inline bool is_static_jump(struct instruction *insn)
+{
+ return insn->type == INSN_JUMP_CONDITIONAL ||
+ insn->type == INSN_JUMP_UNCONDITIONAL;
+}
+
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset);
@@ -57,5 +66,4 @@ struct instruction *find_insn(struct objtool_file *file,
insn->sec == sec; \
insn = list_next_entry(insn, list))
-
#endif /* _CHECK_H */
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 3ddbd66f1a37..4e1d7460574b 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -652,7 +652,7 @@ err:
}
struct section *elf_create_section(struct elf *elf, const char *name,
- size_t entsize, int nr)
+ unsigned int sh_flags, size_t entsize, int nr)
{
struct section *sec, *shstrtab;
size_t size = entsize * nr;
@@ -712,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
sec->sh.sh_entsize = entsize;
sec->sh.sh_type = SHT_PROGBITS;
sec->sh.sh_addralign = 1;
- sec->sh.sh_flags = SHF_ALLOC;
+ sec->sh.sh_flags = SHF_ALLOC | sh_flags;
/* Add section name to .shstrtab (or .strtab for Clang) */
@@ -767,7 +767,7 @@ static struct section *elf_create_rel_reloc_section(struct elf *elf, struct sect
strcpy(relocname, ".rel");
strcat(relocname, base->name);
- sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0);
+ sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0);
free(relocname);
if (!sec)
return NULL;
@@ -797,7 +797,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec
strcpy(relocname, ".rela");
strcat(relocname, base->name);
- sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0);
+ sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0);
free(relocname);
if (!sec)
return NULL;
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 6cc80a075166..807f8c670097 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -56,6 +56,7 @@ struct symbol {
unsigned int len;
struct symbol *pfunc, *cfunc, *alias;
bool uaccess_safe;
+ bool static_call_tramp;
};
struct reloc {
@@ -120,7 +121,7 @@ static inline u32 reloc_hash(struct reloc *reloc)
}
struct elf *elf_open_read(const char *name, int flags);
-struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr);
+struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
void elf_add_reloc(struct elf *elf, struct reloc *reloc);
int elf_write_insn(struct elf *elf, struct section *sec,
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 58fdda510653..9df0cd86d310 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -22,6 +22,8 @@
#include <linux/kernel.h>
#include "builtin.h"
+#include "objtool.h"
+#include "warn.h"
struct cmd_struct {
const char *name;
@@ -39,6 +41,34 @@ static struct cmd_struct objtool_cmds[] = {
bool help;
+const char *objname;
+static struct objtool_file file;
+
+struct objtool_file *objtool_open_read(const char *_objname)
+{
+ if (objname) {
+ if (strcmp(objname, _objname)) {
+ WARN("won't handle more than one file at a time");
+ return NULL;
+ }
+ return &file;
+ }
+ objname = _objname;
+
+ file.elf = elf_open_read(objname, O_RDWR);
+ if (!file.elf)
+ return NULL;
+
+ INIT_LIST_HEAD(&file.insn_list);
+ hash_init(file.insn_hash);
+ INIT_LIST_HEAD(&file.static_call_list);
+ file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
+ file.ignore_unreachables = no_unreachable;
+ file.hints = false;
+
+ return &file;
+}
+
static void cmd_usage(void)
{
unsigned int i, longest = 0;
diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h
index 528028a66816..4125d4578b23 100644
--- a/tools/objtool/objtool.h
+++ b/tools/objtool/objtool.h
@@ -12,14 +12,19 @@
#include "elf.h"
+#define __weak __attribute__((weak))
+
struct objtool_file {
struct elf *elf;
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 20);
+ struct list_head static_call_list;
bool ignore_unreachables, c_file, hints, rodata;
};
-int check(const char *objname, bool orc);
+struct objtool_file *objtool_open_read(const char *_objname);
+
+int check(struct objtool_file *file);
int orc_dump(const char *objname);
int create_orc(struct objtool_file *file);
int create_orc_sections(struct objtool_file *file);
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index fca46e006fc2..5e6a95368d35 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -4,6 +4,7 @@
*/
#include <unistd.h>
+#include <linux/objtool.h>
#include <asm/orc_types.h>
#include "objtool.h"
#include "warn.h"
@@ -37,12 +38,12 @@ static const char *reg_name(unsigned int reg)
static const char *orc_type_name(unsigned int type)
{
switch (type) {
- case ORC_TYPE_CALL:
+ case UNWIND_HINT_TYPE_CALL:
return "call";
- case ORC_TYPE_REGS:
+ case UNWIND_HINT_TYPE_REGS:
return "regs";
- case ORC_TYPE_REGS_IRET:
- return "iret";
+ case UNWIND_HINT_TYPE_REGS_PARTIAL:
+ return "regs (partial)";
default:
return "?";
}
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 968f55e6dd94..235663b96adc 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -6,6 +6,9 @@
#include <stdlib.h>
#include <string.h>
+#include <linux/objtool.h>
+#include <asm/orc_types.h>
+
#include "check.h"
#include "warn.h"
@@ -18,6 +21,9 @@ int create_orc(struct objtool_file *file)
struct cfi_reg *cfa = &insn->cfi.cfa;
struct cfi_reg *bp = &insn->cfi.regs[CFI_BP];
+ if (!insn->sec->text)
+ continue;
+
orc->end = insn->cfi.end;
if (cfa->base == CFI_UNDEFINED) {
@@ -143,7 +149,7 @@ int create_orc_sections(struct objtool_file *file)
struct orc_entry empty = {
.sp_reg = ORC_REG_UNDEFINED,
.bp_reg = ORC_REG_UNDEFINED,
- .type = ORC_TYPE_CALL,
+ .type = UNWIND_HINT_TYPE_CALL,
};
sec = find_section_by_name(file->elf, ".orc_unwind");
@@ -177,7 +183,7 @@ int create_orc_sections(struct objtool_file *file)
/* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
- sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
+ sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
if (!sec)
return -1;
@@ -186,7 +192,7 @@ int create_orc_sections(struct objtool_file *file)
return -1;
/* create .orc_unwind section */
- u_sec = elf_create_section(file->elf, ".orc_unwind",
+ u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
sizeof(struct orc_entry), idx);
/* populate sections */
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index e893f1e48e44..1a2420febd08 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -14,24 +14,7 @@
#include "builtin.h"
#include "special.h"
#include "warn.h"
-
-#define EX_ENTRY_SIZE 12
-#define EX_ORIG_OFFSET 0
-#define EX_NEW_OFFSET 4
-
-#define JUMP_ENTRY_SIZE 16
-#define JUMP_ORIG_OFFSET 0
-#define JUMP_NEW_OFFSET 4
-
-#define ALT_ENTRY_SIZE 13
-#define ALT_ORIG_OFFSET 0
-#define ALT_NEW_OFFSET 4
-#define ALT_FEATURE_OFFSET 8
-#define ALT_ORIG_LEN_OFFSET 10
-#define ALT_NEW_LEN_OFFSET 11
-
-#define X86_FEATURE_POPCNT (4*32+23)
-#define X86_FEATURE_SMAP (9*32+20)
+#include "arch_special.h"
struct special_entry {
const char *sec;
@@ -68,6 +51,10 @@ struct special_entry entries[] = {
{},
};
+void __weak arch_handle_alternative(unsigned short feature, struct special_alt *alt)
+{
+}
+
static int get_alt_entry(struct elf *elf, struct special_entry *entry,
struct section *sec, int idx,
struct special_alt *alt)
@@ -92,30 +79,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
feature = *(unsigned short *)(sec->data->d_buf + offset +
entry->feature);
-
- /*
- * It has been requested that we don't validate the !POPCNT
- * feature path which is a "very very small percentage of
- * machines".
- */
- if (feature == X86_FEATURE_POPCNT)
- alt->skip_orig = true;
-
- /*
- * If UACCESS validation is enabled; force that alternative;
- * otherwise force it the other way.
- *
- * What we want to avoid is having both the original and the
- * alternative code flow at the same time, in that case we can
- * find paths that see the STAC but take the NOP instead of
- * CLAC and the other way around.
- */
- if (feature == X86_FEATURE_SMAP) {
- if (uaccess)
- alt->skip_orig = true;
- else
- alt->skip_alt = true;
- }
+ arch_handle_alternative(feature, alt);
}
orig_reloc = find_reloc_by_dest(elf, sec, offset + entry->orig);
diff --git a/tools/objtool/special.h b/tools/objtool/special.h
index 35061530e46e..abddf38ef334 100644
--- a/tools/objtool/special.h
+++ b/tools/objtool/special.h
@@ -7,8 +7,11 @@
#define _SPECIAL_H
#include <stdbool.h>
+#include "check.h"
#include "elf.h"
+#define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
+
struct special_alt {
struct list_head list;
@@ -28,4 +31,11 @@ struct special_alt {
int special_get_alts(struct elf *elf, struct list_head *alts);
+void arch_handle_alternative(unsigned short feature, struct special_alt *alt);
+
+bool arch_support_alt_relocation(struct special_alt *special_alt,
+ struct instruction *insn,
+ struct reloc *reloc);
+struct reloc *arch_find_switch_table(struct objtool_file *file,
+ struct instruction *insn);
#endif /* _SPECIAL_H */
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 2a1261bfbb62..606a4b5e929f 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -1,13 +1,27 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-FILES='
+if [ -z "$SRCARCH" ]; then
+ echo 'sync-check.sh: error: missing $SRCARCH environment variable' >&2
+ exit 1
+fi
+
+FILES="include/linux/objtool.h"
+
+if [ "$SRCARCH" = "x86" ]; then
+FILES="$FILES
arch/x86/include/asm/inat_types.h
arch/x86/include/asm/orc_types.h
arch/x86/include/asm/emulate_prefix.h
arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
-'
+include/linux/static_call_types.h
+arch/x86/include/asm/inat.h -I '^#include [\"<]\(asm/\)*inat_types.h[\">]'
+arch/x86/include/asm/insn.h -I '^#include [\"<]\(asm/\)*inat.h[\">]'
+arch/x86/lib/inat.c -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]'
+arch/x86/lib/insn.c -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]'
+"
+fi
check_2 () {
file1=$1
@@ -40,11 +54,12 @@ fi
cd ../..
-for i in $FILES; do
- check $i
-done
+while read -r file_entry; do
+ if [ -z "$file_entry" ]; then
+ continue
+ fi
-check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
-check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
-check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
-check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
+ check $file_entry
+done <<EOF
+$FILES
+EOF
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
index 942ea5e8ac36..7843e9a7a72f 100644
--- a/tools/objtool/weak.c
+++ b/tools/objtool/weak.c
@@ -9,17 +9,13 @@
#include <errno.h>
#include "objtool.h"
-#define __weak __attribute__((weak))
-
#define UNSUPPORTED(name) \
({ \
fprintf(stderr, "error: objtool: " name " not implemented\n"); \
return ENOSYS; \
})
-const char __weak *objname;
-
-int __weak check(const char *_objname, bool orc)
+int __weak check(struct objtool_file *file)
{
UNSUPPORTED("check subcommand");
}
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index e817179c5027..d3740c8f399b 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -18,6 +18,7 @@
l synthesize last branch entries (use with i or x)
L synthesize last branch entries on existing event records
s skip initial number of events
+ q quicker (less detailed) decoding
The default is all events i.e. the same as --itrace=ibxwpe,
except for perf script where it is --itrace=ce
@@ -47,3 +48,16 @@
--itrace=i0nss1000000
skips the first million instructions.
+
+ The 'e' option may be followed by flags which affect what errors will or
+ will not be reported. Each flag must be preceded by either '+' or '-'.
+ The flags are:
+ o overflow
+ l trace data lost
+
+ If supported, the 'd' option may be followed by flags which affect what
+ debug messages will or will not be logged. Each flag must be preceded
+ by either '+' or '-'. The flags are:
+ a all perf events
+
+ If supported, the 'q' option may be repeated to increase the effect.
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index bad16512c48d..a0529c7fa5ef 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -49,6 +49,9 @@ SUBSYSTEM
'sched'::
Scheduler and IPC mechanisms.
+'syscall'::
+ System call performance (throughput).
+
'mem'::
Memory access performance.
@@ -137,6 +140,14 @@ Example of *pipe*
59004 ops/sec
---------------------
+SUITES FOR 'syscall'
+~~~~~~~~~~~~~~~~~~
+*basic*::
+Suite for evaluating performance of core system call throughput (both usecs/op and ops/sec metrics).
+This uses a single thread simply doing getppid(2), which is a simple syscall where the result is not
+cached by glibc.
+
+
SUITES FOR 'mem'
~~~~~~~~~~~~~~~~
*memcpy*::
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index c7d3df5798e2..76408d986aed 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -614,8 +614,9 @@ trace.*::
ftrace.*::
ftrace.tracer::
- Can be used to select the default tracer. Possible values are
- 'function' and 'function_graph'.
+ Can be used to select the default tracer when neither -G nor
+ -F option is not specified. Possible values are 'function' and
+ 'function_graph'.
llvm.*::
llvm.clang-path::
diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt
index c87180764829..726b9bc9e1a7 100644
--- a/tools/perf/Documentation/perf-data.txt
+++ b/tools/perf/Documentation/perf-data.txt
@@ -27,6 +27,9 @@ OPTIONS for 'convert'
--to-ctf::
Triggers the CTF conversion, specify the path of CTF data directory.
+--tod::
+ Convert time to wall clock time.
+
-i::
Specify input perf data file path.
diff --git a/tools/perf/Documentation/perf-ftrace.txt b/tools/perf/Documentation/perf-ftrace.txt
index b80c84307dc9..78358af9a1c4 100644
--- a/tools/perf/Documentation/perf-ftrace.txt
+++ b/tools/perf/Documentation/perf-ftrace.txt
@@ -24,16 +24,28 @@ OPTIONS
-t::
--tracer=::
- Tracer to use: function_graph or function.
+ Tracer to use when neither -G nor -F option is not
+ specified: function_graph or function.
-v::
--verbose=::
Verbosity level.
+-F::
+--funcs::
+ List all available functions to trace.
+
-p::
--pid=::
Trace on existing process id (comma separated list).
+--tid=::
+ Trace on existing thread id (comma separated list).
+
+-D::
+--delay::
+ Time (ms) to wait before starting tracing after program start.
+
-a::
--all-cpus::
Force system-wide collection. Scripts run without a <command>
@@ -48,39 +60,58 @@ OPTIONS
Ranges of CPUs are specified with -: 0-2.
Default is to trace on all online CPUs.
+-m::
+--buffer-size::
+ Set the size of per-cpu tracing buffer, <size> is expected to
+ be a number with appended unit character - B/K/M/G.
+
+--inherit::
+ Trace children processes spawned by our target.
+
-T::
--trace-funcs=::
- Only trace functions given by the argument. Multiple functions
- can be given by using this option more than once. The function
- argument also can be a glob pattern. It will be passed to
- 'set_ftrace_filter' in tracefs.
+ Select function tracer and set function filter on the given
+ function (or a glob pattern). Multiple functions can be given
+ by using this option more than once. The function argument also
+ can be a glob pattern. It will be passed to 'set_ftrace_filter'
+ in tracefs.
-N::
--notrace-funcs=::
- Do not trace functions given by the argument. Like -T option,
- this can be used more than once to specify multiple functions
- (or glob patterns). It will be passed to 'set_ftrace_notrace'
- in tracefs.
+ Select function tracer and do not trace functions given by the
+ argument. Like -T option, this can be used more than once to
+ specify multiple functions (or glob patterns). It will be
+ passed to 'set_ftrace_notrace' in tracefs.
+
+--func-opts::
+ List of options allowed to set:
+ call-graph - Display kernel stack trace for function tracer.
+ irq-info - Display irq context info for function tracer.
-G::
--graph-funcs=::
- Set graph filter on the given function (or a glob pattern).
- This is useful for the function_graph tracer only and enables
- tracing for functions executed from the given function.
- This can be used more than once to specify multiple functions.
- It will be passed to 'set_graph_function' in tracefs.
+ Select function_graph tracer and set graph filter on the given
+ function (or a glob pattern). This is useful to trace for
+ functions executed from the given function. This can be used more
+ than once to specify multiple functions. It will be passed to
+ 'set_graph_function' in tracefs.
-g::
--nograph-funcs=::
- Set graph notrace filter on the given function (or a glob pattern).
- Like -G option, this is useful for the function_graph tracer only
- and disables tracing for function executed from the given function.
- This can be used more than once to specify multiple functions.
- It will be passed to 'set_graph_notrace' in tracefs.
+ Select function_graph tracer and set graph notrace filter on the
+ given function (or a glob pattern). Like -G option, this is useful
+ for the function_graph tracer only and disables tracing for function
+ executed from the given function. This can be used more than once to
+ specify multiple functions. It will be passed to 'set_graph_notrace'
+ in tracefs.
--D::
---graph-depth=::
- Set max depth for function graph tracer to follow
+--graph-opts::
+ List of options allowed to set:
+ nosleep-time - Measure on-CPU time only for function_graph tracer.
+ noirqs - Ignore functions that happen inside interrupt.
+ verbose - Show process names, PIDs, timestamps, etc.
+ thresh=<n> - Setup trace duration threshold in microseconds.
+ depth=<n> - Set max depth for function graph tracer to follow.
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index f4cd49a7fcdb..d5a266d7f15b 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -825,6 +825,7 @@ The letters are:
l synthesize last branch entries (use with i or x)
L synthesize last branch entries on existing event records
s skip initial number of events
+ q quicker (less detailed) decoding
"Instructions" events look like they were recorded by "perf record -e
instructions".
@@ -871,11 +872,24 @@ Developer Manuals.
Error events show where the decoder lost the trace. Error events
are quite important. Users must know if what they are seeing is a complete
-picture or not.
+picture or not. The "e" option may be followed by flags which affect what errors
+will or will not be reported. Each flag must be preceded by either '+' or '-'.
+The flags supported by Intel PT are:
+ -o Suppress overflow errors
+ -l Suppress trace data lost errors
+For example, for errors but not overflow or data lost errors:
+
+ --itrace=e-o-l
The "d" option will cause the creation of a file "intel_pt.log" containing all
decoded packets and instructions. Note that this option slows down the decoder
-and that the resulting file may be very large.
+and that the resulting file may be very large. The "d" option may be followed
+by flags which affect what debug messages will or will not be logged. Each flag
+must be preceded by either '+' or '-'. The flags support by Intel PT are:
+ -a Suppress logging of perf events
+ +a Log all perf events
+By default, logged perf events are filtered by any specified time ranges, but
+flag +a overrides that.
In addition, the period of the "instructions" event can be specified. e.g.
@@ -956,6 +970,51 @@ at the beginning. This is useful to ignore initialization code.
skips the first million instructions.
+The q option changes the way the trace is decoded. The decoding is much faster
+but much less detailed. Specifically, with the q option, the decoder does not
+decode TNT packets, and does not walk object code, but gets the ip from FUP and
+TIP packets. The q option can be used with the b and i options but the period
+is not used. The q option decodes more quickly, but is useful only if the
+control flow of interest is represented or indicated by FUP, TIP, TIP.PGE, or
+TIP.PGD packets (refer below). However the q option could be used to find time
+ranges that could then be decoded fully using the --time option.
+
+What will *not* be decoded with the (single) q option:
+
+ - direct calls and jmps
+ - conditional branches
+ - non-branch instructions
+
+What *will* be decoded with the (single) q option:
+
+ - asynchronous branches such as interrupts
+ - indirect branches
+ - function return target address *if* the noretcomp config term (refer
+ config terms section) was used
+ - start of (control-flow) tracing
+ - end of (control-flow) tracing, if it is not out of context
+ - power events, ptwrite, transaction start and abort
+ - instruction pointer associated with PSB packets
+
+Note the q option does not specify what events will be synthesized e.g. the p
+option must be used also to show power events.
+
+Repeating the q option (double-q i.e. qq) results in even faster decoding and even
+less detail. The decoder decodes only extended PSB (PSB+) packets, getting the
+instruction pointer if there is a FUP packet within PSB+ (i.e. between PSB and
+PSBEND). Note PSB packets occur regularly in the trace based on the psb_period
+config term (refer config terms section). There will be a FUP packet if the
+PSB+ occurs while control flow is being traced.
+
+What will *not* be decoded with the qq option:
+
+ - everything except instruction pointer associated with PSB packets
+
+What *will* be decoded with the qq option:
+
+ - instruction pointer associated with PSB packets
+
+
dump option
~~~~~~~~~~~
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 376a50b3452d..10ed539a8859 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -119,6 +119,7 @@ It's also possible to use pmu syntax:
perf record -e r1a8 -a sleep 1
perf record -e cpu/r1a8/ ...
+ perf record -e cpu/r0x1a8/ ...
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index fa8a5fcd27ab..bd50cdff08a8 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -33,6 +33,10 @@ OPTIONS
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
+ - a symbolic or raw PMU event followed by an optional colon
+ and a list of event modifiers, e.g., cpu-cycles:p. See the
+ linkperf:perf-list[1] man page for details on event modifiers.
+
- a symbolically formed PMU event like 'pmu/param1=0x3,param2/' where
'param1', 'param2', etc are defined as formats for the PMU in
/sys/bus/event_source/devices/<pmu>/format/*.
@@ -407,8 +411,9 @@ if combined with -a or -C options.
-D::
--delay=::
-After starting the program, wait msecs before measuring. This is useful to
-filter out the startup phase of the program, which is often very different.
+After starting the program, wait msecs before measuring (-1: start with events
+disabled). This is useful to filter out the startup phase of the program, which
+is often very different.
-I::
--intr-regs::
@@ -626,6 +631,45 @@ option. The -e option and this one can be mixed and matched. Events
can be grouped using the {} notation.
endif::HAVE_LIBPFM[]
+--control fd:ctl-fd[,ack-fd]
+Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
+'disable': disable events). Measurements can be started with events disabled using
+--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor
+to synchronize with the controlling process. Example of bash shell script to enable and
+disable events during measurements:
+
+#!/bin/bash
+
+ctl_dir=/tmp/
+
+ctl_fifo=${ctl_dir}perf_ctl.fifo
+test -p ${ctl_fifo} && unlink ${ctl_fifo}
+mkfifo ${ctl_fifo}
+exec {ctl_fd}<>${ctl_fifo}
+
+ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
+test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
+mkfifo ${ctl_ack_fifo}
+exec {ctl_fd_ack}<>${ctl_ack_fifo}
+
+perf record -D -1 -e cpu-cycles -a \
+ --control fd:${ctl_fd},${ctl_fd_ack} \
+ -- sleep 30 &
+perf_pid=$!
+
+sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
+sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
+
+exec {ctl_fd_ack}>&-
+unlink ${ctl_ack_fifo}
+
+exec {ctl_fd}>&-
+unlink ${ctl_fifo}
+
+wait -n ${perf_pid}
+exit $?
+
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 372dfd110e6d..4f712fb8f175 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -322,6 +322,10 @@ OPTIONS
--show-cgroup-events
Display cgroup events i.e. events of type PERF_RECORD_CGROUP.
+--show-text-poke-events
+ Display text poke events i.e. events of type PERF_RECORD_TEXT_POKE and
+ PERF_RECORD_KSYMBOL.
+
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index b029ee728a0b..db420dd75e43 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -39,6 +39,10 @@ report::
- a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
hexadecimal event descriptor.
+ - a symbolic or raw PMU event followed by an optional colon
+ and a list of event modifiers, e.g., cpu-cycles:p. See the
+ linkperf:perf-list[1] man page for details on event modifiers.
+
- a symbolically formed event like 'pmu/param1=0x3,param2/' where
param1 and param2 are defined as formats for the PMU in
/sys/bus/event_source/devices/<pmu>/format/*
@@ -176,6 +180,45 @@ with it. --append may be used here. Examples:
3>results perf stat --log-fd 3 -- $cmd
3>>results perf stat --log-fd 3 --append -- $cmd
+--control fd:ctl-fd[,ack-fd]
+Listen on ctl-fd descriptor for command to control measurement ('enable': enable events,
+'disable': disable events). Measurements can be started with events disabled using
+--delay=-1 option. Optionally send control command completion ('ack\n') to ack-fd descriptor
+to synchronize with the controlling process. Example of bash shell script to enable and
+disable events during measurements:
+
+#!/bin/bash
+
+ctl_dir=/tmp/
+
+ctl_fifo=${ctl_dir}perf_ctl.fifo
+test -p ${ctl_fifo} && unlink ${ctl_fifo}
+mkfifo ${ctl_fifo}
+exec {ctl_fd}<>${ctl_fifo}
+
+ctl_ack_fifo=${ctl_dir}perf_ctl_ack.fifo
+test -p ${ctl_ack_fifo} && unlink ${ctl_ack_fifo}
+mkfifo ${ctl_ack_fifo}
+exec {ctl_fd_ack}<>${ctl_ack_fifo}
+
+perf stat -D -1 -e cpu-cycles -a -I 1000 \
+ --control fd:${ctl_fd},${ctl_fd_ack} \
+ -- sleep 30 &
+perf_pid=$!
+
+sleep 5 && echo 'enable' >&${ctl_fd} && read -u ${ctl_fd_ack} e1 && echo "enabled(${e1})"
+sleep 10 && echo 'disable' >&${ctl_fd} && read -u ${ctl_fd_ack} d1 && echo "disabled(${d1})"
+
+exec {ctl_fd_ack}>&-
+unlink ${ctl_ack_fifo}
+
+exec {ctl_fd}>&-
+unlink ${ctl_fifo}
+
+wait -n ${perf_pid}
+exit $?
+
+
--pre::
--post::
Pre and post measurement hooks, e.g.:
@@ -238,8 +281,9 @@ mode, use --per-node in addition to -a. (system-wide).
-D msecs::
--delay msecs::
-After starting the program, wait msecs before measuring. This is useful to
-filter out the startup phase of the program, which is often very different.
+After starting the program, wait msecs before measuring (-1: start with events
+disabled). This is useful to filter out the startup phase of the program,
+which is often very different.
-T::
--transaction::
@@ -376,6 +420,9 @@ counts for all hardware threads in a core but show the sum counts per
hardware thread. This is essentially a replacement for the any bit and
convenient for post processing.
+--summary::
+Print summary for interval mode (-I).
+
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index b6472e463284..9ee96640744e 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -389,6 +389,19 @@ struct {
Example:
cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake
+ HEADER_CLOCK_DATA = 29,
+
+ Contains clock id and its reference time together with wall clock
+ time taken at the 'same time', both values are in nanoseconds.
+ The format of data is as below.
+
+struct {
+ u32 version; /* version = 1 */
+ u32 clockid;
+ u64 wall_clock_ns;
+ u64 clockid_time_ns;
+};
+
other bits are reserved and should ignored for now
HEADER_FEAT_BITS = 256,
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 513633809c81..8137a6046a47 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -16,7 +16,7 @@ $(shell printf "" > $(OUTPUT).config-detected)
detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected)
detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected)
-CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS)
+CFLAGS := $(EXTRA_CFLAGS) $(filter-out -Wnested-externs,$(EXTRA_WARNINGS))
include $(srctree)/tools/scripts/Makefile.arch
@@ -501,6 +501,14 @@ ifndef NO_LIBELF
CFLAGS += -DHAVE_ELF_GETSHDRSTRNDX_SUPPORT
endif
+ ifndef NO_LIBDEBUGINFOD
+ $(call feature_check,libdebuginfod)
+ ifeq ($(feature-libdebuginfod), 1)
+ CFLAGS += -DHAVE_DEBUGINFOD_SUPPORT
+ EXTLIBS += -ldebuginfod
+ endif
+ endif
+
ifndef NO_DWARF
ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
msg := $(warning DWARF register mappings have not been defined for architecture $(SRCARCH), DWARF support disabled);
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 86dbb51bb272..6031167939ae 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -124,6 +124,8 @@ include ../scripts/utilities.mak
#
# Define LIBPFM4 to enable libpfm4 events extension.
#
+# Define NO_LIBDEBUGINFOD if you do not want support debuginfod
+#
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
@@ -418,6 +420,7 @@ export INSTALL SHELL_PATH
SHELL = $(SHELL_PATH)
+beauty_linux_dir := $(srctree)/tools/perf/trace/beauty/include/linux/
linux_uapi_dir := $(srctree)/tools/include/uapi/linux
asm_generic_uapi_dir := $(srctree)/tools/include/uapi/asm-generic
arch_asm_uapi_dir := $(srctree)/tools/arch/$(SRCARCH)/include/uapi/asm/
@@ -501,6 +504,12 @@ socket_ipproto_tbl := $(srctree)/tools/perf/trace/beauty/socket_ipproto.sh
$(socket_ipproto_array): $(linux_uapi_dir)/in.h $(socket_ipproto_tbl)
$(Q)$(SHELL) '$(socket_ipproto_tbl)' $(linux_uapi_dir) > $@
+socket_arrays := $(beauty_outdir)/socket_arrays.c
+socket_tbl := $(srctree)/tools/perf/trace/beauty/socket.sh
+
+$(socket_arrays): $(beauty_linux_dir)/socket.h $(socket_tbl)
+ $(Q)$(SHELL) '$(socket_tbl)' $(beauty_linux_dir) > $@
+
vhost_virtio_ioctl_array := $(beauty_ioctl_outdir)/vhost_virtio_ioctl_array.c
vhost_virtio_hdr_dir := $(srctree)/tools/include/uapi/linux
vhost_virtio_ioctl_tbl := $(srctree)/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -697,6 +706,7 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
$(kcmp_type_array) \
$(kvm_ioctl_array) \
$(socket_ipproto_array) \
+ $(socket_arrays) \
$(vhost_virtio_ioctl_array) \
$(madvise_behavior_array) \
$(mmap_flags_array) \
@@ -1006,6 +1016,7 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(kvm_ioctl_array) \
$(OUTPUT)$(kcmp_type_array) \
$(OUTPUT)$(socket_ipproto_array) \
+ $(OUTPUT)$(socket_arrays) \
$(OUTPUT)$(vhost_virtio_ioctl_array) \
$(OUTPUT)$(perf_ioctl_array) \
$(OUTPUT)$(prctl_option_array) \
diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c
index 28a5d0c18b1d..b187bddbd01a 100644
--- a/tools/perf/arch/arm/util/auxtrace.c
+++ b/tools/perf/arch/arm/util/auxtrace.c
@@ -57,17 +57,15 @@ struct auxtrace_record
struct evsel *evsel;
bool found_etm = false;
struct perf_pmu *found_spe = NULL;
- static struct perf_pmu **arm_spe_pmus = NULL;
- static int nr_spes = 0;
+ struct perf_pmu **arm_spe_pmus = NULL;
+ int nr_spes = 0;
int i = 0;
if (!evlist)
return NULL;
cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME);
-
- if (!arm_spe_pmus)
- arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
+ arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err);
evlist__for_each_entry(evlist, evsel) {
if (cs_etm_pmu &&
@@ -84,6 +82,7 @@ struct auxtrace_record
}
}
}
+ free(arm_spe_pmus);
if (found_etm && found_spe) {
pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n");
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index cea5e33d61d2..cad7bf783413 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -243,10 +243,10 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
}
/*
- * No sink was provided on the command line - for _now_ treat
- * this as an error.
+ * No sink was provided on the command line - allow the CoreSight
+ * system to look for a default
*/
- return ret;
+ return 0;
}
static int cs_etm_recording_options(struct auxtrace_record *itr,
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
index b190f2eb2611..b168364ac050 100644
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -32,7 +32,7 @@
18 spu oldstat sys_ni_syscall
19 common lseek sys_lseek compat_sys_lseek
20 common getpid sys_getpid
-21 nospu mount sys_mount compat_sys_mount
+21 nospu mount sys_mount
22 32 umount sys_oldumount
22 64 umount sys_ni_syscall
22 spu umount sys_ni_syscall
@@ -189,11 +189,11 @@
142 common _newselect sys_select compat_sys_select
143 common flock sys_flock
144 common msync sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid
148 common fdatasync sys_fdatasync
-149 nospu _sysctl sys_sysctl compat_sys_sysctl
+149 nospu _sysctl sys_ni_syscall
150 common mlock sys_mlock
151 common munlock sys_munlock
152 common mlockall sys_mlockall
@@ -363,7 +363,7 @@
282 common unshare sys_unshare
283 common splice sys_splice
284 common tee sys_tee
-285 common vmsplice sys_vmsplice compat_sys_vmsplice
+285 common vmsplice sys_vmsplice
286 common openat sys_openat compat_sys_openat
287 common mkdirat sys_mkdirat
288 common mknodat sys_mknodat
@@ -443,8 +443,8 @@
348 common syncfs sys_syncfs
349 common sendmmsg sys_sendmmsg compat_sys_sendmmsg
350 common setns sys_setns
-351 nospu process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-352 nospu process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+351 nospu process_vm_readv sys_process_vm_readv
+352 nospu process_vm_writev sys_process_vm_writev
353 nospu finit_module sys_finit_module
354 nospu kcmp sys_kcmp
355 common sched_setattr sys_sched_setattr
diff --git a/tools/perf/arch/powerpc/include/perf_regs.h b/tools/perf/arch/powerpc/include/perf_regs.h
index e18a3556f5e3..63f3ac91049f 100644
--- a/tools/perf/arch/powerpc/include/perf_regs.h
+++ b/tools/perf/arch/powerpc/include/perf_regs.h
@@ -64,7 +64,13 @@ static const char *reg_names[] = {
[PERF_REG_POWERPC_DAR] = "dar",
[PERF_REG_POWERPC_DSISR] = "dsisr",
[PERF_REG_POWERPC_SIER] = "sier",
- [PERF_REG_POWERPC_MMCRA] = "mmcra"
+ [PERF_REG_POWERPC_MMCRA] = "mmcra",
+ [PERF_REG_POWERPC_MMCR0] = "mmcr0",
+ [PERF_REG_POWERPC_MMCR1] = "mmcr1",
+ [PERF_REG_POWERPC_MMCR2] = "mmcr2",
+ [PERF_REG_POWERPC_MMCR3] = "mmcr3",
+ [PERF_REG_POWERPC_SIER2] = "sier2",
+ [PERF_REG_POWERPC_SIER3] = "sier3",
};
static inline const char *perf_reg_name(int id)
diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h
index 54cfa0530e86..488f4339b83c 100644
--- a/tools/perf/arch/powerpc/util/book3s_hcalls.h
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -84,7 +84,7 @@
{0x1a4, "H_CREATE_RPT"}, \
{0x1a8, "H_REMOVE_RPT"}, \
{0x1ac, "H_REGISTER_RPAGES"}, \
- {0x1b0, "H_DISABLE_AND_GETC"}, \
+ {0x1b0, "H_DISABLE_AND_GET"}, \
{0x1b4, "H_ERROR_DATA"}, \
{0x1b8, "H_GET_HCA_INFO"}, \
{0x1bc, "H_GET_PERF_COUNT"}, \
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index d4870074f14c..1a950171a66f 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -7,17 +7,10 @@
#include <string.h>
#include <linux/stringify.h>
#include "header.h"
+#include "utils_header.h"
#include "metricgroup.h"
#include <api/fs/fs.h>
-#define mfspr(rn) ({unsigned long rval; \
- asm volatile("mfspr %0," __stringify(rn) \
- : "=r" (rval)); rval; })
-
-#define SPRN_PVR 0x11F /* Processor Version Register */
-#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
-#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */
-
int
get_cpuid(char *buffer, size_t sz)
{
diff --git a/tools/perf/arch/powerpc/util/perf_regs.c b/tools/perf/arch/powerpc/util/perf_regs.c
index 0a5242900248..2b6d4704e3aa 100644
--- a/tools/perf/arch/powerpc/util/perf_regs.c
+++ b/tools/perf/arch/powerpc/util/perf_regs.c
@@ -6,9 +6,16 @@
#include "../../../util/perf_regs.h"
#include "../../../util/debug.h"
+#include "../../../util/event.h"
+#include "../../../util/header.h"
+#include "../../../perf-sys.h"
+#include "utils_header.h"
#include <linux/kernel.h>
+#define PVR_POWER9 0x004E
+#define PVR_POWER10 0x0080
+
const struct sample_reg sample_reg_masks[] = {
SMPL_REG(r0, PERF_REG_POWERPC_R0),
SMPL_REG(r1, PERF_REG_POWERPC_R1),
@@ -55,6 +62,12 @@ const struct sample_reg sample_reg_masks[] = {
SMPL_REG(dsisr, PERF_REG_POWERPC_DSISR),
SMPL_REG(sier, PERF_REG_POWERPC_SIER),
SMPL_REG(mmcra, PERF_REG_POWERPC_MMCRA),
+ SMPL_REG(mmcr0, PERF_REG_POWERPC_MMCR0),
+ SMPL_REG(mmcr1, PERF_REG_POWERPC_MMCR1),
+ SMPL_REG(mmcr2, PERF_REG_POWERPC_MMCR2),
+ SMPL_REG(mmcr3, PERF_REG_POWERPC_MMCR3),
+ SMPL_REG(sier2, PERF_REG_POWERPC_SIER2),
+ SMPL_REG(sier3, PERF_REG_POWERPC_SIER3),
SMPL_REG_END
};
@@ -163,3 +176,45 @@ int arch_sdt_arg_parse_op(char *old_op, char **new_op)
return SDT_ARG_VALID;
}
+
+uint64_t arch__intr_reg_mask(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .sample_type = PERF_SAMPLE_REGS_INTR,
+ .precise_ip = 1,
+ .disabled = 1,
+ .exclude_kernel = 1,
+ };
+ int fd;
+ u32 version;
+ u64 extended_mask = 0, mask = PERF_REGS_MASK;
+
+ /*
+ * Get the PVR value to set the extended
+ * mask specific to platform.
+ */
+ version = (((mfspr(SPRN_PVR)) >> 16) & 0xFFFF);
+ if (version == PVR_POWER9)
+ extended_mask = PERF_REG_PMU_MASK_300;
+ else if (version == PVR_POWER10)
+ extended_mask = PERF_REG_PMU_MASK_31;
+ else
+ return mask;
+
+ attr.sample_regs_intr = extended_mask;
+ attr.sample_period = 1;
+ event_attr_init(&attr);
+
+ /*
+ * check if the pmu supports perf extended regs, before
+ * returning the register mask to sample.
+ */
+ fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ if (fd != -1) {
+ close(fd);
+ mask |= extended_mask;
+ }
+ return mask;
+}
diff --git a/tools/perf/arch/powerpc/util/utils_header.h b/tools/perf/arch/powerpc/util/utils_header.h
new file mode 100644
index 000000000000..5788eb1f1fe3
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/utils_header.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_UTIL_HEADER_H
+#define __PERF_UTIL_HEADER_H
+
+#include <linux/stringify.h>
+
+#define mfspr(rn) ({unsigned long rval; \
+ asm volatile("mfspr %0," __stringify(rn) \
+ : "=r" (rval)); rval; })
+
+#define SPRN_PVR 0x11F /* Processor Version Register */
+#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
+#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */
+
+#endif /* __PERF_UTIL_HEADER_H */
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
index 56ae24b6e4be..d2fa9647ce25 100644
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -26,7 +26,7 @@
16 32 lchown - compat_sys_s390_lchown16
19 common lseek sys_lseek compat_sys_lseek
20 common getpid sys_getpid sys_getpid
-21 common mount sys_mount compat_sys_mount
+21 common mount sys_mount
22 common umount sys_oldumount compat_sys_oldumount
23 32 setuid - compat_sys_s390_setuid16
24 32 getuid - compat_sys_s390_getuid16
@@ -134,11 +134,11 @@
142 64 select sys_select -
143 common flock sys_flock sys_flock
144 common msync sys_msync compat_sys_msync
-145 common readv sys_readv compat_sys_readv
-146 common writev sys_writev compat_sys_writev
+145 common readv sys_readv
+146 common writev sys_writev
147 common getsid sys_getsid sys_getsid
148 common fdatasync sys_fdatasync sys_fdatasync
-149 common _sysctl sys_sysctl compat_sys_sysctl
+149 common _sysctl - -
150 common mlock sys_mlock compat_sys_mlock
151 common munlock sys_munlock compat_sys_munlock
152 common mlockall sys_mlockall sys_mlockall
@@ -316,7 +316,7 @@
306 common splice sys_splice compat_sys_splice
307 common sync_file_range sys_sync_file_range compat_sys_s390_sync_file_range
308 common tee sys_tee compat_sys_tee
-309 common vmsplice sys_vmsplice compat_sys_vmsplice
+309 common vmsplice sys_vmsplice sys_vmsplice
310 common move_pages sys_move_pages compat_sys_move_pages
311 common getcpu sys_getcpu compat_sys_getcpu
312 common epoll_pwait sys_epoll_pwait compat_sys_epoll_pwait
@@ -347,8 +347,8 @@
337 common clock_adjtime sys_clock_adjtime compat_sys_clock_adjtime
338 common syncfs sys_syncfs sys_syncfs
339 common setns sys_setns sys_setns
-340 common process_vm_readv sys_process_vm_readv compat_sys_process_vm_readv
-341 common process_vm_writev sys_process_vm_writev compat_sys_process_vm_writev
+340 common process_vm_readv sys_process_vm_readv sys_process_vm_readv
+341 common process_vm_writev sys_process_vm_writev sys_process_vm_writev
342 common s390_runtime_instr sys_s390_runtime_instr sys_s390_runtime_instr
343 common kcmp sys_kcmp compat_sys_kcmp
344 common finit_module sys_finit_module compat_sys_finit_module
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index e008d638e641..347809649ba2 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -164,7 +164,7 @@
153 common vhangup sys_vhangup
154 common modify_ldt sys_modify_ldt
155 common pivot_root sys_pivot_root
-156 64 _sysctl sys_sysctl
+156 64 _sysctl sys_ni_syscall
157 common prctl sys_prctl
158 common arch_prctl sys_arch_prctl
159 common adjtimex sys_adjtimex
@@ -357,6 +357,7 @@
433 common fspick sys_fspick
434 common pidfd_open sys_pidfd_open
435 common clone3 sys_clone3
+436 common close_range sys_close_range
437 common openat2 sys_openat2
438 common pidfd_getfd sys_pidfd_getfd
439 common faccessat2 sys_faccessat2
@@ -370,8 +371,8 @@
512 x32 rt_sigaction compat_sys_rt_sigaction
513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
514 x32 ioctl compat_sys_ioctl
-515 x32 readv compat_sys_readv
-516 x32 writev compat_sys_writev
+515 x32 readv sys_readv
+516 x32 writev sys_writev
517 x32 recvfrom compat_sys_recvfrom
518 x32 sendmsg compat_sys_sendmsg
519 x32 recvmsg compat_sys_recvmsg
@@ -387,15 +388,15 @@
529 x32 waitid compat_sys_waitid
530 x32 set_robust_list compat_sys_set_robust_list
531 x32 get_robust_list compat_sys_get_robust_list
-532 x32 vmsplice compat_sys_vmsplice
+532 x32 vmsplice sys_vmsplice
533 x32 move_pages compat_sys_move_pages
534 x32 preadv compat_sys_preadv64
535 x32 pwritev compat_sys_pwritev64
536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
537 x32 recvmmsg compat_sys_recvmmsg_time64
538 x32 sendmmsg compat_sys_sendmmsg
-539 x32 process_vm_readv compat_sys_process_vm_readv
-540 x32 process_vm_writev compat_sys_process_vm_writev
+539 x32 process_vm_readv sys_process_vm_readv
+540 x32 process_vm_writev sys_process_vm_writev
541 x32 setsockopt sys_setsockopt
542 x32 getsockopt sys_getsockopt
543 x32 io_setup compat_sys_io_setup
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 6ce451293634..082e5f2a415a 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -837,6 +837,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
}
}
+ if (have_timing_info && !intel_pt_evsel->core.attr.exclude_kernel &&
+ perf_can_record_text_poke_events() && perf_can_record_cpu_wide())
+ opts->text_poke = true;
+
if (intel_pt_evsel) {
/*
* To obtain the auxtrace buffer file descriptor, the auxtrace
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 768e408757a0..878db6a59a41 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -1,5 +1,6 @@
perf-y += sched-messaging.o
perf-y += sched-pipe.o
+perf-y += syscall.o
perf-y += mem-functions.o
perf-y += futex-hash.o
perf-y += futex-wake.o
@@ -10,8 +11,8 @@ perf-y += epoll-wait.o
perf-y += epoll-ctl.o
perf-y += synthesize.o
perf-y += kallsyms-parse.o
+perf-y += find-bit-bench.o
-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 61cae4966cae..2804812d4154 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -33,8 +33,10 @@ extern struct timeval bench__start, bench__end, bench__runtime;
int bench_numa(int argc, const char **argv);
int bench_sched_messaging(int argc, const char **argv);
int bench_sched_pipe(int argc, const char **argv);
+int bench_syscall_basic(int argc, const char **argv);
int bench_mem_memcpy(int argc, const char **argv);
int bench_mem_memset(int argc, const char **argv);
+int bench_mem_find_bit(int argc, const char **argv);
int bench_futex_hash(int argc, const char **argv);
int bench_futex_wake(int argc, const char **argv);
int bench_futex_wake_parallel(int argc, const char **argv);
diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
new file mode 100644
index 000000000000..73b5bcc5946a
--- /dev/null
+++ b/tools/perf/bench/find-bit-bench.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark find_next_bit and related bit operations.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include <stdlib.h>
+#include "bench.h"
+#include "../util/stat.h"
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+static unsigned int outer_iterations = 5;
+static unsigned int inner_iterations = 100000;
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
+ "Number of outer iterations used"),
+ OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
+ "Number of inner iterations used"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench mem find_bit <options>",
+ NULL
+};
+
+static unsigned int accumulator;
+static unsigned int use_of_val;
+
+static noinline void workload(int val)
+{
+ use_of_val += val;
+ accumulator++;
+}
+
+#if (defined(__i386__) || defined(__x86_64__)) && defined(__GCC_ASM_FLAG_OUTPUTS__)
+static bool asm_test_bit(long nr, const unsigned long *addr)
+{
+ bool oldbit;
+
+ asm volatile("bt %2,%1"
+ : "=@ccc" (oldbit)
+ : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");
+
+ return oldbit;
+}
+#else
+#define asm_test_bit test_bit
+#endif
+
+static int do_for_each_set_bit(unsigned int num_bits)
+{
+ unsigned long *to_test = bitmap_alloc(num_bits);
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ struct stats fb_time_stats, tb_time_stats;
+ double time_average, time_stddev;
+ unsigned int bit, i, j;
+ unsigned int set_bits, skip;
+ unsigned int old;
+
+ init_stats(&fb_time_stats);
+ init_stats(&tb_time_stats);
+
+ for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
+ bitmap_zero(to_test, num_bits);
+ skip = num_bits / set_bits;
+ for (i = 0; i < num_bits; i += skip)
+ set_bit(i, to_test);
+
+ for (i = 0; i < outer_iterations; i++) {
+ old = accumulator;
+ gettimeofday(&start, NULL);
+ for (j = 0; j < inner_iterations; j++) {
+ for_each_set_bit(bit, to_test, num_bits)
+ workload(bit);
+ }
+ gettimeofday(&end, NULL);
+ assert(old + (inner_iterations * set_bits) == accumulator);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&fb_time_stats, runtime_us);
+
+ old = accumulator;
+ gettimeofday(&start, NULL);
+ for (j = 0; j < inner_iterations; j++) {
+ for (bit = 0; bit < num_bits; bit++) {
+ if (asm_test_bit(bit, to_test))
+ workload(bit);
+ }
+ }
+ gettimeofday(&end, NULL);
+ assert(old + (inner_iterations * set_bits) == accumulator);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&tb_time_stats, runtime_us);
+ }
+
+ printf("%d operations %d bits set of %d bits\n",
+ inner_iterations, set_bits, num_bits);
+ time_average = avg_stats(&fb_time_stats);
+ time_stddev = stddev_stats(&fb_time_stats);
+ printf(" Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+ time_average = avg_stats(&tb_time_stats);
+ time_stddev = stddev_stats(&tb_time_stats);
+ printf(" Average test_bit loop took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+
+ if (use_of_val == accumulator) /* Try to avoid compiler tricks. */
+ printf("\n");
+ }
+ bitmap_free(to_test);
+ return 0;
+}
+
+int bench_mem_find_bit(int argc, const char **argv)
+{
+ int err = 0, i;
+
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ for (i = 1; i <= 2048; i <<= 1)
+ do_for_each_set_bit(i);
+
+ return err;
+}
diff --git a/tools/perf/bench/mem-functions.c b/tools/perf/bench/mem-functions.c
index 9235b76501be..19d45c377ac1 100644
--- a/tools/perf/bench/mem-functions.c
+++ b/tools/perf/bench/mem-functions.c
@@ -223,12 +223,8 @@ static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *
return 0;
}
-static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+static void memcpy_prefault(memcpy_t fn, size_t size, void *src, void *dst)
{
- u64 cycle_start = 0ULL, cycle_end = 0ULL;
- memcpy_t fn = r->fn.memcpy;
- int i;
-
/* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
memset(src, 0, size);
@@ -237,6 +233,15 @@ static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, vo
* to not measure page fault overhead:
*/
fn(dst, src, size);
+}
+
+static u64 do_memcpy_cycles(const struct function *r, size_t size, void *src, void *dst)
+{
+ u64 cycle_start = 0ULL, cycle_end = 0ULL;
+ memcpy_t fn = r->fn.memcpy;
+ int i;
+
+ memcpy_prefault(fn, size, src, dst);
cycle_start = get_cycles();
for (i = 0; i < nr_loops; ++i)
@@ -252,11 +257,7 @@ static double do_memcpy_gettimeofday(const struct function *r, size_t size, void
memcpy_t fn = r->fn.memcpy;
int i;
- /*
- * We prefault the freshly allocated memory range here,
- * to not measure page fault overhead:
- */
- fn(dst, src, size);
+ memcpy_prefault(fn, size, src, dst);
BUG_ON(gettimeofday(&tv_start, NULL));
for (i = 0; i < nr_loops; ++i)
diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
deleted file mode 100644
index 4130734dde84..000000000000
--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
- * happy.
- */
-#include <linux/types.h>
-
-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
- for (; len; --len, to++, from++) {
- /*
- * Call the assembly routine back directly since
- * memcpy_mcsafe() may silently fallback to memcpy.
- */
- unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
- if (rem)
- break;
- }
- return len;
-}
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 5797253b9700..f85bceccc459 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -247,17 +247,22 @@ static int is_node_present(int node)
*/
static bool node_has_cpus(int node)
{
- struct bitmask *cpu = numa_allocate_cpumask();
- unsigned int i;
+ struct bitmask *cpumask = numa_allocate_cpumask();
+ bool ret = false; /* fall back to nocpus */
+ int cpu;
- if (cpu && !numa_node_to_cpus(node, cpu)) {
- for (i = 0; i < cpu->size; i++) {
- if (numa_bitmask_isbitset(cpu, i))
- return true;
+ BUG_ON(!cpumask);
+ if (!numa_node_to_cpus(node, cpumask)) {
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+ if (numa_bitmask_isbitset(cpumask, cpu)) {
+ ret = true;
+ break;
+ }
}
}
+ numa_free_cpumask(cpumask);
- return false; /* lets fall back to nocpus safely */
+ return ret;
}
static cpu_set_t bind_to_cpu(int target_cpu)
@@ -288,14 +293,10 @@ static cpu_set_t bind_to_cpu(int target_cpu)
static cpu_set_t bind_to_node(int target_node)
{
- int cpus_per_node = g->p.nr_cpus / nr_numa_nodes();
cpu_set_t orig_mask, mask;
int cpu;
int ret;
- BUG_ON(cpus_per_node * nr_numa_nodes() != g->p.nr_cpus);
- BUG_ON(!cpus_per_node);
-
ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
BUG_ON(ret);
@@ -305,13 +306,16 @@ static cpu_set_t bind_to_node(int target_node)
for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
CPU_SET(cpu, &mask);
} else {
- int cpu_start = (target_node + 0) * cpus_per_node;
- int cpu_stop = (target_node + 1) * cpus_per_node;
+ struct bitmask *cpumask = numa_allocate_cpumask();
- BUG_ON(cpu_stop > g->p.nr_cpus);
-
- for (cpu = cpu_start; cpu < cpu_stop; cpu++)
- CPU_SET(cpu, &mask);
+ BUG_ON(!cpumask);
+ if (!numa_node_to_cpus(target_node, cpumask)) {
+ for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
+ if (numa_bitmask_isbitset(cpumask, cpu))
+ CPU_SET(cpu, &mask);
+ }
+ }
+ numa_free_cpumask(cpumask);
}
ret = sched_setaffinity(0, sizeof(mask), &mask);
@@ -729,8 +733,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
return -1;
return parse_node_list(arg);
-
- return 0;
}
#define BIT(x) (1ul << x)
@@ -813,12 +815,12 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
}
}
} else if (!g->p.data_backwards || (nr + loop) & 1) {
+ /* Process data forwards: */
d0 = data + off;
d = data + off + 1;
d1 = data + words;
- /* Process data forwards: */
for (;;) {
if (unlikely(d >= d1))
d = data;
@@ -836,7 +838,6 @@ static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val
d = data + off - 1;
d1 = data + words;
- /* Process data forwards: */
for (;;) {
if (unlikely(d < data))
d = data + words-1;
@@ -1733,12 +1734,12 @@ err:
*/
static const char *tests[][MAX_ARGS] = {
/* Basic single-stream NUMA bandwidth measurements: */
- { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "0", OPT_BW_RAM },
{ "RAM-bw-local-NOTHP,",
"mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
- { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
"-C" , "0", "-M", "1", OPT_BW_RAM },
/* 2-stream NUMA bandwidth measurements: */
@@ -1755,7 +1756,7 @@ static const char *tests[][MAX_ARGS] = {
{ " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
{ " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
{ " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
- { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV },
{ " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
{ " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
{ " 4x4-convergence-NOTHP,",
@@ -1780,24 +1781,24 @@ static const char *tests[][MAX_ARGS] = {
"mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
{ "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
- { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
- { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
- { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
- { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
+ { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
+ { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
+ { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
+ { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
- { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
- { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
- { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
- { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
- { " 4x8-bw-thread-NOTHP,",
+ { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
+ { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
+ { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
+ { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
+ { " 4x8-bw-process-NOTHP,",
"mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
- { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
- { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
+ { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
+ { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
- { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
- { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
+ { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
+ { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
- { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
+ { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
{ "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
{ "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
{ "numa01-bw-thread-NOTHP,",
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index 71d830d7b923..cecce93ccc63 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -66,11 +66,10 @@ static void fdpair(int fds[2])
/* Block until we're ready to go */
static void ready(int ready_out, int wakefd)
{
- char dummy;
struct pollfd pollfd = { .fd = wakefd, .events = POLLIN };
/* Tell them we're ready. */
- if (write(ready_out, &dummy, 1) != 1)
+ if (write(ready_out, "R", 1) != 1)
err(EXIT_FAILURE, "CLIENT: ready write");
/* Wait for "GO" signal */
@@ -85,6 +84,7 @@ static void *sender(struct sender_context *ctx)
unsigned int i, j;
ready(ctx->ready_out, ctx->wakefd);
+ memset(data, 'S', sizeof(data));
/* Now pump to every receiver. */
for (i = 0; i < nr_loops; i++) {
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
index 8d624aea1c5e..b2924e3181dc 100644
--- a/tools/perf/bench/synthesize.c
+++ b/tools/perf/bench/synthesize.c
@@ -162,8 +162,8 @@ static int do_run_multi_threaded(struct target *target,
init_stats(&event_stats);
for (i = 0; i < multi_iterations; i++) {
session = perf_session__new(NULL, false, NULL);
- if (!session)
- return -ENOMEM;
+ if (IS_ERR(session))
+ return PTR_ERR(session);
atomic_set(&event_count, 0);
gettimeofday(&start, NULL);
diff --git a/tools/perf/bench/syscall.c b/tools/perf/bench/syscall.c
new file mode 100644
index 000000000000..5fe621cff8e9
--- /dev/null
+++ b/tools/perf/bench/syscall.c
@@ -0,0 +1,81 @@
+/*
+ *
+ * syscall.c
+ *
+ * syscall: Benchmark for system call performance
+ */
+#include "../perf.h"
+#include "../util/util.h"
+#include <subcmd/parse-options.h>
+#include "../builtin.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <sys/time.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#define LOOPS_DEFAULT 10000000
+static int loops = LOOPS_DEFAULT;
+
+static const struct option options[] = {
+ OPT_INTEGER('l', "loop", &loops, "Specify number of loops"),
+ OPT_END()
+};
+
+static const char * const bench_syscall_usage[] = {
+ "perf bench syscall <options>",
+ NULL
+};
+
+int bench_syscall_basic(int argc, const char **argv)
+{
+ struct timeval start, stop, diff;
+ unsigned long long result_usec = 0;
+ int i;
+
+ argc = parse_options(argc, argv, options, bench_syscall_usage, 0);
+
+ gettimeofday(&start, NULL);
+
+ for (i = 0; i < loops; i++)
+ getppid();
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
+ switch (bench_format) {
+ case BENCH_FORMAT_DEFAULT:
+ printf("# Executed %'d getppid() calls\n", loops);
+
+ result_usec = diff.tv_sec * 1000000;
+ result_usec += diff.tv_usec;
+
+ printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
+ diff.tv_sec,
+ (unsigned long) (diff.tv_usec/1000));
+
+ printf(" %14lf usecs/op\n",
+ (double)result_usec / (double)loops);
+ printf(" %'14d ops/sec\n",
+ (int)((double)loops /
+ ((double)result_usec / (double)1000000)));
+ break;
+
+ case BENCH_FORMAT_SIMPLE:
+ printf("%lu.%03lu\n",
+ diff.tv_sec,
+ (unsigned long) (diff.tv_usec / 1000));
+ break;
+
+ default:
+ /* reaching here is something disaster */
+ fprintf(stderr, "Unknown format:%d\n", bench_format);
+ exit(1);
+ break;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index cad31b1d3438..4f176039fc8f 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -11,6 +11,7 @@
* Available benchmark collection list:
*
* sched ... scheduler and IPC performance
+ * syscall ... System call performance
* mem ... memory access performance
* numa ... NUMA scheduling and MM performance
* futex ... Futex performance
@@ -49,9 +50,16 @@ static struct bench sched_benchmarks[] = {
{ NULL, NULL, NULL }
};
+static struct bench syscall_benchmarks[] = {
+ { "basic", "Benchmark for basic getppid(2) calls", bench_syscall_basic },
+ { "all", "Run all syscall benchmarks", NULL },
+ { NULL, NULL, NULL },
+};
+
static struct bench mem_benchmarks[] = {
{ "memcpy", "Benchmark for memcpy() functions", bench_mem_memcpy },
{ "memset", "Benchmark for memset() functions", bench_mem_memset },
+ { "find_bit", "Benchmark for find_bit() functions", bench_mem_find_bit },
{ "all", "Run all memory access benchmarks", NULL },
{ NULL, NULL, NULL }
};
@@ -90,6 +98,7 @@ struct collection {
static struct collection collections[] = {
{ "sched", "Scheduler and IPC benchmarks", sched_benchmarks },
+ { "syscall", "System call benchmarks", syscall_benchmarks },
{ "mem", "Memory access benchmarks", mem_benchmarks },
#ifdef HAVE_LIBNUMA_SUPPORT
{ "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index d617d5682c68..5938b100eaf4 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -2582,7 +2582,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
static int setup_callchain(struct evlist *evlist)
{
- u64 sample_type = perf_evlist__combined_sample_type(evlist);
+ u64 sample_type = evlist__combined_sample_type(evlist);
enum perf_call_graph_mode mode = CALLCHAIN_NONE;
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index ca2fb44874e4..8d23b8d6ee8e 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -65,6 +65,7 @@ static int cmd_data_convert(int argc, const char **argv)
OPT_STRING('i', "input", &input_name, "file", "input file name"),
#ifdef HAVE_LIBBABELTRACE_SUPPORT
OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"),
+ OPT_BOOLEAN(0, "tod", &opts.tod, "Convert time to wall clock time"),
#endif
OPT_BOOLEAN('f', "force", &opts.force, "don't complain, do it"),
OPT_BOOLEAN(0, "all", &opts.all, "Convert all events"),
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 2bfc1b0db536..1d44bc2f63d8 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -3,6 +3,7 @@
* builtin-ftrace.c
*
* Copyright (c) 2013 LG Electronics, Namhyung Kim <namhyung@kernel.org>
+ * Copyright (c) 2020 Changbin Du <changbin.du@gmail.com>, significant enhancement.
*/
#include "builtin.h"
@@ -26,6 +27,8 @@
#include "thread_map.h"
#include "util/cap.h"
#include "util/config.h"
+#include "util/units.h"
+#include "util/parse-sublevel-options.h"
#define DEFAULT_TRACER "function_graph"
@@ -33,11 +36,21 @@ struct perf_ftrace {
struct evlist *evlist;
struct target target;
const char *tracer;
+ bool list_avail_functions;
struct list_head filters;
struct list_head notrace;
struct list_head graph_funcs;
struct list_head nograph_funcs;
int graph_depth;
+ unsigned long percpu_buffer_size;
+ bool inherit;
+ int func_stack_trace;
+ int func_irq_info;
+ int graph_nosleep_time;
+ int graph_noirqs;
+ int graph_verbose;
+ int graph_thresh;
+ unsigned int initial_delay;
};
struct filter_entry {
@@ -128,9 +141,85 @@ static int append_tracing_file(const char *name, const char *val)
return __write_tracing_file(name, val, true);
}
+static int read_tracing_file_to_stdout(const char *name)
+{
+ char buf[4096];
+ char *file;
+ int fd;
+ int ret = -1;
+
+ file = get_tracing_file(name);
+ if (!file) {
+ pr_debug("cannot get tracing file: %s\n", name);
+ return -1;
+ }
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0) {
+ pr_debug("cannot open tracing file: %s: %s\n",
+ name, str_error_r(errno, buf, sizeof(buf)));
+ goto out;
+ }
+
+ /* read contents to stdout */
+ while (true) {
+ int n = read(fd, buf, sizeof(buf));
+ if (n == 0)
+ break;
+ else if (n < 0)
+ goto out_close;
+
+ if (fwrite(buf, n, 1, stdout) != 1)
+ goto out_close;
+ }
+ ret = 0;
+
+out_close:
+ close(fd);
+out:
+ put_tracing_file(file);
+ return ret;
+}
+
+static int write_tracing_file_int(const char *name, int value)
+{
+ char buf[16];
+
+ snprintf(buf, sizeof(buf), "%d", value);
+ if (write_tracing_file(name, buf) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int write_tracing_option_file(const char *name, const char *val)
+{
+ char *file;
+ int ret;
+
+ if (asprintf(&file, "options/%s", name) < 0)
+ return -1;
+
+ ret = __write_tracing_file(file, val, false);
+ free(file);
+ return ret;
+}
+
static int reset_tracing_cpu(void);
static void reset_tracing_filters(void);
+static void reset_tracing_options(struct perf_ftrace *ftrace __maybe_unused)
+{
+ write_tracing_option_file("function-fork", "0");
+ write_tracing_option_file("func_stack_trace", "0");
+ write_tracing_option_file("sleep-time", "1");
+ write_tracing_option_file("funcgraph-irqs", "1");
+ write_tracing_option_file("funcgraph-proc", "0");
+ write_tracing_option_file("funcgraph-abstime", "0");
+ write_tracing_option_file("latency-format", "0");
+ write_tracing_option_file("irq-info", "0");
+}
+
static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
{
if (write_tracing_file("tracing_on", "0") < 0)
@@ -148,7 +237,11 @@ static int reset_tracing_files(struct perf_ftrace *ftrace __maybe_unused)
if (write_tracing_file("max_graph_depth", "0") < 0)
return -1;
+ if (write_tracing_file("tracing_thresh", "0") < 0)
+ return -1;
+
reset_tracing_filters();
+ reset_tracing_options(ftrace);
return 0;
}
@@ -204,6 +297,28 @@ static int set_tracing_cpu(struct perf_ftrace *ftrace)
return set_tracing_cpumask(cpumap);
}
+static int set_tracing_func_stack_trace(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->func_stack_trace)
+ return 0;
+
+ if (write_tracing_option_file("func_stack_trace", "1") < 0)
+ return -1;
+
+ return 0;
+}
+
+static int set_tracing_func_irqinfo(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->func_irq_info)
+ return 0;
+
+ if (write_tracing_option_file("irq-info", "1") < 0)
+ return -1;
+
+ return 0;
+}
+
static int reset_tracing_cpu(void)
{
struct perf_cpu_map *cpumap = perf_cpu_map__new(NULL);
@@ -258,8 +373,6 @@ static void reset_tracing_filters(void)
static int set_tracing_depth(struct perf_ftrace *ftrace)
{
- char buf[16];
-
if (ftrace->graph_depth == 0)
return 0;
@@ -268,10 +381,152 @@ static int set_tracing_depth(struct perf_ftrace *ftrace)
return -1;
}
- snprintf(buf, sizeof(buf), "%d", ftrace->graph_depth);
+ if (write_tracing_file_int("max_graph_depth", ftrace->graph_depth) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int set_tracing_percpu_buffer_size(struct perf_ftrace *ftrace)
+{
+ int ret;
+
+ if (ftrace->percpu_buffer_size == 0)
+ return 0;
+
+ ret = write_tracing_file_int("buffer_size_kb",
+ ftrace->percpu_buffer_size / 1024);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int set_tracing_trace_inherit(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->inherit)
+ return 0;
+
+ if (write_tracing_option_file("function-fork", "1") < 0)
+ return -1;
+
+ return 0;
+}
+
+static int set_tracing_sleep_time(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->graph_nosleep_time)
+ return 0;
+
+ if (write_tracing_option_file("sleep-time", "0") < 0)
+ return -1;
+
+ return 0;
+}
+
+static int set_tracing_funcgraph_irqs(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->graph_noirqs)
+ return 0;
+
+ if (write_tracing_option_file("funcgraph-irqs", "0") < 0)
+ return -1;
+
+ return 0;
+}
+
+static int set_tracing_funcgraph_verbose(struct perf_ftrace *ftrace)
+{
+ if (!ftrace->graph_verbose)
+ return 0;
+
+ if (write_tracing_option_file("funcgraph-proc", "1") < 0)
+ return -1;
+
+ if (write_tracing_option_file("funcgraph-abstime", "1") < 0)
+ return -1;
+
+ if (write_tracing_option_file("latency-format", "1") < 0)
+ return -1;
+
+ return 0;
+}
+
+static int set_tracing_thresh(struct perf_ftrace *ftrace)
+{
+ int ret;
+
+ if (ftrace->graph_thresh == 0)
+ return 0;
+
+ ret = write_tracing_file_int("tracing_thresh", ftrace->graph_thresh);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static int set_tracing_options(struct perf_ftrace *ftrace)
+{
+ if (set_tracing_pid(ftrace) < 0) {
+ pr_err("failed to set ftrace pid\n");
+ return -1;
+ }
+
+ if (set_tracing_cpu(ftrace) < 0) {
+ pr_err("failed to set tracing cpumask\n");
+ return -1;
+ }
+
+ if (set_tracing_func_stack_trace(ftrace) < 0) {
+ pr_err("failed to set tracing option func_stack_trace\n");
+ return -1;
+ }
+
+ if (set_tracing_func_irqinfo(ftrace) < 0) {
+ pr_err("failed to set tracing option irq-info\n");
+ return -1;
+ }
+
+ if (set_tracing_filters(ftrace) < 0) {
+ pr_err("failed to set tracing filters\n");
+ return -1;
+ }
+
+ if (set_tracing_depth(ftrace) < 0) {
+ pr_err("failed to set graph depth\n");
+ return -1;
+ }
+
+ if (set_tracing_percpu_buffer_size(ftrace) < 0) {
+ pr_err("failed to set tracing per-cpu buffer size\n");
+ return -1;
+ }
+
+ if (set_tracing_trace_inherit(ftrace) < 0) {
+ pr_err("failed to set tracing option function-fork\n");
+ return -1;
+ }
+
+ if (set_tracing_sleep_time(ftrace) < 0) {
+ pr_err("failed to set tracing option sleep-time\n");
+ return -1;
+ }
+
+ if (set_tracing_funcgraph_irqs(ftrace) < 0) {
+ pr_err("failed to set tracing option funcgraph-irqs\n");
+ return -1;
+ }
- if (write_tracing_file("max_graph_depth", buf) < 0)
+ if (set_tracing_funcgraph_verbose(ftrace) < 0) {
+ pr_err("failed to set tracing option funcgraph-proc/funcgraph-abstime\n");
return -1;
+ }
+
+ if (set_tracing_thresh(ftrace) < 0) {
+ pr_err("failed to set tracing thresh\n");
+ return -1;
+ }
return 0;
}
@@ -302,6 +557,9 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
signal(SIGCHLD, sig_handler);
signal(SIGPIPE, sig_handler);
+ if (ftrace->list_avail_functions)
+ return read_tracing_file_to_stdout("available_filter_functions");
+
if (reset_tracing_files(ftrace) < 0) {
pr_err("failed to reset ftrace\n");
goto out;
@@ -317,25 +575,8 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
goto out;
}
- if (set_tracing_pid(ftrace) < 0) {
- pr_err("failed to set ftrace pid\n");
+ if (set_tracing_options(ftrace) < 0)
goto out_reset;
- }
-
- if (set_tracing_cpu(ftrace) < 0) {
- pr_err("failed to set tracing cpumask\n");
- goto out_reset;
- }
-
- if (set_tracing_filters(ftrace) < 0) {
- pr_err("failed to set tracing filters\n");
- goto out_reset;
- }
-
- if (set_tracing_depth(ftrace) < 0) {
- pr_err("failed to set graph depth\n");
- goto out_reset;
- }
if (write_tracing_file("current_tracer", ftrace->tracer) < 0) {
pr_err("failed to set current_tracer to %s\n", ftrace->tracer);
@@ -362,13 +603,26 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
fcntl(trace_fd, F_SETFL, O_NONBLOCK);
pollfd.fd = trace_fd;
- if (write_tracing_file("tracing_on", "1") < 0) {
- pr_err("can't enable tracing\n");
- goto out_close_fd;
+ /* display column headers */
+ read_tracing_file_to_stdout("trace");
+
+ if (!ftrace->initial_delay) {
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ goto out_close_fd;
+ }
}
perf_evlist__start_workload(ftrace->evlist);
+ if (ftrace->initial_delay) {
+ usleep(ftrace->initial_delay * 1000);
+ if (write_tracing_file("tracing_on", "1") < 0) {
+ pr_err("can't enable tracing\n");
+ goto out_close_fd;
+ }
+ }
+
while (!done) {
if (poll(&pollfd, 1, -1) < 0)
break;
@@ -455,6 +709,99 @@ static void delete_filter_func(struct list_head *head)
}
}
+static int parse_buffer_size(const struct option *opt,
+ const char *str, int unset)
+{
+ unsigned long *s = (unsigned long *)opt->value;
+ static struct parse_tag tags_size[] = {
+ { .tag = 'B', .mult = 1 },
+ { .tag = 'K', .mult = 1 << 10 },
+ { .tag = 'M', .mult = 1 << 20 },
+ { .tag = 'G', .mult = 1 << 30 },
+ { .tag = 0 },
+ };
+ unsigned long val;
+
+ if (unset) {
+ *s = 0;
+ return 0;
+ }
+
+ val = parse_tag_value(str, tags_size);
+ if (val != (unsigned long) -1) {
+ if (val < 1024) {
+ pr_err("buffer size too small, must larger than 1KB.");
+ return -1;
+ }
+ *s = val;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int parse_func_tracer_opts(const struct option *opt,
+ const char *str, int unset)
+{
+ int ret;
+ struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
+ struct sublevel_option func_tracer_opts[] = {
+ { .name = "call-graph", .value_ptr = &ftrace->func_stack_trace },
+ { .name = "irq-info", .value_ptr = &ftrace->func_irq_info },
+ { .name = NULL, }
+ };
+
+ if (unset)
+ return 0;
+
+ ret = perf_parse_sublevel_options(str, func_tracer_opts);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static int parse_graph_tracer_opts(const struct option *opt,
+ const char *str, int unset)
+{
+ int ret;
+ struct perf_ftrace *ftrace = (struct perf_ftrace *) opt->value;
+ struct sublevel_option graph_tracer_opts[] = {
+ { .name = "nosleep-time", .value_ptr = &ftrace->graph_nosleep_time },
+ { .name = "noirqs", .value_ptr = &ftrace->graph_noirqs },
+ { .name = "verbose", .value_ptr = &ftrace->graph_verbose },
+ { .name = "thresh", .value_ptr = &ftrace->graph_thresh },
+ { .name = "depth", .value_ptr = &ftrace->graph_depth },
+ { .name = NULL, }
+ };
+
+ if (unset)
+ return 0;
+
+ ret = perf_parse_sublevel_options(str, graph_tracer_opts);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void select_tracer(struct perf_ftrace *ftrace)
+{
+ bool graph = !list_empty(&ftrace->graph_funcs) ||
+ !list_empty(&ftrace->nograph_funcs);
+ bool func = !list_empty(&ftrace->filters) ||
+ !list_empty(&ftrace->notrace);
+
+ /* The function_graph has priority over function tracer. */
+ if (graph)
+ ftrace->tracer = "function_graph";
+ else if (func)
+ ftrace->tracer = "function";
+ /* Otherwise, the default tracer is used. */
+
+ pr_debug("%s tracer is used\n", ftrace->tracer);
+}
+
int cmd_ftrace(int argc, const char **argv)
{
int ret;
@@ -469,25 +816,42 @@ int cmd_ftrace(int argc, const char **argv)
};
const struct option ftrace_options[] = {
OPT_STRING('t', "tracer", &ftrace.tracer, "tracer",
- "tracer to use: function_graph(default) or function"),
+ "Tracer to use: function_graph(default) or function"),
+ OPT_BOOLEAN('F', "funcs", &ftrace.list_avail_functions,
+ "Show available functions to filter"),
OPT_STRING('p', "pid", &ftrace.target.pid, "pid",
- "trace on existing process id"),
+ "Trace on existing process id"),
+ /* TODO: Add short option -t after -t/--tracer can be removed. */
+ OPT_STRING(0, "tid", &ftrace.target.tid, "tid",
+ "Trace on existing thread id (exclusive to --pid)"),
OPT_INCR('v', "verbose", &verbose,
- "be more verbose"),
+ "Be more verbose"),
OPT_BOOLEAN('a', "all-cpus", &ftrace.target.system_wide,
- "system-wide collection from all CPUs"),
+ "System-wide collection from all CPUs"),
OPT_STRING('C', "cpu", &ftrace.target.cpu_list, "cpu",
- "list of cpus to monitor"),
+ "List of cpus to monitor"),
OPT_CALLBACK('T', "trace-funcs", &ftrace.filters, "func",
- "trace given functions only", parse_filter_func),
+ "Trace given functions using function tracer",
+ parse_filter_func),
OPT_CALLBACK('N', "notrace-funcs", &ftrace.notrace, "func",
- "do not trace given functions", parse_filter_func),
+ "Do not trace given functions", parse_filter_func),
+ OPT_CALLBACK(0, "func-opts", &ftrace, "options",
+ "Function tracer options, available options: call-graph,irq-info",
+ parse_func_tracer_opts),
OPT_CALLBACK('G', "graph-funcs", &ftrace.graph_funcs, "func",
- "Set graph filter on given functions", parse_filter_func),
+ "Trace given functions using function_graph tracer",
+ parse_filter_func),
OPT_CALLBACK('g', "nograph-funcs", &ftrace.nograph_funcs, "func",
"Set nograph filter on given functions", parse_filter_func),
- OPT_INTEGER('D', "graph-depth", &ftrace.graph_depth,
- "Max depth for function graph tracer"),
+ OPT_CALLBACK(0, "graph-opts", &ftrace, "options",
+ "Graph tracer options, available options: nosleep-time,noirqs,verbose,thresh=<n>,depth=<n>",
+ parse_graph_tracer_opts),
+ OPT_CALLBACK('m', "buffer-size", &ftrace.percpu_buffer_size, "size",
+ "Size of per cpu buffer, needs to use a B, K, M or G suffix.", parse_buffer_size),
+ OPT_BOOLEAN(0, "inherit", &ftrace.inherit,
+ "Trace children processes"),
+ OPT_UINTEGER('D', "delay", &ftrace.initial_delay,
+ "Number of milliseconds to wait before starting tracing after program start"),
OPT_END()
};
@@ -505,6 +869,8 @@ int cmd_ftrace(int argc, const char **argv)
if (!argc && target__none(&ftrace.target))
ftrace.target.system_wide = true;
+ select_tracer(&ftrace);
+
ret = target__validate(&ftrace.target);
if (ret) {
char errbuf[512];
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 4a6de4b03ac0..6d2f410d773a 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -292,7 +292,7 @@ static int perf_event__jit_repipe_mmap(struct perf_tool *tool,
* if jit marker, then inject jit mmaps and generate ELF images
*/
ret = jit_process(inject->session, &inject->output, machine,
- event->mmap.filename, sample->pid, &n);
+ event->mmap.filename, event->mmap.pid, &n);
if (ret < 0)
return ret;
if (ret) {
@@ -330,7 +330,7 @@ static int perf_event__jit_repipe_mmap2(struct perf_tool *tool,
* if jit marker, then inject jit mmaps and generate ELF images
*/
ret = jit_process(inject->session, &inject->output, machine,
- event->mmap2.filename, sample->pid, &n);
+ event->mmap2.filename, event->mmap2.pid, &n);
if (ret < 0)
return ret;
if (ret) {
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 38a5ab683ebc..a50dae2c4ae9 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1933,7 +1933,8 @@ int cmd_kmem(int argc, const char **argv)
return ret;
argc = parse_options_subcommand(argc, argv, kmem_options,
- kmem_subcommands, kmem_usage, 0);
+ kmem_subcommands, kmem_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(kmem_usage, kmem_options);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 95a77058023e..460945ded6dd 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1319,7 +1319,7 @@ static struct evlist *kvm_live_event_list(void)
*name = '\0';
name++;
- if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
+ if (evlist__add_newtp(evlist, sys, name, NULL)) {
pr_err("Failed to add %s tracepoint to the list\n", *events_tp);
free(tp);
goto out;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index a37e7910e9e9..772f1057647f 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -46,6 +46,7 @@
#include "util/bpf-event.h"
#include "util/util.h"
#include "util/pfm.h"
+#include "util/clockid.h"
#include "asm/bug.h"
#include "perf.h"
@@ -70,6 +71,7 @@
#include <linux/time64.h>
#include <linux/zalloc.h>
#include <linux/bitmap.h>
+#include <sys/time.h>
struct switch_output {
bool enabled;
@@ -765,6 +767,43 @@ static int record__auxtrace_init(struct record *rec __maybe_unused)
#endif
+static int record__config_text_poke(struct evlist *evlist)
+{
+ struct evsel *evsel;
+ int err;
+
+ /* Nothing to do if text poke is already configured */
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.text_poke)
+ return 0;
+ }
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ evsel = evlist__last(evlist);
+
+ evsel->core.attr.freq = 0;
+ evsel->core.attr.sample_period = 1;
+ evsel->core.attr.text_poke = 1;
+ evsel->core.attr.ksymbol = 1;
+
+ evsel->core.system_wide = true;
+ evsel->no_aux_samples = true;
+ evsel->immediate = true;
+
+ /* Text poke must be collected on all CPUs */
+ perf_cpu_map__put(evsel->core.own_cpus);
+ evsel->core.own_cpus = perf_cpu_map__new(NULL);
+ perf_cpu_map__put(evsel->core.cpus);
+ evsel->core.cpus = perf_cpu_map__get(evsel->core.own_cpus);
+
+ evsel__set_sample_bit(evsel, TIME);
+
+ return 0;
+}
+
static bool record__kcore_readable(struct machine *machine)
{
char kcore[PATH_MAX];
@@ -855,7 +894,7 @@ static int record__open(struct record *rec)
pos = perf_evlist__get_tracking_event(evlist);
if (!evsel__is_dummy_event(pos)) {
/* Set up dummy event. */
- if (perf_evlist__add_dummy(evlist))
+ if (evlist__add_dummy(evlist))
return -ENOMEM;
pos = evlist__last(evlist);
perf_evlist__set_tracking_event(evlist, pos);
@@ -1166,6 +1205,9 @@ static void record__init_features(struct record *rec)
if (!(rec->opts.use_clockid && rec->opts.clockid_res_ns))
perf_header__clear_feat(&session->header, HEADER_CLOCKID);
+ if (!rec->opts.use_clockid)
+ perf_header__clear_feat(&session->header, HEADER_CLOCK_DATA);
+
perf_header__clear_feat(&session->header, HEADER_DIR_FORMAT);
if (!record__comp_enabled(rec))
perf_header__clear_feat(&session->header, HEADER_COMPRESSED);
@@ -1489,7 +1531,7 @@ static int record__setup_sb_evlist(struct record *rec)
evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
rec->thread_id = pthread_self();
}
-
+#ifdef HAVE_LIBBPF_SUPPORT
if (!opts->no_bpf_event) {
if (rec->sb_evlist == NULL) {
rec->sb_evlist = evlist__new();
@@ -1505,7 +1547,7 @@ static int record__setup_sb_evlist(struct record *rec)
return -1;
}
}
-
+#endif
if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
opts->no_bpf_event = true;
@@ -1514,6 +1556,43 @@ static int record__setup_sb_evlist(struct record *rec)
return 0;
}
+static int record__init_clock(struct record *rec)
+{
+ struct perf_session *session = rec->session;
+ struct timespec ref_clockid;
+ struct timeval ref_tod;
+ u64 ref;
+
+ if (!rec->opts.use_clockid)
+ return 0;
+
+ if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
+ session->header.env.clock.clockid_res_ns = rec->opts.clockid_res_ns;
+
+ session->header.env.clock.clockid = rec->opts.clockid;
+
+ if (gettimeofday(&ref_tod, NULL) != 0) {
+ pr_err("gettimeofday failed, cannot set reference time.\n");
+ return -1;
+ }
+
+ if (clock_gettime(rec->opts.clockid, &ref_clockid)) {
+ pr_err("clock_gettime failed, cannot set reference time.\n");
+ return -1;
+ }
+
+ ref = (u64) ref_tod.tv_sec * NSEC_PER_SEC +
+ (u64) ref_tod.tv_usec * NSEC_PER_USEC;
+
+ session->header.env.clock.tod_ns = ref;
+
+ ref = (u64) ref_clockid.tv_sec * NSEC_PER_SEC +
+ (u64) ref_clockid.tv_nsec;
+
+ session->header.env.clock.clockid_ns = ref;
+ return 0;
+}
+
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
int err;
@@ -1527,6 +1606,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
bool disabled = false, draining = false;
int fd;
float ratio = 0;
+ enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
atexit(record__sig_exit);
signal(SIGCHLD, sig_handler);
@@ -1593,10 +1673,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
return -1;
}
- record__init_features(rec);
+ if (record__init_clock(rec))
+ return -1;
- if (rec->opts.use_clockid && rec->opts.clockid_res_ns)
- session->header.env.clockid_res_ns = rec->opts.clockid_res_ns;
+ record__init_features(rec);
if (forks) {
err = perf_evlist__prepare_workload(rec->evlist, &opts->target,
@@ -1646,7 +1726,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
* Normally perf_session__new would do this, but it doesn't have the
* evlist.
*/
- if (rec->tool.ordered_events && !perf_evlist__sample_id_all(rec->evlist)) {
+ if (rec->tool.ordered_events && !evlist__sample_id_all(rec->evlist)) {
pr_warning("WARNING: No sample_id_all support, falling back to unordered processing\n");
rec->tool.ordered_events = false;
}
@@ -1748,9 +1828,16 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
perf_evlist__start_workload(rec->evlist);
}
+ if (evlist__initialize_ctlfd(rec->evlist, opts->ctl_fd, opts->ctl_fd_ack))
+ goto out_child;
+
if (opts->initial_delay) {
- usleep(opts->initial_delay * USEC_PER_MSEC);
- evlist__enable(rec->evlist);
+ pr_info(EVLIST_DISABLED_MSG);
+ if (opts->initial_delay > 0) {
+ usleep(opts->initial_delay * USEC_PER_MSEC);
+ evlist__enable(rec->evlist);
+ pr_info(EVLIST_ENABLED_MSG);
+ }
}
trigger_ready(&auxtrace_snapshot_trigger);
@@ -1842,6 +1929,21 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
draining = true;
}
+ if (evlist__ctlfd_process(rec->evlist, &cmd) > 0) {
+ switch (cmd) {
+ case EVLIST_CTL_CMD_ENABLE:
+ pr_info(EVLIST_ENABLED_MSG);
+ break;
+ case EVLIST_CTL_CMD_DISABLE:
+ pr_info(EVLIST_DISABLED_MSG);
+ break;
+ case EVLIST_CTL_CMD_ACK:
+ case EVLIST_CTL_CMD_UNSUPPORTED:
+ default:
+ break;
+ }
+ }
+
/*
* When perf is starting the traced process, at the end events
* die with the process and we wait for that. Thus no need to
@@ -1875,6 +1977,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
record__synthesize_workload(rec, true);
out_child:
+ evlist__finalize_ctlfd(rec->evlist);
record__mmap_read_all(rec, true);
record__aio_mmap_read_sync(rec);
@@ -2041,103 +2144,6 @@ static int perf_record_config(const char *var, const char *value, void *cb)
return 0;
}
-struct clockid_map {
- const char *name;
- int clockid;
-};
-
-#define CLOCKID_MAP(n, c) \
- { .name = n, .clockid = (c), }
-
-#define CLOCKID_END { .name = NULL, }
-
-
-/*
- * Add the missing ones, we need to build on many distros...
- */
-#ifndef CLOCK_MONOTONIC_RAW
-#define CLOCK_MONOTONIC_RAW 4
-#endif
-#ifndef CLOCK_BOOTTIME
-#define CLOCK_BOOTTIME 7
-#endif
-#ifndef CLOCK_TAI
-#define CLOCK_TAI 11
-#endif
-
-static const struct clockid_map clockids[] = {
- /* available for all events, NMI safe */
- CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
- CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
-
- /* available for some events */
- CLOCKID_MAP("realtime", CLOCK_REALTIME),
- CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
- CLOCKID_MAP("tai", CLOCK_TAI),
-
- /* available for the lazy */
- CLOCKID_MAP("mono", CLOCK_MONOTONIC),
- CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
- CLOCKID_MAP("real", CLOCK_REALTIME),
- CLOCKID_MAP("boot", CLOCK_BOOTTIME),
-
- CLOCKID_END,
-};
-
-static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
-{
- struct timespec res;
-
- *res_ns = 0;
- if (!clock_getres(clk_id, &res))
- *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
- else
- pr_warning("WARNING: Failed to determine specified clock resolution.\n");
-
- return 0;
-}
-
-static int parse_clockid(const struct option *opt, const char *str, int unset)
-{
- struct record_opts *opts = (struct record_opts *)opt->value;
- const struct clockid_map *cm;
- const char *ostr = str;
-
- if (unset) {
- opts->use_clockid = 0;
- return 0;
- }
-
- /* no arg passed */
- if (!str)
- return 0;
-
- /* no setting it twice */
- if (opts->use_clockid)
- return -1;
-
- opts->use_clockid = true;
-
- /* if its a number, we're done */
- if (sscanf(str, "%d", &opts->clockid) == 1)
- return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
-
- /* allow a "CLOCK_" prefix to the name */
- if (!strncasecmp(str, "CLOCK_", 6))
- str += 6;
-
- for (cm = clockids; cm->name; cm++) {
- if (!strcasecmp(str, cm->name)) {
- opts->clockid = cm->clockid;
- return get_clockid_res(opts->clockid,
- &opts->clockid_res_ns);
- }
- }
-
- opts->use_clockid = false;
- ui__warning("unknown clockid %s, check man page\n", ostr);
- return -1;
-}
static int record__parse_affinity(const struct option *opt, const char *str, int unset)
{
@@ -2224,6 +2230,33 @@ out_free:
return ret;
}
+static int parse_control_option(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ char *comma = NULL, *endptr = NULL;
+ struct record_opts *config = (struct record_opts *)opt->value;
+
+ if (strncmp(str, "fd:", 3))
+ return -EINVAL;
+
+ config->ctl_fd = strtoul(&str[3], &endptr, 0);
+ if (endptr == &str[3])
+ return -EINVAL;
+
+ comma = strchr(str, ',');
+ if (comma) {
+ if (endptr != comma)
+ return -EINVAL;
+
+ config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
+ if (endptr == comma + 1 || *endptr != '\0')
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static void switch_output_size_warn(struct record *rec)
{
u64 wakeup_size = evlist__mmap_size(rec->opts.mmap_pages);
@@ -2360,6 +2393,8 @@ static struct record record = {
},
.mmap_flush = MMAP_FLUSH_DEFAULT,
.nr_threads_synthesize = 1,
+ .ctl_fd = -1,
+ .ctl_fd_ack = -1,
},
.tool = {
.sample = process_sample_event,
@@ -2417,7 +2452,7 @@ static struct option __record_options[] = {
OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
"synthesize non-sample events at the end of output"),
OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
- OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
+ OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
"Fail if the specified frequency can't be used"),
OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
@@ -2462,8 +2497,8 @@ static struct option __record_options[] = {
OPT_CALLBACK('G', "cgroup", &record.evlist, "name",
"monitor event in cgroup name only",
parse_cgroups),
- OPT_UINTEGER('D', "delay", &record.opts.initial_delay,
- "ms to wait before starting measurement after program start"),
+ OPT_INTEGER('D', "delay", &record.opts.initial_delay,
+ "ms to wait before starting measurement after program start (-1: start with events disabled)"),
OPT_BOOLEAN(0, "kcore", &record.opts.kcore, "copy /proc/kcore"),
OPT_STRING('u', "uid", &record.opts.target.uid_str, "user",
"user to profile"),
@@ -2561,6 +2596,10 @@ static struct option __record_options[] = {
"libpfm4 event selector. use 'perf list' to list available events",
parse_libpfm_events_option),
#endif
+ OPT_CALLBACK(0, "control", &record.opts, "fd:ctl-fd[,ack-fd]",
+ "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
+ "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.",
+ parse_control_option),
OPT_END()
};
@@ -2722,7 +2761,7 @@ int cmd_record(int argc, const char **argv)
record.opts.tail_synthesize = true;
if (rec->evlist->core.nr_entries == 0 &&
- __perf_evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
+ __evlist__add_default(rec->evlist, !record.opts.no_samples) < 0) {
pr_err("Not enough memory for event selector list\n");
goto out;
}
@@ -2766,6 +2805,14 @@ int cmd_record(int argc, const char **argv)
if (rec->opts.full_auxtrace)
rec->buildid_all = true;
+ if (rec->opts.text_poke) {
+ err = record__config_text_poke(rec->evlist);
+ if (err) {
+ pr_err("record__config_text_poke failed, error %d\n", err);
+ goto out;
+ }
+ }
+
if (record_opts__config(&rec->opts)) {
err = -EINVAL;
goto out;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 5f1d2a878fad..3c74c9c0f3c3 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -338,7 +338,7 @@ static int process_read_event(struct perf_tool *tool,
static int report__setup_sample_type(struct report *rep)
{
struct perf_session *session = rep->session;
- u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+ u64 sample_type = evlist__combined_sample_type(session->evlist);
bool is_pipe = perf_data__is_pipe(session->data);
if (session->itrace_synth_opts->callchain ||
@@ -410,8 +410,7 @@ static int report__setup_sample_type(struct report *rep)
}
/* ??? handle more cases than just ANY? */
- if (!(perf_evlist__combined_branch_type(session->evlist) &
- PERF_SAMPLE_BRANCH_ANY))
+ if (!(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY))
rep->nonany_branch_mode = true;
#if !defined(HAVE_LIBUNWIND_SUPPORT) && !defined(HAVE_DWARF_SUPPORT)
@@ -1093,7 +1092,7 @@ static int process_attr(struct perf_tool *tool __maybe_unused,
* Check if we need to enable callchains based
* on events sample_type.
*/
- sample_type = perf_evlist__combined_sample_type(*pevlist);
+ sample_type = evlist__combined_sample_type(*pevlist);
callchain_param_setup(sample_type);
return 0;
}
@@ -1333,6 +1332,9 @@ int cmd_report(int argc, const char **argv)
if (report.mmaps_mode)
report.tasks_mode = true;
+ if (dump_trace)
+ report.tool.ordered_events = false;
+
if (quiet)
perf_quiet_option();
@@ -1389,7 +1391,7 @@ repeat:
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
- if (perf_evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER)
+ if (evlist__combined_sample_type(session->evlist) & PERF_SAMPLE_STACK_USER)
has_br_stack = false;
setup_forced_leader(&report, session->evlist);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 459e4229945e..e6fc297cee91 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2398,6 +2398,15 @@ static void timehist_print_wakeup_event(struct perf_sched *sched,
printf("\n");
}
+static int timehist_sched_wakeup_ignore(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct evsel *evsel __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return 0;
+}
+
static int timehist_sched_wakeup_event(struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct evsel *evsel,
@@ -2575,7 +2584,8 @@ static int timehist_sched_change_event(struct perf_tool *tool,
}
if (!sched->idle_hist || thread->tid == 0) {
- timehist_update_runtime_stats(tr, t, tprev);
+ if (!cpu_list || test_bit(sample->cpu, cpu_bitmap))
+ timehist_update_runtime_stats(tr, t, tprev);
if (sched->idle_hist) {
struct idle_thread_runtime *itr = (void *)tr;
@@ -2848,6 +2858,9 @@ static void timehist_print_summary(struct perf_sched *sched,
printf("\nIdle stats:\n");
for (i = 0; i < idle_max_cpu; ++i) {
+ if (cpu_list && !test_bit(i, cpu_bitmap))
+ continue;
+
t = idle_threads[i];
if (!t)
continue;
@@ -2958,9 +2971,10 @@ static int timehist_check_attr(struct perf_sched *sched,
static int perf_sched__timehist(struct perf_sched *sched)
{
- const struct evsel_str_handler handlers[] = {
+ struct evsel_str_handler handlers[] = {
{ "sched:sched_switch", timehist_sched_switch_event, },
{ "sched:sched_wakeup", timehist_sched_wakeup_event, },
+ { "sched:sched_waking", timehist_sched_wakeup_event, },
{ "sched:sched_wakeup_new", timehist_sched_wakeup_event, },
};
const struct evsel_str_handler migrate_handlers[] = {
@@ -3018,6 +3032,11 @@ static int perf_sched__timehist(struct perf_sched *sched)
setup_pager();
+ /* prefer sched_waking if it is captured */
+ if (perf_evlist__find_tracepoint_by_name(session->evlist,
+ "sched:sched_waking"))
+ handlers[1].handler = timehist_sched_wakeup_ignore;
+
/* setup per-evsel handlers */
if (perf_session__set_tracepoints_handlers(session, handlers))
goto out;
@@ -3330,12 +3349,16 @@ static int __cmd_record(int argc, const char **argv)
"-e", "sched:sched_stat_iowait",
"-e", "sched:sched_stat_runtime",
"-e", "sched:sched_process_fork",
- "-e", "sched:sched_wakeup",
"-e", "sched:sched_wakeup_new",
"-e", "sched:sched_migrate_task",
};
+ struct tep_event *waking_event;
- rec_argc = ARRAY_SIZE(record_args) + argc - 1;
+ /*
+ * +2 for either "-e", "sched:sched_wakeup" or
+ * "-e", "sched:sched_waking"
+ */
+ rec_argc = ARRAY_SIZE(record_args) + 2 + argc - 1;
rec_argv = calloc(rec_argc + 1, sizeof(char *));
if (rec_argv == NULL)
@@ -3344,6 +3367,13 @@ static int __cmd_record(int argc, const char **argv)
for (i = 0; i < ARRAY_SIZE(record_args); i++)
rec_argv[i] = strdup(record_args[i]);
+ rec_argv[i++] = "-e";
+ waking_event = trace_event__tp_format("sched", "sched_waking");
+ if (!IS_ERR(waking_event))
+ rec_argv[i++] = strdup("sched:sched_waking");
+ else
+ rec_argv[i++] = strdup("sched:sched_wakeup");
+
for (j = 1; j < (unsigned int)argc; j++, i++)
rec_argv[i] = argv[j];
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 447457786362..484ce6067d23 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -82,38 +82,64 @@ static bool native_arch;
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
enum perf_output_field {
- PERF_OUTPUT_COMM = 1U << 0,
- PERF_OUTPUT_TID = 1U << 1,
- PERF_OUTPUT_PID = 1U << 2,
- PERF_OUTPUT_TIME = 1U << 3,
- PERF_OUTPUT_CPU = 1U << 4,
- PERF_OUTPUT_EVNAME = 1U << 5,
- PERF_OUTPUT_TRACE = 1U << 6,
- PERF_OUTPUT_IP = 1U << 7,
- PERF_OUTPUT_SYM = 1U << 8,
- PERF_OUTPUT_DSO = 1U << 9,
- PERF_OUTPUT_ADDR = 1U << 10,
- PERF_OUTPUT_SYMOFFSET = 1U << 11,
- PERF_OUTPUT_SRCLINE = 1U << 12,
- PERF_OUTPUT_PERIOD = 1U << 13,
- PERF_OUTPUT_IREGS = 1U << 14,
- PERF_OUTPUT_BRSTACK = 1U << 15,
- PERF_OUTPUT_BRSTACKSYM = 1U << 16,
- PERF_OUTPUT_DATA_SRC = 1U << 17,
- PERF_OUTPUT_WEIGHT = 1U << 18,
- PERF_OUTPUT_BPF_OUTPUT = 1U << 19,
- PERF_OUTPUT_CALLINDENT = 1U << 20,
- PERF_OUTPUT_INSN = 1U << 21,
- PERF_OUTPUT_INSNLEN = 1U << 22,
- PERF_OUTPUT_BRSTACKINSN = 1U << 23,
- PERF_OUTPUT_BRSTACKOFF = 1U << 24,
- PERF_OUTPUT_SYNTH = 1U << 25,
- PERF_OUTPUT_PHYS_ADDR = 1U << 26,
- PERF_OUTPUT_UREGS = 1U << 27,
- PERF_OUTPUT_METRIC = 1U << 28,
- PERF_OUTPUT_MISC = 1U << 29,
- PERF_OUTPUT_SRCCODE = 1U << 30,
- PERF_OUTPUT_IPC = 1U << 31,
+ PERF_OUTPUT_COMM = 1ULL << 0,
+ PERF_OUTPUT_TID = 1ULL << 1,
+ PERF_OUTPUT_PID = 1ULL << 2,
+ PERF_OUTPUT_TIME = 1ULL << 3,
+ PERF_OUTPUT_CPU = 1ULL << 4,
+ PERF_OUTPUT_EVNAME = 1ULL << 5,
+ PERF_OUTPUT_TRACE = 1ULL << 6,
+ PERF_OUTPUT_IP = 1ULL << 7,
+ PERF_OUTPUT_SYM = 1ULL << 8,
+ PERF_OUTPUT_DSO = 1ULL << 9,
+ PERF_OUTPUT_ADDR = 1ULL << 10,
+ PERF_OUTPUT_SYMOFFSET = 1ULL << 11,
+ PERF_OUTPUT_SRCLINE = 1ULL << 12,
+ PERF_OUTPUT_PERIOD = 1ULL << 13,
+ PERF_OUTPUT_IREGS = 1ULL << 14,
+ PERF_OUTPUT_BRSTACK = 1ULL << 15,
+ PERF_OUTPUT_BRSTACKSYM = 1ULL << 16,
+ PERF_OUTPUT_DATA_SRC = 1ULL << 17,
+ PERF_OUTPUT_WEIGHT = 1ULL << 18,
+ PERF_OUTPUT_BPF_OUTPUT = 1ULL << 19,
+ PERF_OUTPUT_CALLINDENT = 1ULL << 20,
+ PERF_OUTPUT_INSN = 1ULL << 21,
+ PERF_OUTPUT_INSNLEN = 1ULL << 22,
+ PERF_OUTPUT_BRSTACKINSN = 1ULL << 23,
+ PERF_OUTPUT_BRSTACKOFF = 1ULL << 24,
+ PERF_OUTPUT_SYNTH = 1ULL << 25,
+ PERF_OUTPUT_PHYS_ADDR = 1ULL << 26,
+ PERF_OUTPUT_UREGS = 1ULL << 27,
+ PERF_OUTPUT_METRIC = 1ULL << 28,
+ PERF_OUTPUT_MISC = 1ULL << 29,
+ PERF_OUTPUT_SRCCODE = 1ULL << 30,
+ PERF_OUTPUT_IPC = 1ULL << 31,
+ PERF_OUTPUT_TOD = 1ULL << 32,
+};
+
+struct perf_script {
+ struct perf_tool tool;
+ struct perf_session *session;
+ bool show_task_events;
+ bool show_mmap_events;
+ bool show_switch_events;
+ bool show_namespace_events;
+ bool show_lost_events;
+ bool show_round_events;
+ bool show_bpf_events;
+ bool show_cgroup_events;
+ bool show_text_poke_events;
+ bool allocated;
+ bool per_event_dump;
+ bool stitch_lbr;
+ struct evswitch evswitch;
+ struct perf_cpu_map *cpus;
+ struct perf_thread_map *threads;
+ int name_width;
+ const char *time_str;
+ struct perf_time_interval *ptime_range;
+ int range_size;
+ int range_num;
};
struct output_option {
@@ -152,6 +178,7 @@ struct output_option {
{.str = "misc", .field = PERF_OUTPUT_MISC},
{.str = "srccode", .field = PERF_OUTPUT_SRCCODE},
{.str = "ipc", .field = PERF_OUTPUT_IPC},
+ {.str = "tod", .field = PERF_OUTPUT_TOD},
};
enum {
@@ -388,7 +415,7 @@ static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *
return evsel__do_check_stype(evsel, sample_type, sample_msg, field, false);
}
-static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *session)
+static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
{
struct perf_event_attr *attr = &evsel->core.attr;
bool allow_user_set;
@@ -443,8 +470,7 @@ static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *sess
return -EINVAL;
}
if (PRINT_FIELD(BRSTACKINSN) && !allow_user_set &&
- !(perf_evlist__combined_branch_type(session->evlist) &
- PERF_SAMPLE_BRANCH_ANY)) {
+ !(evlist__combined_branch_type(session->evlist) & PERF_SAMPLE_BRANCH_ANY)) {
pr_err("Display of branch stack assembler requested, but non all-branch filter set\n"
"Hint: run 'perf record -b ...'\n");
return -EINVAL;
@@ -503,6 +529,7 @@ static void set_print_ip_opts(struct perf_event_attr *attr)
*/
static int perf_session__check_output_opt(struct perf_session *session)
{
+ bool tod = false;
unsigned int j;
struct evsel *evsel;
@@ -522,13 +549,14 @@ static int perf_session__check_output_opt(struct perf_session *session)
}
if (evsel && output[j].fields &&
- perf_evsel__check_attr(evsel, session))
+ evsel__check_attr(evsel, session))
return -1;
if (evsel == NULL)
continue;
set_print_ip_opts(&evsel->core.attr);
+ tod |= output[j].fields & PERF_OUTPUT_TOD;
}
if (!no_callchain) {
@@ -569,13 +597,17 @@ static int perf_session__check_output_opt(struct perf_session *session)
}
}
+ if (tod && !session->header.env.clock.enabled) {
+ pr_err("Can't provide 'tod' time, missing clock data. "
+ "Please record with -k/--clockid option.\n");
+ return -1;
+ }
out:
return 0;
}
static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
- FILE *fp
-)
+ FILE *fp)
{
unsigned i = 0, r;
int printed = 0;
@@ -593,6 +625,56 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
return printed;
}
+#define DEFAULT_TOD_FMT "%F %H:%M:%S"
+
+static char*
+tod_scnprintf(struct perf_script *script, char *buf, int buflen,
+ u64 timestamp)
+{
+ u64 tod_ns, clockid_ns;
+ struct perf_env *env;
+ unsigned long nsec;
+ struct tm ltime;
+ char date[64];
+ time_t sec;
+
+ buf[0] = '\0';
+ if (buflen < 64 || !script)
+ return buf;
+
+ env = &script->session->header.env;
+ if (!env->clock.enabled) {
+ scnprintf(buf, buflen, "disabled");
+ return buf;
+ }
+
+ clockid_ns = env->clock.clockid_ns;
+ tod_ns = env->clock.tod_ns;
+
+ if (timestamp > clockid_ns)
+ tod_ns += timestamp - clockid_ns;
+ else
+ tod_ns -= clockid_ns - timestamp;
+
+ sec = (time_t) (tod_ns / NSEC_PER_SEC);
+ nsec = tod_ns - sec * NSEC_PER_SEC;
+
+ if (localtime_r(&sec, &ltime) == NULL) {
+ scnprintf(buf, buflen, "failed");
+ } else {
+ strftime(date, sizeof(date), DEFAULT_TOD_FMT, &ltime);
+
+ if (symbol_conf.nanosecs) {
+ snprintf(buf, buflen, "%s.%09lu", date, nsec);
+ } else {
+ snprintf(buf, buflen, "%s.%06lu",
+ date, nsec / NSEC_PER_USEC);
+ }
+ }
+
+ return buf;
+}
+
static int perf_sample__fprintf_iregs(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
@@ -607,7 +689,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample,
attr->sample_regs_user, fp);
}
-static int perf_sample__fprintf_start(struct perf_sample *sample,
+static int perf_sample__fprintf_start(struct perf_script *script,
+ struct perf_sample *sample,
struct thread *thread,
struct evsel *evsel,
u32 type, FILE *fp)
@@ -616,6 +699,7 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
unsigned long secs;
unsigned long long nsecs;
int printed = 0;
+ char tstr[128];
if (PRINT_FIELD(COMM)) {
if (latency_format)
@@ -684,6 +768,11 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
printed += ret;
}
+ if (PRINT_FIELD(TOD)) {
+ tod_scnprintf(script, tstr, sizeof(tstr), sample->time);
+ printed += fprintf(fp, "%s ", tstr);
+ }
+
if (PRINT_FIELD(TIME)) {
u64 t = sample->time;
if (reltime) {
@@ -1668,31 +1757,7 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample,
return 0;
}
-struct perf_script {
- struct perf_tool tool;
- struct perf_session *session;
- bool show_task_events;
- bool show_mmap_events;
- bool show_switch_events;
- bool show_namespace_events;
- bool show_lost_events;
- bool show_round_events;
- bool show_bpf_events;
- bool show_cgroup_events;
- bool allocated;
- bool per_event_dump;
- bool stitch_lbr;
- struct evswitch evswitch;
- struct perf_cpu_map *cpus;
- struct perf_thread_map *threads;
- int name_width;
- const char *time_str;
- struct perf_time_interval *ptime_range;
- int range_size;
- int range_num;
-};
-
-static int perf_evlist__max_name_len(struct evlist *evlist)
+static int evlist__max_name_len(struct evlist *evlist)
{
struct evsel *evsel;
int max = 0;
@@ -1739,7 +1804,7 @@ static void script_print_metric(struct perf_stat_config *config __maybe_unused,
if (!fmt)
return;
- perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+ perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel,
PERF_RECORD_SAMPLE, mctx->fp);
fputs("\tmetric: ", mctx->fp);
if (color)
@@ -1754,7 +1819,7 @@ static void script_new_line(struct perf_stat_config *config __maybe_unused,
{
struct metric_ctx *mctx = ctx;
- perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel,
+ perf_sample__fprintf_start(NULL, mctx->sample, mctx->thread, mctx->evsel,
PERF_RECORD_SAMPLE, mctx->fp);
fputs("\tmetric: ", mctx->fp);
}
@@ -1865,7 +1930,7 @@ static void process_event(struct perf_script *script,
++es->samples;
- perf_sample__fprintf_start(sample, thread, evsel,
+ perf_sample__fprintf_start(script, sample, thread, evsel,
PERF_RECORD_SAMPLE, fp);
if (PRINT_FIELD(PERIOD))
@@ -1875,7 +1940,7 @@ static void process_event(struct perf_script *script,
const char *evname = evsel__name(evsel);
if (!script->name_width)
- script->name_width = perf_evlist__max_name_len(script->session->evlist);
+ script->name_width = evlist__max_name_len(script->session->evlist);
fprintf(fp, "%*s: ", script->name_width, evname ?: "[unknown]");
}
@@ -2120,7 +2185,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
}
if (evsel->core.attr.sample_type) {
- err = perf_evsel__check_attr(evsel, scr->session);
+ err = evsel__check_attr(evsel, scr->session);
if (err)
return err;
}
@@ -2129,7 +2194,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
* Check if we need to enable callchains based
* on events sample_type.
*/
- sample_type = perf_evlist__combined_sample_type(evlist);
+ sample_type = evlist__combined_sample_type(evlist);
callchain_param_setup(sample_type);
/* Enable fields for callchain entries */
@@ -2174,11 +2239,11 @@ static int print_event_with_time(struct perf_tool *tool,
thread = machine__findnew_thread(machine, pid, tid);
if (thread && evsel) {
- perf_sample__fprintf_start(sample, thread, evsel,
+ perf_sample__fprintf_start(script, sample, thread, evsel,
event->header.type, stdout);
}
- perf_event__fprintf(event, stdout);
+ perf_event__fprintf(event, machine, stdout);
thread__put(thread);
@@ -2313,7 +2378,7 @@ process_finished_round_event(struct perf_tool *tool __maybe_unused,
struct ordered_events *oe __maybe_unused)
{
- perf_event__fprintf(event, stdout);
+ perf_event__fprintf(event, NULL, stdout);
return 0;
}
@@ -2330,6 +2395,18 @@ process_bpf_events(struct perf_tool *tool __maybe_unused,
sample->tid);
}
+static int process_text_poke_events(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ if (perf_event__process_text_poke(tool, event, sample, machine) < 0)
+ return -1;
+
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
+}
+
static void sig_handler(int sig __maybe_unused)
{
session_done = 1;
@@ -2438,6 +2515,10 @@ static int __cmd_script(struct perf_script *script)
script->tool.ksymbol = process_bpf_events;
script->tool.bpf = process_bpf_events;
}
+ if (script->show_text_poke_events) {
+ script->tool.ksymbol = process_bpf_events;
+ script->tool.text_poke = process_text_poke_events;
+ }
if (perf_script__setup_per_event_dump(script)) {
pr_err("Couldn't create the per event dump files\n");
@@ -3171,7 +3252,7 @@ static int have_cmd(int argc, const char **argv)
static void script__setup_sample_type(struct perf_script *script)
{
struct perf_session *session = script->session;
- u64 sample_type = perf_evlist__combined_sample_type(session->evlist);
+ u64 sample_type = evlist__combined_sample_type(session->evlist);
if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
if ((sample_type & PERF_SAMPLE_REGS_USER) &&
@@ -3423,7 +3504,7 @@ int cmd_script(int argc, const char **argv)
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
"addr,symoff,srcline,period,iregs,uregs,brstack,"
"brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
- "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc",
+ "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod",
parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
@@ -3474,6 +3555,8 @@ int cmd_script(int argc, const char **argv)
"Show round events (if recorded)"),
OPT_BOOLEAN('\0', "show-bpf-events", &script.show_bpf_events,
"Show bpf related events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-text-poke-events", &script.show_text_poke_events,
+ "Show text poke related events (if recorded)"),
OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump,
"Dump trace output to files named by the monitored events"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9be020e0098a..fddc97cac984 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -188,6 +188,8 @@ static struct perf_stat_config stat_config = {
.metric_only_len = METRIC_ONLY_LEN,
.walltime_nsecs_stats = &walltime_nsecs_stats,
.big_num = true,
+ .ctl_fd = -1,
+ .ctl_fd_ack = -1
};
static bool cpus_map_matched(struct evsel *a, struct evsel *b)
@@ -402,7 +404,7 @@ static void read_counters(struct timespec *rs)
{
struct evsel *counter;
- if (!stat_config.summary && (read_affinity_counters(rs) < 0))
+ if (!stat_config.stop_read_counter && (read_affinity_counters(rs) < 0))
return;
evlist__for_each_entry(evsel_list, counter) {
@@ -475,18 +477,38 @@ static void process_interval(void)
print_counters(&rs, 0, NULL);
}
+static bool handle_interval(unsigned int interval, int *times)
+{
+ if (interval) {
+ process_interval();
+ if (interval_count && !(--(*times)))
+ return true;
+ }
+ return false;
+}
+
static void enable_counters(void)
{
- if (stat_config.initial_delay)
+ if (stat_config.initial_delay < 0) {
+ pr_info(EVLIST_DISABLED_MSG);
+ return;
+ }
+
+ if (stat_config.initial_delay > 0) {
+ pr_info(EVLIST_DISABLED_MSG);
usleep(stat_config.initial_delay * USEC_PER_MSEC);
+ }
/*
* We need to enable counters only if:
* - we don't have tracee (attaching to task or cpu)
* - we have initial delay configured
*/
- if (!target__none(&target) || stat_config.initial_delay)
+ if (!target__none(&target) || stat_config.initial_delay) {
evlist__enable(evsel_list);
+ if (stat_config.initial_delay > 0)
+ pr_info(EVLIST_ENABLED_MSG);
+ }
}
static void disable_counters(void)
@@ -540,6 +562,86 @@ static bool is_target_alive(struct target *_target,
return false;
}
+static void process_evlist(struct evlist *evlist, unsigned int interval)
+{
+ enum evlist_ctl_cmd cmd = EVLIST_CTL_CMD_UNSUPPORTED;
+
+ if (evlist__ctlfd_process(evlist, &cmd) > 0) {
+ switch (cmd) {
+ case EVLIST_CTL_CMD_ENABLE:
+ pr_info(EVLIST_ENABLED_MSG);
+ if (interval)
+ process_interval();
+ break;
+ case EVLIST_CTL_CMD_DISABLE:
+ if (interval)
+ process_interval();
+ pr_info(EVLIST_DISABLED_MSG);
+ break;
+ case EVLIST_CTL_CMD_ACK:
+ case EVLIST_CTL_CMD_UNSUPPORTED:
+ default:
+ break;
+ }
+ }
+}
+
+static void compute_tts(struct timespec *time_start, struct timespec *time_stop,
+ int *time_to_sleep)
+{
+ int tts = *time_to_sleep;
+ struct timespec time_diff;
+
+ diff_timespec(&time_diff, time_stop, time_start);
+
+ tts -= time_diff.tv_sec * MSEC_PER_SEC +
+ time_diff.tv_nsec / NSEC_PER_MSEC;
+
+ if (tts < 0)
+ tts = 0;
+
+ *time_to_sleep = tts;
+}
+
+static int dispatch_events(bool forks, int timeout, int interval, int *times)
+{
+ int child_exited = 0, status = 0;
+ int time_to_sleep, sleep_time;
+ struct timespec time_start, time_stop;
+
+ if (interval)
+ sleep_time = interval;
+ else if (timeout)
+ sleep_time = timeout;
+ else
+ sleep_time = 1000;
+
+ time_to_sleep = sleep_time;
+
+ while (!done) {
+ if (forks)
+ child_exited = waitpid(child_pid, &status, WNOHANG);
+ else
+ child_exited = !is_target_alive(&target, evsel_list->core.threads) ? 1 : 0;
+
+ if (child_exited)
+ break;
+
+ clock_gettime(CLOCK_MONOTONIC, &time_start);
+ if (!(evlist__poll(evsel_list, time_to_sleep) > 0)) { /* poll timeout or EINTR */
+ if (timeout || handle_interval(interval, times))
+ break;
+ time_to_sleep = sleep_time;
+ } else { /* fd revent */
+ process_evlist(evsel_list, interval);
+ clock_gettime(CLOCK_MONOTONIC, &time_stop);
+ compute_tts(&time_start, &time_stop, &time_to_sleep);
+ }
+ }
+
+ return status;
+}
+
enum counter_recovery {
COUNTER_SKIP,
COUNTER_RETRY,
@@ -603,7 +705,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
char msg[BUFSIZ];
unsigned long long t0, t1;
struct evsel *counter;
- struct timespec ts;
size_t l;
int status = 0;
const bool forks = (argc > 0);
@@ -612,17 +713,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
int i, cpu;
bool second_pass = false;
- if (interval) {
- ts.tv_sec = interval / USEC_PER_MSEC;
- ts.tv_nsec = (interval % USEC_PER_MSEC) * NSEC_PER_MSEC;
- } else if (timeout) {
- ts.tv_sec = timeout / USEC_PER_MSEC;
- ts.tv_nsec = (timeout % USEC_PER_MSEC) * NSEC_PER_MSEC;
- } else {
- ts.tv_sec = 1;
- ts.tv_nsec = 0;
- }
-
if (forks) {
if (perf_evlist__prepare_workload(evsel_list, &target, argv, is_pipe,
workload_exec_failed_signal) < 0) {
@@ -779,16 +869,8 @@ try_again_reset:
perf_evlist__start_workload(evsel_list);
enable_counters();
- if (interval || timeout) {
- while (!waitpid(child_pid, &status, WNOHANG)) {
- nanosleep(&ts, NULL);
- if (timeout)
- break;
- process_interval();
- if (interval_count && !(--times))
- break;
- }
- }
+ if (interval || timeout || evlist__ctlfd_initialized(evsel_list))
+ status = dispatch_events(forks, timeout, interval, &times);
if (child_pid != -1) {
if (timeout)
kill(child_pid, SIGTERM);
@@ -805,18 +887,7 @@ try_again_reset:
psignal(WTERMSIG(status), argv[0]);
} else {
enable_counters();
- while (!done) {
- nanosleep(&ts, NULL);
- if (!is_target_alive(&target, evsel_list->core.threads))
- break;
- if (timeout)
- break;
- if (interval) {
- process_interval();
- if (interval_count && !(--times))
- break;
- }
- }
+ status = dispatch_events(forks, timeout, interval, &times);
}
disable_counters();
@@ -826,9 +897,9 @@ try_again_reset:
if (stat_config.walltime_run_table)
stat_config.walltime_run[run_idx] = t1 - t0;
- if (interval) {
+ if (interval && stat_config.summary) {
stat_config.interval = 0;
- stat_config.summary = true;
+ stat_config.stop_read_counter = true;
init_stats(&walltime_nsecs_stats);
update_stats(&walltime_nsecs_stats, t1 - t0);
@@ -970,6 +1041,33 @@ static int parse_metric_groups(const struct option *opt,
&stat_config.metric_events);
}
+static int parse_control_option(const struct option *opt,
+ const char *str,
+ int unset __maybe_unused)
+{
+ char *comma = NULL, *endptr = NULL;
+ struct perf_stat_config *config = (struct perf_stat_config *)opt->value;
+
+ if (strncmp(str, "fd:", 3))
+ return -EINVAL;
+
+ config->ctl_fd = strtoul(&str[3], &endptr, 0);
+ if (endptr == &str[3])
+ return -EINVAL;
+
+ comma = strchr(str, ',');
+ if (comma) {
+ if (endptr != comma)
+ return -EINVAL;
+
+ config->ctl_fd_ack = strtoul(comma + 1, &endptr, 0);
+ if (endptr == comma + 1 || *endptr != '\0')
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static struct option stat_options[] = {
OPT_BOOLEAN('T', "transaction", &transaction_run,
"hardware transaction statistics"),
@@ -1041,8 +1139,8 @@ static struct option stat_options[] = {
"aggregate counts per thread", AGGR_THREAD),
OPT_SET_UINT(0, "per-node", &stat_config.aggr_mode,
"aggregate counts per numa node", AGGR_NODE),
- OPT_UINTEGER('D', "delay", &stat_config.initial_delay,
- "ms to wait before starting measurement after program start"),
+ OPT_INTEGER('D', "delay", &stat_config.initial_delay,
+ "ms to wait before starting measurement after program start (-1: start with events disabled)"),
OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
"Only print computed metrics. No raw values", enable_metric_only),
OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
@@ -1066,11 +1164,17 @@ static struct option stat_options[] = {
"Use with 'percore' event qualifier to show the event "
"counts of one hardware thread by sum up total hardware "
"threads of same physical core"),
+ OPT_BOOLEAN(0, "summary", &stat_config.summary,
+ "print summary for interval mode"),
#ifdef HAVE_LIBPFM
OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
"libpfm4 event selector. use 'perf list' to list available events",
parse_libpfm_events_option),
#endif
+ OPT_CALLBACK(0, "control", &stat_config, "fd:ctl-fd[,ack-fd]",
+ "Listen on ctl-fd descriptor for command to control measurement ('enable': enable events, 'disable': disable events).\n"
+ "\t\t\t Optionally send control command completion ('ack\\n') to ack-fd descriptor.",
+ parse_control_option),
OPT_END()
};
@@ -1679,19 +1783,17 @@ static int add_default_attributes(void)
if (target__has_cpu(&target))
default_attrs0[0].config = PERF_COUNT_SW_CPU_CLOCK;
- if (perf_evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
+ if (evlist__add_default_attrs(evsel_list, default_attrs0) < 0)
return -1;
if (pmu_have_event("cpu", "stalled-cycles-frontend")) {
- if (perf_evlist__add_default_attrs(evsel_list,
- frontend_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, frontend_attrs) < 0)
return -1;
}
if (pmu_have_event("cpu", "stalled-cycles-backend")) {
- if (perf_evlist__add_default_attrs(evsel_list,
- backend_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, backend_attrs) < 0)
return -1;
}
- if (perf_evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
+ if (evlist__add_default_attrs(evsel_list, default_attrs1) < 0)
return -1;
}
@@ -1701,21 +1803,21 @@ static int add_default_attributes(void)
return 0;
/* Append detailed run extra attributes: */
- if (perf_evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, detailed_attrs) < 0)
return -1;
if (detailed_run < 2)
return 0;
/* Append very detailed run extra attributes: */
- if (perf_evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
+ if (evlist__add_default_attrs(evsel_list, very_detailed_attrs) < 0)
return -1;
if (detailed_run < 3)
return 0;
/* Append very, very detailed run extra attributes: */
- return perf_evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
+ return evlist__add_default_attrs(evsel_list, very_very_detailed_attrs);
}
static const char * const stat_record_usage[] = {
@@ -2242,6 +2344,9 @@ int cmd_stat(int argc, const char **argv)
signal(SIGALRM, skip_signal);
signal(SIGABRT, skip_signal);
+ if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack))
+ goto out;
+
status = 0;
for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
if (stat_config.run_count != 1 && verbose > 0)
@@ -2261,6 +2366,8 @@ int cmd_stat(int argc, const char **argv)
if (!forever && status != -1 && (!interval || stat_config.summary))
print_counters(NULL, argc, argv);
+ evlist__finalize_ctlfd(evsel_list);
+
if (STAT_RECORD) {
/*
* We synthesize the kernel mmap record just so that older tools
@@ -2307,6 +2414,7 @@ out:
evlist__delete(evsel_list);
+ metricgroup__rblist_exit(&stat_config.metric_events);
runtime_stat_delete(&stat_config);
return status;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 13889d73f8dd..7c64134472c7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1627,7 +1627,7 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
if (!top.evlist->core.nr_entries &&
- perf_evlist__add_default(top.evlist) < 0) {
+ evlist__add_default(top.evlist) < 0) {
pr_err("Not enough memory for event selector list\n");
goto out_delete_evlist;
}
@@ -1746,6 +1746,7 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
}
+#ifdef HAVE_LIBBPF_SUPPORT
if (!top.record_opts.no_bpf_event) {
top.sb_evlist = evlist__new();
@@ -1759,6 +1760,7 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
}
}
+#endif
if (perf_evlist__start_sb_thread(top.sb_evlist, target)) {
pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 4cbb64edc998..bea461b6f937 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -3917,8 +3917,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
}
if (trace->sched &&
- perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
- trace__sched_stat_runtime))
+ evlist__add_newtp(evlist, "sched", "sched_stat_runtime", trace__sched_stat_runtime))
goto out_error_sched_stat_runtime;
/*
* If a global cgroup was set, apply it to all the events without an
@@ -4150,11 +4149,11 @@ out_error_raw_syscalls:
goto out_error;
out_error_mmap:
- perf_evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
+ evlist__strerror_mmap(evlist, errno, errbuf, sizeof(errbuf));
goto out_error;
out_error_open:
- perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+ evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
out_error:
fprintf(trace->output, "%s\n", errbuf);
@@ -4813,7 +4812,7 @@ int cmd_trace(int argc, const char **argv)
"per thread proc mmap processing timeout in ms"),
OPT_CALLBACK('G', "cgroup", &trace, "name", "monitor event in cgroup name only",
trace__parse_cgroups),
- OPT_UINTEGER('D', "delay", &trace.opts.initial_delay,
+ OPT_INTEGER('D', "delay", &trace.opts.initial_delay,
"ms to wait before starting measurement after program "
"start"),
OPTS_EVSWITCH(&trace.evswitch),
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index 94c2bc22c2bb..0b4d6431b072 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -128,6 +128,9 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in
# diff non-symmetric files
check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
+# These will require a beauty_check when we get some more like that
+check_2 tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h
+
# check duplicated library files
check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h
check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
index 80816d6402e9..f8784c608479 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
@@ -60,7 +60,7 @@
},
{
"BriefDescription": "Stalls due to short latency decimal floating ops.",
- "MetricExpr": "(PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_DFLONG)/PM_RUN_INST_CMPL",
+ "MetricExpr": "dfu_stall_cpi - dflong_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "dfu_other_stall_cpi"
},
@@ -72,7 +72,7 @@
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved off node memory/cache",
- "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM - PM_CMPLU_STALL_DMISS_REMOTE)/PM_RUN_INST_CMPL",
+ "MetricExpr": "dmiss_non_local_stall_cpi - dmiss_remote_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_distant_stall_cpi"
},
@@ -90,7 +90,7 @@
},
{
"BriefDescription": "Completion stall due to cache miss that resolves in the L2 or L3 without conflict",
- "MetricExpr": "(PM_CMPLU_STALL_DMISS_L2L3 - PM_CMPLU_STALL_DMISS_L2L3_CONFLICT)/PM_RUN_INST_CMPL",
+ "MetricExpr": "dmiss_l2l3_stall_cpi - dmiss_l2l3_conflict_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_l2l3_noconflict_stall_cpi"
},
@@ -114,7 +114,7 @@
},
{
"BriefDescription": "Completion stall by Dcache miss which resolved outside of local memory",
- "MetricExpr": "(PM_CMPLU_STALL_DMISS_L3MISS - PM_CMPLU_STALL_DMISS_L21_L31 - PM_CMPLU_STALL_DMISS_LMEM)/PM_RUN_INST_CMPL",
+ "MetricExpr": "dmiss_l3miss_stall_cpi - dmiss_l21_l31_stall_cpi - dmiss_lmem_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "dmiss_non_local_stall_cpi"
},
@@ -126,7 +126,7 @@
},
{
"BriefDescription": "Stalls due to short latency double precision ops.",
- "MetricExpr": "(PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DPLONG)/PM_RUN_INST_CMPL",
+ "MetricExpr": "dp_stall_cpi - dplong_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "dp_other_stall_cpi"
},
@@ -155,7 +155,7 @@
"MetricName": "emq_full_stall_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL)/PM_RUN_INST_CMPL",
+ "MetricExpr": "erat_miss_stall_cpi + emq_full_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "emq_stall_cpi"
},
@@ -173,7 +173,7 @@
},
{
"BriefDescription": "Completion stall due to execution units for other reasons.",
- "MetricExpr": "(PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_DP - PM_CMPLU_STALL_DFU - PM_CMPLU_STALL_PM - PM_CMPLU_STALL_CRYPTO - PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
+ "MetricExpr": "exec_unit_stall_cpi - scalar_stall_cpi - vector_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "exec_unit_other_stall_cpi"
},
@@ -197,7 +197,7 @@
},
{
"BriefDescription": "Stalls due to short latency integer ops",
- "MetricExpr": "(PM_CMPLU_STALL_FXU - PM_CMPLU_STALL_FXLONG)/PM_RUN_INST_CMPL",
+ "MetricExpr": "fxu_stall_cpi - fxlong_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "fxu_other_stall_cpi"
},
@@ -221,7 +221,7 @@
},
{
"BriefDescription": "Instruction Completion Table other stalls",
- "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL",
+ "MetricExpr": "nothing_dispatched_cpi - ict_noslot_ic_miss_cpi - ict_noslot_br_mpred_icmiss_cpi - ict_noslot_br_mpred_cpi - ict_noslot_disp_held_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "ict_noslot_cyc_other_cpi"
},
@@ -245,7 +245,7 @@
},
{
"BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI",
- "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL",
+ "MetricExpr": "ict_noslot_disp_held_cpi - ict_noslot_disp_held_hb_full_cpi - ict_noslot_disp_held_sync_cpi - ict_noslot_disp_held_tbegin_cpi - ict_noslot_disp_held_issq_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "ict_noslot_disp_held_other_cpi"
},
@@ -263,7 +263,7 @@
},
{
"BriefDescription": "ICT_NOSLOT_IC_L2_CPI",
- "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL",
+ "MetricExpr": "ict_noslot_ic_miss_cpi - ict_noslot_ic_l3_cpi - ict_noslot_ic_l3miss_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "ict_noslot_ic_l2_cpi"
},
@@ -286,7 +286,7 @@
"MetricName": "ict_noslot_ic_miss_cpi"
},
{
- "MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL",
+ "MetricExpr": "ntc_issue_held_darq_full_cpi + ntc_issue_held_arb_cpi + ntc_issue_held_other_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "issue_hold_cpi"
},
@@ -327,7 +327,7 @@
"MetricName": "lrq_other_stall_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_LMQ_FULL + PM_CMPLU_STALL_ST_FWD + PM_CMPLU_STALL_LHS + PM_CMPLU_STALL_LSU_MFSPR + PM_CMPLU_STALL_LARX + PM_CMPLU_STALL_LRQ_OTHER)/PM_RUN_INST_CMPL",
+ "MetricExpr": "lmq_full_stall_cpi + st_fwd_stall_cpi + lhs_stall_cpi + lsu_mfspr_stall_cpi + larx_stall_cpi + lrq_other_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "lrq_stall_cpi"
},
@@ -338,7 +338,7 @@
"MetricName": "lsaq_arb_stall_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_LRQ_FULL + PM_CMPLU_STALL_SRQ_FULL + PM_CMPLU_STALL_LSAQ_ARB)/PM_RUN_INST_CMPL",
+ "MetricExpr": "lrq_full_stall_cpi + srq_full_stall_cpi + lsaq_arb_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsaq_stall_cpi"
},
@@ -362,7 +362,7 @@
},
{
"BriefDescription": "Completion LSU stall for other reasons",
- "MetricExpr": "(PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_LSU_FIN - PM_CMPLU_STALL_STORE_FINISH - PM_CMPLU_STALL_STORE_DATA - PM_CMPLU_STALL_EIEIO - PM_CMPLU_STALL_STCX - PM_CMPLU_STALL_SLB - PM_CMPLU_STALL_TEND - PM_CMPLU_STALL_PASTE - PM_CMPLU_STALL_TLBIE - PM_CMPLU_STALL_STORE_PIPE_ARB - PM_CMPLU_STALL_STORE_FIN_ARB - PM_CMPLU_STALL_LOAD_FINISH + PM_CMPLU_STALL_DCACHE_MISS - PM_CMPLU_STALL_LMQ_FULL - PM_CMPLU_STALL_ST_FWD - PM_CMPLU_STALL_LHS - PM_CMPLU_STALL_LSU_MFSPR - PM_CMPLU_STALL_LARX - PM_CMPLU_STALL_LRQ_OTHER + PM_CMPLU_STALL_ERAT_MISS + PM_CMPLU_STALL_EMQ_FULL - PM_CMPLU_STALL_LRQ_FULL - PM_CMPLU_STALL_SRQ_FULL - PM_CMPLU_STALL_LSAQ_ARB) / PM_RUN_INST_CMPL",
+ "MetricExpr": "lsu_stall_cpi - lsu_fin_stall_cpi - store_finish_stall_cpi - srq_stall_cpi - load_finish_stall_cpi + lsu_stall_dcache_miss_cpi - lrq_stall_cpi + emq_stall_cpi - lsaq_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "lsu_other_stall_cpi"
},
@@ -434,13 +434,13 @@
},
{
"BriefDescription": "Cycles unaccounted for.",
- "MetricExpr": "(PM_RUN_CYC - PM_1PLUS_PPC_CMPL - PM_CMPLU_STALL_THRD - PM_CMPLU_STALL - PM_ICT_NOSLOT_CYC)/PM_RUN_INST_CMPL",
+ "MetricExpr": "run_cpi - completion_cpi - thread_block_stall_cpi - stall_cpi - nothing_dispatched_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "other_cpi"
},
{
"BriefDescription": "Completion stall for other reasons",
- "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL",
+ "MetricExpr": "stall_cpi - ntc_disp_fin_stall_cpi - ntc_flush_stall_cpi - lsu_stall_cpi - exec_unit_stall_cpi - bru_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "other_stall_cpi"
},
@@ -469,7 +469,7 @@
"MetricName": "run_cyc_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_FXU + PM_CMPLU_STALL_DP + PM_CMPLU_STALL_DFU + PM_CMPLU_STALL_PM + PM_CMPLU_STALL_CRYPTO)/PM_RUN_INST_CMPL",
+ "MetricExpr": "fxu_stall_cpi + dp_stall_cpi + dfu_stall_cpi + pm_stall_cpi + crypto_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "scalar_stall_cpi"
},
@@ -492,7 +492,7 @@
"MetricName": "srq_full_stall_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_STORE_DATA + PM_CMPLU_STALL_EIEIO + PM_CMPLU_STALL_STCX + PM_CMPLU_STALL_SLB + PM_CMPLU_STALL_TEND + PM_CMPLU_STALL_PASTE + PM_CMPLU_STALL_TLBIE + PM_CMPLU_STALL_STORE_PIPE_ARB + PM_CMPLU_STALL_STORE_FIN_ARB)/PM_RUN_INST_CMPL",
+ "MetricExpr": "store_data_stall_cpi + eieio_stall_cpi + stcx_stall_cpi + slb_stall_cpi + tend_stall_cpi + paste_stall_cpi + tlbie_stall_cpi + store_pipe_arb_stall_cpi + store_fin_arb_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "srq_stall_cpi"
},
@@ -558,7 +558,7 @@
},
{
"BriefDescription": "Vector stalls due to small latency double precision ops",
- "MetricExpr": "(PM_CMPLU_STALL_VDP - PM_CMPLU_STALL_VDPLONG)/PM_RUN_INST_CMPL",
+ "MetricExpr": "vdp_stall_cpi - vdplong_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "vdp_other_stall_cpi"
},
@@ -575,7 +575,7 @@
"MetricName": "vdplong_stall_cpi"
},
{
- "MetricExpr": "(PM_CMPLU_STALL_VFXU + PM_CMPLU_STALL_VDP)/PM_RUN_INST_CMPL",
+ "MetricExpr": "vfxu_stall_cpi + vdp_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "vector_stall_cpi"
},
@@ -587,7 +587,7 @@
},
{
"BriefDescription": "Vector stalls due to small latency integer ops",
- "MetricExpr": "(PM_CMPLU_STALL_VFXU - PM_CMPLU_STALL_VFXLONG)/PM_RUN_INST_CMPL",
+ "MetricExpr": "vfxu_stall_cpi - vfxlong_stall_cpi",
"MetricGroup": "cpi_breakdown",
"MetricName": "vfxu_other_stall_cpi"
},
@@ -1844,7 +1844,7 @@
},
{
"BriefDescription": "% of DL1 reloads from Private L3, other core per Inst",
- "MetricExpr": "(PM_DATA_FROM_L31_MOD + PM_DATA_FROM_L31_SHR) * 100 / PM_RUN_INST_CMPL",
+ "MetricExpr": "dl1_reload_from_l31_mod_rate_percent + dl1_reload_from_l31_shr_rate_percent",
"MetricName": "dl1_reload_from_l31_rate_percent"
},
{
@@ -1979,7 +1979,7 @@
},
{
"BriefDescription": "Completion stall because a different thread was using the completion pipe",
- "MetricExpr": "(PM_CMPLU_STALL_THRD - PM_CMPLU_STALL_EXCEPTION - PM_CMPLU_STALL_ANY_SYNC - PM_CMPLU_STALL_SYNC_PMU_INT - PM_CMPLU_STALL_SPEC_FINISH - PM_CMPLU_STALL_FLUSH_ANY_THREAD - PM_CMPLU_STALL_LSU_FLUSH_NEXT - PM_CMPLU_STALL_NESTED_TBEGIN - PM_CMPLU_STALL_NESTED_TEND - PM_CMPLU_STALL_MTFPSCR)/PM_RUN_INST_CMPL",
+ "MetricExpr": "thread_block_stall_cpi - exception_stall_cpi - any_sync_stall_cpi - sync_pmu_int_stall_cpi - spec_finish_stall_cpi - flush_any_thread_stall_cpi - lsu_flush_next_stall_cpi - nested_tbegin_stall_cpi - nested_tend_stall_cpi - mtfpscr_stall_cpi",
"MetricName": "other_thread_cmpl_stall"
},
{
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
index c121e526442a..8383a37647ad 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
@@ -15,5 +15,40 @@
"MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )",
"MetricName": "PowerBUS_Frequency",
"ScaleUnit": "2.5e-7GHz"
+ },
+ {
+ "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@",
+ "MetricName" : "mcs01-read",
+ "MetricGroup" : "memory_bw",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@",
+ "MetricName" : "mcs23-read",
+ "MetricGroup" : "memory_bw",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@",
+ "MetricName" : "mcs01-write",
+ "MetricGroup" : "memory_bw",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@",
+ "MetricName" : "mcs23-write",
+ "MetricGroup" : "memory-bandwidth",
+ "ScaleUnit": "6.1e-5MB"
+ },
+ {
+ "MetricExpr" : "nest_powerbus0_imc@PM_PB_CYC@",
+ "MetricName" : "powerbus_freq",
+ "ScaleUnit": "1e-9GHz"
+ },
+ {
+ "MetricExpr" : "(nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_RD_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_RD_DISP_PORT23@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT01@ + nest_mcs01_imc@PM_MCS01_128B_WR_DISP_PORT23@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT01@ + nest_mcs23_imc@PM_MCS23_128B_WR_DISP_PORT23@)",
+ "MetricName" : "Memory-bandwidth-MCS",
+ "MetricGroup" : "memory_bw",
+ "ScaleUnit": "6.1e-5MB"
}
]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json
index 7e1aa8273935..653b11b23399 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen1/core.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json
@@ -61,7 +61,7 @@
{
"EventName": "ex_ret_brn_ind_misp",
"EventCode": "0xca",
- "BriefDescription": "Retired Indirect Branch Instructions Mispredicted.",
+ "BriefDescription": "Retired Indirect Branch Instructions Mispredicted."
},
{
"EventName": "ex_ret_mmx_fp_instr.sse_instr",
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json
index de89e5a44ff1..4b75183da94a 100644
--- a/tools/perf/pmu-events/arch/x86/amdzen2/core.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/core.json
@@ -125,6 +125,6 @@
{
"EventName": "ex_ret_fus_brnch_inst",
"EventCode": "0x1d0",
- "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8.",
+ "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8."
}
]
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index fa86c5f997cc..fc9c158bfa13 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -137,7 +137,7 @@ static char *fixregex(char *s)
return s;
/* allocate space for a new string */
- fixed = (char *) malloc(len + 1);
+ fixed = (char *) malloc(len + esc_count + 1);
if (!fixed)
return NULL;
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index cd00498a5dce..84352fc49a20 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -59,6 +59,7 @@ perf-y += genelf.o
perf-y += api-io.o
perf-y += demangle-java-test.o
perf-y += pfm.o
+perf-y += parse-metric.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README
index 430024f618f1..a36f49fb4dbe 100644
--- a/tools/perf/tests/attr/README
+++ b/tools/perf/tests/attr/README
@@ -49,10 +49,12 @@ Following tests are defined (with perf commands):
perf record --call-graph fp kill (test-record-graph-fp)
perf record --group -e cycles,instructions kill (test-record-group)
perf record -e '{cycles,instructions}' kill (test-record-group1)
+ perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2)
perf record -D kill (test-record-no-delay)
perf record -i kill (test-record-no-inherit)
perf record -n kill (test-record-no-samples)
perf record -c 100 -P kill (test-record-period)
+ perf record -c 1 --pfm-events=cycles:period=2 (test-record-pfm-period)
perf record -R kill (test-record-raw)
perf stat -e cycles kill (test-stat-basic)
perf stat kill (test-stat-default)
diff --git a/tools/perf/tests/attr/test-record-group2 b/tools/perf/tests/attr/test-record-group2
new file mode 100644
index 000000000000..6b9f8d182ce1
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-group2
@@ -0,0 +1,29 @@
+[config]
+command = record
+args = --no-bpf-event -e '{cycles/period=1234000/,instructions/period=6789000/}:S' kill >/dev/null 2>&1
+ret = 1
+
+[event-1:base-record]
+fd=1
+group_fd=-1
+config=0|1
+sample_period=1234000
+sample_type=87
+read_format=12
+inherit=0
+freq=0
+
+[event-2:base-record]
+fd=2
+group_fd=1
+config=0|1
+sample_period=6789000
+sample_type=87
+read_format=12
+disabled=0
+inherit=0
+mmap=0
+comm=0
+freq=0
+enable_on_exec=0
+task=0
diff --git a/tools/perf/tests/attr/test-record-pfm-period b/tools/perf/tests/attr/test-record-pfm-period
new file mode 100644
index 000000000000..368f5b814094
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-pfm-period
@@ -0,0 +1,9 @@
+[config]
+command = record
+args = --no-bpf-event -c 10000 --pfm-events=cycles:period=77777 kill >/dev/null 2>&1
+ret = 1
+
+[event:base-record]
+sample_period=77777
+sample_type=7
+freq=0
diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c
index da8ec1e8e064..cc9fbcedb364 100644
--- a/tools/perf/tests/bp_signal.c
+++ b/tools/perf/tests/bp_signal.c
@@ -45,10 +45,13 @@ volatile long the_var;
#if defined (__x86_64__)
extern void __test_function(volatile long *ptr);
asm (
+ ".pushsection .text;"
".globl __test_function\n"
+ ".type __test_function, @function;"
"__test_function:\n"
"incq (%rdi)\n"
- "ret\n");
+ "ret\n"
+ ".popsection\n");
#else
static void __test_function(volatile long *ptr)
{
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 5d20bf8397f0..cd77e334e577 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -197,7 +197,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void),
perf_mmap__read_done(&md->core);
}
- if (count != expect) {
+ if (count != expect * evlist->core.nr_entries) {
pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count);
goto out_delete_evlist;
}
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index da5b6cc23f25..d328caaba45d 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -338,6 +338,10 @@ static struct test generic_tests[] = {
.func = test__demangle_java,
},
{
+ .desc = "Parse and process metrics",
+ .func = test__parse_metric,
+ },
+ {
.func = NULL,
},
};
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 6fe221d31f07..035c9123549a 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -678,7 +678,7 @@ static int do_test_code_reading(bool try_kcore)
if (verbose > 0) {
char errbuf[512];
- perf_evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
+ evlist__strerror_open(evlist, errno, errbuf, sizeof(errbuf));
pr_debug("perf_evlist__open() failed!\n%s\n", errbuf);
}
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 1cb02ca2b15f..4d01051951cd 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -18,14 +18,15 @@ static int test(struct expr_parse_ctx *ctx, const char *e, double val2)
int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
{
+ struct expr_id_data *val_ptr;
const char *p;
- double val, *val_ptr;
+ double val;
int ret;
struct expr_parse_ctx ctx;
expr__ctx_init(&ctx);
- expr__add_id(&ctx, strdup("FOO"), 1);
- expr__add_id(&ctx, strdup("BAR"), 2);
+ expr__add_id_val(&ctx, strdup("FOO"), 1);
+ expr__add_id_val(&ctx, strdup("BAR"), 2);
ret = test(&ctx, "1+1", 2);
ret |= test(&ctx, "FOO+BAR", 3);
@@ -39,6 +40,14 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
ret |= test(&ctx, "1+1 if 3*4 else 0", 2);
ret |= test(&ctx, "1.1 + 2.1", 3.2);
ret |= test(&ctx, ".1 + 2.", 2.1);
+ ret |= test(&ctx, "d_ratio(1, 2)", 0.5);
+ ret |= test(&ctx, "d_ratio(2.5, 0)", 0);
+ ret |= test(&ctx, "1.1 < 2.2", 1);
+ ret |= test(&ctx, "2.2 > 1.1", 1);
+ ret |= test(&ctx, "1.1 < 1.1", 0);
+ ret |= test(&ctx, "2.2 > 2.2", 0);
+ ret |= test(&ctx, "2.2 < 1.1", 0);
+ ret |= test(&ctx, "1.1 > 2.2", 0);
if (ret)
return ret;
diff --git a/tools/perf/tests/fdarray.c b/tools/perf/tests/fdarray.c
index c7c81c4a5b2b..d9eca8e86a6b 100644
--- a/tools/perf/tests/fdarray.c
+++ b/tools/perf/tests/fdarray.c
@@ -12,6 +12,7 @@ static void fdarray__init_revents(struct fdarray *fda, short revents)
for (fd = 0; fd < fda->nr; ++fd) {
fda->entries[fd].fd = fda->nr - fd;
+ fda->entries[fd].events = revents;
fda->entries[fd].revents = revents;
}
}
@@ -29,7 +30,7 @@ static int fdarray__fprintf_prefix(struct fdarray *fda, const char *prefix, FILE
int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_unused)
{
- int nr_fds, expected_fd[2], fd, err = TEST_FAIL;
+ int nr_fds, err = TEST_FAIL;
struct fdarray *fda = fdarray__new(5, 5);
if (fda == NULL) {
@@ -55,7 +56,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
fdarray__init_revents(fda, POLLHUP);
fda->entries[2].revents = POLLIN;
- expected_fd[0] = fda->entries[2].fd;
pr_debug("\nfiltering all but fda->entries[2]:");
fdarray__fprintf_prefix(fda, "before", stderr);
@@ -66,17 +66,9 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
goto out_delete;
}
- if (fda->entries[0].fd != expected_fd[0]) {
- pr_debug("\nfda->entries[0].fd=%d != %d\n",
- fda->entries[0].fd, expected_fd[0]);
- goto out_delete;
- }
-
fdarray__init_revents(fda, POLLHUP);
fda->entries[0].revents = POLLIN;
- expected_fd[0] = fda->entries[0].fd;
fda->entries[3].revents = POLLIN;
- expected_fd[1] = fda->entries[3].fd;
pr_debug("\nfiltering all but (fda->entries[0], fda->entries[3]):");
fdarray__fprintf_prefix(fda, "before", stderr);
@@ -88,14 +80,6 @@ int test__fdarray__filter(struct test *test __maybe_unused, int subtest __maybe_
goto out_delete;
}
- for (fd = 0; fd < 2; ++fd) {
- if (fda->entries[fd].fd != expected_fd[fd]) {
- pr_debug("\nfda->entries[%d].fd=%d != %d\n", fd,
- fda->entries[fd].fd, expected_fd[fd]);
- goto out_delete;
- }
- }
-
pr_debug("\n");
err = 0;
@@ -128,7 +112,7 @@ int test__fdarray__add(struct test *test __maybe_unused, int subtest __maybe_unu
}
#define FDA_ADD(_idx, _fd, _revents, _nr) \
- if (fdarray__add(fda, _fd, _revents) < 0) { \
+ if (fdarray__add(fda, _fd, _revents, fdarray_flag__default) < 0) { \
pr_debug("\n%d: fdarray__add(fda, %d, %d) failed!", \
__LINE__,_fd, _revents); \
goto out_delete; \
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 895188b63f96..aae0fd9045c1 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -631,6 +631,34 @@ static int test__checkterms_simple(struct list_head *terms)
TEST_ASSERT_VAL("wrong val", term->val.num == 1);
TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "umask"));
+ /*
+ * read
+ *
+ * The perf_pmu__test_parse_init injects 'read' term into
+ * perf_pmu_events_list, so 'read' is evaluated as read term
+ * and not as raw event with 'ead' hex value.
+ */
+ term = list_entry(term->list.next, struct parse_events_term, list);
+ TEST_ASSERT_VAL("wrong type term",
+ term->type_term == PARSE_EVENTS__TERM_TYPE_USER);
+ TEST_ASSERT_VAL("wrong type val",
+ term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ TEST_ASSERT_VAL("wrong val", term->val.num == 1);
+ TEST_ASSERT_VAL("wrong config", !strcmp(term->config, "read"));
+
+ /*
+ * r0xead
+ *
+ * To be still able to pass 'ead' value with 'r' syntax,
+ * we added support to parse 'r0xHEX' event.
+ */
+ term = list_entry(term->list.next, struct parse_events_term, list);
+ TEST_ASSERT_VAL("wrong type term",
+ term->type_term == PARSE_EVENTS__TERM_TYPE_CONFIG);
+ TEST_ASSERT_VAL("wrong type val",
+ term->type_val == PARSE_EVENTS__TERM_TYPE_NUM);
+ TEST_ASSERT_VAL("wrong val", term->val.num == 0xead);
+ TEST_ASSERT_VAL("wrong config", !term->config);
return 0;
}
@@ -691,7 +719,7 @@ static int test__group2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
@@ -814,7 +842,7 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
@@ -1766,6 +1794,11 @@ static struct evlist_test test__events_pmu[] = {
.check = test__checkevent_raw_pmu,
.id = 4,
},
+ {
+ .name = "software/r0x1a/",
+ .check = test__checkevent_raw_pmu,
+ .id = 4,
+ },
};
struct terms_test {
@@ -1776,7 +1809,7 @@ struct terms_test {
static struct terms_test test__terms[] = {
[0] = {
- .str = "config=10,config1,config2=3,umask=1",
+ .str = "config=10,config1,config2=3,umask=1,read,r0xead",
.check = test__checkterms_simple,
},
};
@@ -1836,6 +1869,13 @@ static int test_term(struct terms_test *t)
INIT_LIST_HEAD(&terms);
+ /*
+ * The perf_pmu__test_parse_init prepares perf_pmu_events_list
+ * which gets freed in parse_events_terms.
+ */
+ if (perf_pmu__test_parse_init())
+ return -1;
+
ret = parse_events_terms(&terms, t->str);
if (ret) {
pr_debug("failed to parse terms '%s', err %d\n",
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
new file mode 100644
index 000000000000..cd7331aac3bd
--- /dev/null
+++ b/tools/perf/tests/parse-metric.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/compiler.h>
+#include <string.h>
+#include <perf/cpumap.h>
+#include <perf/evlist.h>
+#include "metricgroup.h"
+#include "tests.h"
+#include "pmu-events/pmu-events.h"
+#include "evlist.h"
+#include "rblist.h"
+#include "debug.h"
+#include "expr.h"
+#include "stat.h"
+#include <perf/cpumap.h>
+#include <perf/evlist.h>
+
+static struct pmu_event pme_test[] = {
+{
+ .metric_expr = "inst_retired.any / cpu_clk_unhalted.thread",
+ .metric_name = "IPC",
+ .metric_group = "group1",
+},
+{
+ .metric_expr = "idq_uops_not_delivered.core / (4 * (( ( cpu_clk_unhalted.thread / 2 ) * "
+ "( 1 + cpu_clk_unhalted.one_thread_active / cpu_clk_unhalted.ref_xclk ) )))",
+ .metric_name = "Frontend_Bound_SMT",
+},
+{
+ .metric_expr = "l1d\\-loads\\-misses / inst_retired.any",
+ .metric_name = "dcache_miss_cpi",
+},
+{
+ .metric_expr = "l1i\\-loads\\-misses / inst_retired.any",
+ .metric_name = "icache_miss_cycles",
+},
+{
+ .metric_expr = "(dcache_miss_cpi + icache_miss_cycles)",
+ .metric_name = "cache_miss_cycles",
+ .metric_group = "group1",
+},
+{
+ .metric_expr = "l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hit",
+ .metric_name = "DCache_L2_All_Hits",
+},
+{
+ .metric_expr = "max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) + "
+ "l2_rqsts.pf_miss + l2_rqsts.rfo_miss",
+ .metric_name = "DCache_L2_All_Miss",
+},
+{
+ .metric_expr = "dcache_l2_all_hits + dcache_l2_all_miss",
+ .metric_name = "DCache_L2_All",
+},
+{
+ .metric_expr = "d_ratio(dcache_l2_all_hits, dcache_l2_all)",
+ .metric_name = "DCache_L2_Hits",
+},
+{
+ .metric_expr = "d_ratio(dcache_l2_all_miss, dcache_l2_all)",
+ .metric_name = "DCache_L2_Misses",
+},
+{
+ .metric_expr = "ipc + m2",
+ .metric_name = "M1",
+},
+{
+ .metric_expr = "ipc + m1",
+ .metric_name = "M2",
+},
+{
+ .metric_expr = "1/m3",
+ .metric_name = "M3",
+},
+{
+ .name = NULL,
+}
+};
+
+static struct pmu_events_map map = {
+ .cpuid = "test",
+ .version = "1",
+ .type = "core",
+ .table = pme_test,
+};
+
+struct value {
+ const char *event;
+ u64 val;
+};
+
+static u64 find_value(const char *name, struct value *values)
+{
+ struct value *v = values;
+
+ while (v->event) {
+ if (!strcmp(name, v->event))
+ return v->val;
+ v++;
+ };
+ return 0;
+}
+
+static void load_runtime_stat(struct runtime_stat *st, struct evlist *evlist,
+ struct value *vals)
+{
+ struct evsel *evsel;
+ u64 count;
+
+ evlist__for_each_entry(evlist, evsel) {
+ count = find_value(evsel->name, vals);
+ perf_stat__update_shadow_stats(evsel, count, 0, st);
+ }
+}
+
+static double compute_single(struct rblist *metric_events, struct evlist *evlist,
+ struct runtime_stat *st, const char *name)
+{
+ struct metric_expr *mexp;
+ struct metric_event *me;
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ me = metricgroup__lookup(metric_events, evsel, false);
+ if (me != NULL) {
+ list_for_each_entry (mexp, &me->head, nd) {
+ if (strcmp(mexp->metric_name, name))
+ continue;
+ return test_generic_metric(mexp, 0, st);
+ }
+ }
+ }
+ return 0.;
+}
+
+static int __compute_metric(const char *name, struct value *vals,
+ const char *name1, double *ratio1,
+ const char *name2, double *ratio2)
+{
+ struct rblist metric_events = {
+ .nr_entries = 0,
+ };
+ struct perf_cpu_map *cpus;
+ struct runtime_stat st;
+ struct evlist *evlist;
+ int err;
+
+ /*
+ * We need to prepare evlist for stat mode running on CPU 0
+ * because that's where all the stats are going to be created.
+ */
+ evlist = evlist__new();
+ if (!evlist)
+ return -ENOMEM;
+
+ cpus = perf_cpu_map__new("0");
+ if (!cpus) {
+ evlist__delete(evlist);
+ return -ENOMEM;
+ }
+
+ perf_evlist__set_maps(&evlist->core, cpus, NULL);
+
+ /* Parse the metric into metric_events list. */
+ err = metricgroup__parse_groups_test(evlist, &map, name,
+ false, false,
+ &metric_events);
+ if (err)
+ goto out;
+
+ err = perf_evlist__alloc_stats(evlist, false);
+ if (err)
+ goto out;
+
+ /* Load the runtime stats with given numbers for events. */
+ runtime_stat__init(&st);
+ load_runtime_stat(&st, evlist, vals);
+
+ /* And execute the metric */
+ if (name1 && ratio1)
+ *ratio1 = compute_single(&metric_events, evlist, &st, name1);
+ if (name2 && ratio2)
+ *ratio2 = compute_single(&metric_events, evlist, &st, name2);
+
+out:
+ /* ... clenup. */
+ metricgroup__rblist_exit(&metric_events);
+ runtime_stat__exit(&st);
+ perf_evlist__free_stats(evlist);
+ perf_cpu_map__put(cpus);
+ evlist__delete(evlist);
+ return err;
+}
+
+static int compute_metric(const char *name, struct value *vals, double *ratio)
+{
+ return __compute_metric(name, vals, name, ratio, NULL, NULL);
+}
+
+static int compute_metric_group(const char *name, struct value *vals,
+ const char *name1, double *ratio1,
+ const char *name2, double *ratio2)
+{
+ return __compute_metric(name, vals, name1, ratio1, name2, ratio2);
+}
+
+static int test_ipc(void)
+{
+ double ratio;
+ struct value vals[] = {
+ { .event = "inst_retired.any", .val = 300 },
+ { .event = "cpu_clk_unhalted.thread", .val = 200 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to compute metric",
+ compute_metric("IPC", vals, &ratio) == 0);
+
+ TEST_ASSERT_VAL("IPC failed, wrong ratio",
+ ratio == 1.5);
+ return 0;
+}
+
+static int test_frontend(void)
+{
+ double ratio;
+ struct value vals[] = {
+ { .event = "idq_uops_not_delivered.core", .val = 300 },
+ { .event = "cpu_clk_unhalted.thread", .val = 200 },
+ { .event = "cpu_clk_unhalted.one_thread_active", .val = 400 },
+ { .event = "cpu_clk_unhalted.ref_xclk", .val = 600 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to compute metric",
+ compute_metric("Frontend_Bound_SMT", vals, &ratio) == 0);
+
+ TEST_ASSERT_VAL("Frontend_Bound_SMT failed, wrong ratio",
+ ratio == 0.45);
+ return 0;
+}
+
+static int test_cache_miss_cycles(void)
+{
+ double ratio;
+ struct value vals[] = {
+ { .event = "l1d-loads-misses", .val = 300 },
+ { .event = "l1i-loads-misses", .val = 200 },
+ { .event = "inst_retired.any", .val = 400 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to compute metric",
+ compute_metric("cache_miss_cycles", vals, &ratio) == 0);
+
+ TEST_ASSERT_VAL("cache_miss_cycles failed, wrong ratio",
+ ratio == 1.25);
+ return 0;
+}
+
+
+/*
+ * DCache_L2_All_Hits = l2_rqsts.demand_data_rd_hit + l2_rqsts.pf_hit + l2_rqsts.rfo_hi
+ * DCache_L2_All_Miss = max(l2_rqsts.all_demand_data_rd - l2_rqsts.demand_data_rd_hit, 0) +
+ * l2_rqsts.pf_miss + l2_rqsts.rfo_miss
+ * DCache_L2_All = dcache_l2_all_hits + dcache_l2_all_miss
+ * DCache_L2_Hits = d_ratio(dcache_l2_all_hits, dcache_l2_all)
+ * DCache_L2_Misses = d_ratio(dcache_l2_all_miss, dcache_l2_all)
+ *
+ * l2_rqsts.demand_data_rd_hit = 100
+ * l2_rqsts.pf_hit = 200
+ * l2_rqsts.rfo_hi = 300
+ * l2_rqsts.all_demand_data_rd = 400
+ * l2_rqsts.pf_miss = 500
+ * l2_rqsts.rfo_miss = 600
+ *
+ * DCache_L2_All_Hits = 600
+ * DCache_L2_All_Miss = MAX(400 - 100, 0) + 500 + 600 = 1400
+ * DCache_L2_All = 600 + 1400 = 2000
+ * DCache_L2_Hits = 600 / 2000 = 0.3
+ * DCache_L2_Misses = 1400 / 2000 = 0.7
+ */
+static int test_dcache_l2(void)
+{
+ double ratio;
+ struct value vals[] = {
+ { .event = "l2_rqsts.demand_data_rd_hit", .val = 100 },
+ { .event = "l2_rqsts.pf_hit", .val = 200 },
+ { .event = "l2_rqsts.rfo_hit", .val = 300 },
+ { .event = "l2_rqsts.all_demand_data_rd", .val = 400 },
+ { .event = "l2_rqsts.pf_miss", .val = 500 },
+ { .event = "l2_rqsts.rfo_miss", .val = 600 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to compute metric",
+ compute_metric("DCache_L2_Hits", vals, &ratio) == 0);
+
+ TEST_ASSERT_VAL("DCache_L2_Hits failed, wrong ratio",
+ ratio == 0.3);
+
+ TEST_ASSERT_VAL("failed to compute metric",
+ compute_metric("DCache_L2_Misses", vals, &ratio) == 0);
+
+ TEST_ASSERT_VAL("DCache_L2_Misses failed, wrong ratio",
+ ratio == 0.7);
+ return 0;
+}
+
+static int test_recursion_fail(void)
+{
+ double ratio;
+ struct value vals[] = {
+ { .event = "inst_retired.any", .val = 300 },
+ { .event = "cpu_clk_unhalted.thread", .val = 200 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to find recursion",
+ compute_metric("M1", vals, &ratio) == -1);
+
+ TEST_ASSERT_VAL("failed to find recursion",
+ compute_metric("M3", vals, &ratio) == -1);
+ return 0;
+}
+
+static int test_metric_group(void)
+{
+ double ratio1, ratio2;
+ struct value vals[] = {
+ { .event = "cpu_clk_unhalted.thread", .val = 200 },
+ { .event = "l1d-loads-misses", .val = 300 },
+ { .event = "l1i-loads-misses", .val = 200 },
+ { .event = "inst_retired.any", .val = 400 },
+ { .event = NULL, },
+ };
+
+ TEST_ASSERT_VAL("failed to find recursion",
+ compute_metric_group("group1", vals,
+ "IPC", &ratio1,
+ "cache_miss_cycles", &ratio2) == 0);
+
+ TEST_ASSERT_VAL("group IPC failed, wrong ratio",
+ ratio1 == 2.0);
+
+ TEST_ASSERT_VAL("group cache_miss_cycles failed, wrong ratio",
+ ratio2 == 1.25);
+ return 0;
+}
+
+int test__parse_metric(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ TEST_ASSERT_VAL("IPC failed", test_ipc() == 0);
+ TEST_ASSERT_VAL("frontend failed", test_frontend() == 0);
+ TEST_ASSERT_VAL("cache_miss_cycles failed", test_cache_miss_cycles() == 0);
+ TEST_ASSERT_VAL("DCache_L2 failed", test_dcache_l2() == 0);
+ TEST_ASSERT_VAL("recursion fail failed", test_recursion_fail() == 0);
+ TEST_ASSERT_VAL("test metric group", test_metric_group() == 0);
+ return 0;
+}
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 83adfd846ccd..67d3f5aad016 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -185,14 +185,14 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
err = perf_evlist__parse_sample(evlist, event, &sample);
if (err < 0) {
if (verbose > 0)
- perf_event__fprintf(event, stderr);
+ perf_event__fprintf(event, NULL, stderr);
pr_debug("Couldn't parse sample\n");
goto out_delete_evlist;
}
if (verbose > 0) {
pr_info("%" PRIu64" %d ", sample.time, sample.cpu);
- perf_event__fprintf(event, stderr);
+ perf_event__fprintf(event, NULL, stderr);
}
if (prev_time > sample.time) {
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index ab64b4a4e284..d3517a74d95e 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -274,6 +274,7 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count)
int res = 0;
bool use_uncore_table;
struct pmu_events_map *map = __test_pmu_get_events_map();
+ struct perf_pmu_alias *a, *tmp;
if (!map)
return -1;
@@ -347,6 +348,10 @@ static int __test__pmu_event_aliases(char *pmu_name, int *count)
pmu_name, alias->name);
}
+ list_for_each_entry_safe(a, tmp, &aliases, list) {
+ list_del(&a->list);
+ perf_pmu_free_alias(a);
+ }
free(pmu);
return res;
}
@@ -390,9 +395,9 @@ static bool is_number(const char *str)
return errno == 0 && end_ptr != str;
}
-static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe)
+static int check_parse_id(const char *id, struct parse_events_error *error,
+ struct perf_pmu *fake_pmu)
{
- struct parse_events_error error;
struct evlist *evlist;
int ret;
@@ -401,8 +406,18 @@ static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe)
return 0;
evlist = evlist__new();
- memset(&error, 0, sizeof(error));
- ret = parse_events(evlist, id, &error);
+ if (!evlist)
+ return -ENOMEM;
+ ret = __parse_events(evlist, id, error, fake_pmu);
+ evlist__delete(evlist);
+ return ret;
+}
+
+static int check_parse_cpu(const char *id, bool same_cpu, struct pmu_event *pe)
+{
+ struct parse_events_error error = { .idx = 0, };
+
+ int ret = check_parse_id(id, &error, NULL);
if (ret && same_cpu) {
pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n",
pe->metric_name, id, pe->metric_expr);
@@ -413,7 +428,18 @@ static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe)
id, pe->metric_name, pe->metric_expr);
ret = 0;
}
- evlist__delete(evlist);
+ free(error.str);
+ free(error.help);
+ free(error.first_str);
+ free(error.first_help);
+ return ret;
+}
+
+static int check_parse_fake(const char *id)
+{
+ struct parse_events_error error = { .idx = 0, };
+ int ret = check_parse_id(id, &error, &perf_pmu__fake);
+
free(error.str);
free(error.help);
free(error.first_str);
@@ -471,10 +497,10 @@ static int test_parsing(void)
*/
k = 1;
hashmap__for_each_entry((&ctx.ids), cur, bkt)
- expr__add_id(&ctx, strdup(cur->key), k++);
+ expr__add_id_val(&ctx, strdup(cur->key), k++);
hashmap__for_each_entry((&ctx.ids), cur, bkt) {
- if (check_parse_id(cur->key, map == cpus_map,
+ if (check_parse_cpu(cur->key, map == cpus_map,
pe))
ret++;
}
@@ -490,6 +516,100 @@ static int test_parsing(void)
return ret == 0 ? TEST_OK : TEST_SKIP;
}
+struct test_metric {
+ const char *str;
+};
+
+static struct test_metric metrics[] = {
+ { "(unc_p_power_state_occupancy.cores_c0 / unc_p_clockticks) * 100." },
+ { "imx8_ddr0@read\\-cycles@ * 4 * 4", },
+ { "imx8_ddr0@axid\\-read\\,axi_mask\\=0xffff\\,axi_id\\=0x0000@ * 4", },
+ { "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100", },
+ { "(imx8_ddr0@read\\-cycles@ + imx8_ddr0@write\\-cycles@)", },
+};
+
+static int metric_parse_fake(const char *str)
+{
+ struct expr_parse_ctx ctx;
+ struct hashmap_entry *cur;
+ double result;
+ int ret = -1;
+ size_t bkt;
+ int i;
+
+ pr_debug("parsing '%s'\n", str);
+
+ expr__ctx_init(&ctx);
+ if (expr__find_other(str, NULL, &ctx, 0) < 0) {
+ pr_err("expr__find_other failed\n");
+ return -1;
+ }
+
+ /*
+ * Add all ids with a made up value. The value may
+ * trigger divide by zero when subtracted and so try to
+ * make them unique.
+ */
+ i = 1;
+ hashmap__for_each_entry((&ctx.ids), cur, bkt)
+ expr__add_id_val(&ctx, strdup(cur->key), i++);
+
+ hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+ if (check_parse_fake(cur->key)) {
+ pr_err("check_parse_fake failed\n");
+ goto out;
+ }
+ }
+
+ if (expr__parse(&result, &ctx, str, 1))
+ pr_err("expr__parse failed\n");
+ else
+ ret = 0;
+
+out:
+ expr__ctx_clear(&ctx);
+ return ret;
+}
+
+/*
+ * Parse all the metrics for current architecture,
+ * or all defined cpus via the 'fake_pmu'
+ * in parse_events.
+ */
+static int test_parsing_fake(void)
+{
+ struct pmu_events_map *map;
+ struct pmu_event *pe;
+ unsigned int i, j;
+ int err = 0;
+
+ for (i = 0; i < ARRAY_SIZE(metrics); i++) {
+ err = metric_parse_fake(metrics[i].str);
+ if (err)
+ return err;
+ }
+
+ i = 0;
+ for (;;) {
+ map = &pmu_events_map[i++];
+ if (!map->table)
+ break;
+ j = 0;
+ for (;;) {
+ pe = &map->table[j++];
+ if (!pe->name && !pe->metric_group && !pe->metric_name)
+ break;
+ if (!pe->metric_expr)
+ continue;
+ err = metric_parse_fake(pe->metric_expr);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static const struct {
int (*func)(void);
const char *desc;
@@ -506,6 +626,10 @@ static const struct {
.func = test_parsing,
.desc = "Parsing of PMU event table metrics",
},
+ {
+ .func = test_parsing_fake,
+ .desc = "Parsing of PMU event table metrics with fake PMUs",
+ },
};
const char *test__pmu_events_subtest_get_desc(int subtest)
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 5c11fe2b3040..714e6830a758 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -173,6 +173,7 @@ int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused)
ret = 0;
} while (0);
+ perf_pmu__del_formats(&formats);
test_format_dir_put(format);
return ret;
}
diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
index 54030c18bfc2..bf9e729b3ecf 100755
--- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
@@ -20,13 +20,13 @@ file=$(mktemp /tmp/temporary_file.XXXXX)
record_open_file() {
echo "Recording open file:"
- perf record -o ${perfdata} -e probe:vfs_getname touch $file
+ perf record -o ${perfdata} -e probe:vfs_getname\* touch $file
}
perf_script_filenames() {
echo "Looking at perf.data file for vfs_getname records for the file we touched:"
perf script -i ${perfdata} | \
- egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname: +\([[:xdigit:]]+\) +pathname=\"${file}\""
+ egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname[_0-9]*: +\([[:xdigit:]]+\) +pathname=\"${file}\""
}
add_probe_vfs_getname || skip_if_no_debuginfo
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 76a4e352eaaf..4447a516c689 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -121,6 +121,7 @@ int test__demangle_java(struct test *test, int subtest);
int test__pfm(struct test *test, int subtest);
const char *test__pfm_subtest_get_desc(int subtest);
int test__pfm_subtest_get_nr(void);
+int test__parse_metric(struct test *test, int subtest);
bool test__bp_signal_is_supported(void);
bool test__bp_account_is_supported(void);
diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h
new file mode 100644
index 000000000000..e9cb30d8cbfb
--- /dev/null
+++ b/tools/perf/trace/beauty/include/linux/socket.h
@@ -0,0 +1,442 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_SOCKET_H
+#define _LINUX_SOCKET_H
+
+
+#include <asm/socket.h> /* arch-dependent defines */
+#include <linux/sockios.h> /* the SIOCxxx I/O controls */
+#include <linux/uio.h> /* iovec support */
+#include <linux/types.h> /* pid_t */
+#include <linux/compiler.h> /* __user */
+#include <uapi/linux/socket.h>
+
+struct file;
+struct pid;
+struct cred;
+struct socket;
+
+#define __sockaddr_check_size(size) \
+ BUILD_BUG_ON(((size) > sizeof(struct __kernel_sockaddr_storage)))
+
+#ifdef CONFIG_PROC_FS
+struct seq_file;
+extern void socket_seq_show(struct seq_file *seq);
+#endif
+
+typedef __kernel_sa_family_t sa_family_t;
+
+/*
+ * 1003.1g requires sa_family_t and that sa_data is char.
+ */
+
+struct sockaddr {
+ sa_family_t sa_family; /* address family, AF_xxx */
+ char sa_data[14]; /* 14 bytes of protocol address */
+};
+
+struct linger {
+ int l_onoff; /* Linger active */
+ int l_linger; /* How long to linger for */
+};
+
+#define sockaddr_storage __kernel_sockaddr_storage
+
+/*
+ * As we do 4.4BSD message passing we use a 4.4BSD message passing
+ * system, not 4.3. Thus msg_accrights(len) are now missing. They
+ * belong in an obscure libc emulation or the bin.
+ */
+
+struct msghdr {
+ void *msg_name; /* ptr to socket address structure */
+ int msg_namelen; /* size of socket address structure */
+ struct iov_iter msg_iter; /* data */
+
+ /*
+ * Ancillary data. msg_control_user is the user buffer used for the
+ * recv* side when msg_control_is_user is set, msg_control is the kernel
+ * buffer used for all other cases.
+ */
+ union {
+ void *msg_control;
+ void __user *msg_control_user;
+ };
+ bool msg_control_is_user : 1;
+ __kernel_size_t msg_controllen; /* ancillary data buffer length */
+ unsigned int msg_flags; /* flags on received message */
+ struct kiocb *msg_iocb; /* ptr to iocb for async requests */
+};
+
+struct user_msghdr {
+ void __user *msg_name; /* ptr to socket address structure */
+ int msg_namelen; /* size of socket address structure */
+ struct iovec __user *msg_iov; /* scatter/gather array */
+ __kernel_size_t msg_iovlen; /* # elements in msg_iov */
+ void __user *msg_control; /* ancillary data */
+ __kernel_size_t msg_controllen; /* ancillary data buffer length */
+ unsigned int msg_flags; /* flags on received message */
+};
+
+/* For recvmmsg/sendmmsg */
+struct mmsghdr {
+ struct user_msghdr msg_hdr;
+ unsigned int msg_len;
+};
+
+/*
+ * POSIX 1003.1g - ancillary data object information
+ * Ancillary data consits of a sequence of pairs of
+ * (cmsghdr, cmsg_data[])
+ */
+
+struct cmsghdr {
+ __kernel_size_t cmsg_len; /* data byte count, including hdr */
+ int cmsg_level; /* originating protocol */
+ int cmsg_type; /* protocol-specific type */
+};
+
+/*
+ * Ancillary data object information MACROS
+ * Table 5-14 of POSIX 1003.1g
+ */
+
+#define __CMSG_NXTHDR(ctl, len, cmsg) __cmsg_nxthdr((ctl),(len),(cmsg))
+#define CMSG_NXTHDR(mhdr, cmsg) cmsg_nxthdr((mhdr), (cmsg))
+
+#define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) )
+
+#define CMSG_DATA(cmsg) \
+ ((void *)(cmsg) + sizeof(struct cmsghdr))
+#define CMSG_USER_DATA(cmsg) \
+ ((void __user *)(cmsg) + sizeof(struct cmsghdr))
+#define CMSG_SPACE(len) (sizeof(struct cmsghdr) + CMSG_ALIGN(len))
+#define CMSG_LEN(len) (sizeof(struct cmsghdr) + (len))
+
+#define __CMSG_FIRSTHDR(ctl,len) ((len) >= sizeof(struct cmsghdr) ? \
+ (struct cmsghdr *)(ctl) : \
+ (struct cmsghdr *)NULL)
+#define CMSG_FIRSTHDR(msg) __CMSG_FIRSTHDR((msg)->msg_control, (msg)->msg_controllen)
+#define CMSG_OK(mhdr, cmsg) ((cmsg)->cmsg_len >= sizeof(struct cmsghdr) && \
+ (cmsg)->cmsg_len <= (unsigned long) \
+ ((mhdr)->msg_controllen - \
+ ((char *)(cmsg) - (char *)(mhdr)->msg_control)))
+#define for_each_cmsghdr(cmsg, msg) \
+ for (cmsg = CMSG_FIRSTHDR(msg); \
+ cmsg; \
+ cmsg = CMSG_NXTHDR(msg, cmsg))
+
+/*
+ * Get the next cmsg header
+ *
+ * PLEASE, do not touch this function. If you think, that it is
+ * incorrect, grep kernel sources and think about consequences
+ * before trying to improve it.
+ *
+ * Now it always returns valid, not truncated ancillary object
+ * HEADER. But caller still MUST check, that cmsg->cmsg_len is
+ * inside range, given by msg->msg_controllen before using
+ * ancillary object DATA. --ANK (980731)
+ */
+
+static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size,
+ struct cmsghdr *__cmsg)
+{
+ struct cmsghdr * __ptr;
+
+ __ptr = (struct cmsghdr*)(((unsigned char *) __cmsg) + CMSG_ALIGN(__cmsg->cmsg_len));
+ if ((unsigned long)((char*)(__ptr+1) - (char *) __ctl) > __size)
+ return (struct cmsghdr *)0;
+
+ return __ptr;
+}
+
+static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr *__cmsg)
+{
+ return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg);
+}
+
+static inline size_t msg_data_left(struct msghdr *msg)
+{
+ return iov_iter_count(&msg->msg_iter);
+}
+
+/* "Socket"-level control message types: */
+
+#define SCM_RIGHTS 0x01 /* rw: access rights (array of int) */
+#define SCM_CREDENTIALS 0x02 /* rw: struct ucred */
+#define SCM_SECURITY 0x03 /* rw: security label */
+
+struct ucred {
+ __u32 pid;
+ __u32 uid;
+ __u32 gid;
+};
+
+/* Supported address families. */
+#define AF_UNSPEC 0
+#define AF_UNIX 1 /* Unix domain sockets */
+#define AF_LOCAL 1 /* POSIX name for AF_UNIX */
+#define AF_INET 2 /* Internet IP Protocol */
+#define AF_AX25 3 /* Amateur Radio AX.25 */
+#define AF_IPX 4 /* Novell IPX */
+#define AF_APPLETALK 5 /* AppleTalk DDP */
+#define AF_NETROM 6 /* Amateur Radio NET/ROM */
+#define AF_BRIDGE 7 /* Multiprotocol bridge */
+#define AF_ATMPVC 8 /* ATM PVCs */
+#define AF_X25 9 /* Reserved for X.25 project */
+#define AF_INET6 10 /* IP version 6 */
+#define AF_ROSE 11 /* Amateur Radio X.25 PLP */
+#define AF_DECnet 12 /* Reserved for DECnet project */
+#define AF_NETBEUI 13 /* Reserved for 802.2LLC project*/
+#define AF_SECURITY 14 /* Security callback pseudo AF */
+#define AF_KEY 15 /* PF_KEY key management API */
+#define AF_NETLINK 16
+#define AF_ROUTE AF_NETLINK /* Alias to emulate 4.4BSD */
+#define AF_PACKET 17 /* Packet family */
+#define AF_ASH 18 /* Ash */
+#define AF_ECONET 19 /* Acorn Econet */
+#define AF_ATMSVC 20 /* ATM SVCs */
+#define AF_RDS 21 /* RDS sockets */
+#define AF_SNA 22 /* Linux SNA Project (nutters!) */
+#define AF_IRDA 23 /* IRDA sockets */
+#define AF_PPPOX 24 /* PPPoX sockets */
+#define AF_WANPIPE 25 /* Wanpipe API Sockets */
+#define AF_LLC 26 /* Linux LLC */
+#define AF_IB 27 /* Native InfiniBand address */
+#define AF_MPLS 28 /* MPLS */
+#define AF_CAN 29 /* Controller Area Network */
+#define AF_TIPC 30 /* TIPC sockets */
+#define AF_BLUETOOTH 31 /* Bluetooth sockets */
+#define AF_IUCV 32 /* IUCV sockets */
+#define AF_RXRPC 33 /* RxRPC sockets */
+#define AF_ISDN 34 /* mISDN sockets */
+#define AF_PHONET 35 /* Phonet sockets */
+#define AF_IEEE802154 36 /* IEEE802154 sockets */
+#define AF_CAIF 37 /* CAIF sockets */
+#define AF_ALG 38 /* Algorithm sockets */
+#define AF_NFC 39 /* NFC sockets */
+#define AF_VSOCK 40 /* vSockets */
+#define AF_KCM 41 /* Kernel Connection Multiplexor*/
+#define AF_QIPCRTR 42 /* Qualcomm IPC Router */
+#define AF_SMC 43 /* smc sockets: reserve number for
+ * PF_SMC protocol family that
+ * reuses AF_INET address family
+ */
+#define AF_XDP 44 /* XDP sockets */
+
+#define AF_MAX 45 /* For now.. */
+
+/* Protocol families, same as address families. */
+#define PF_UNSPEC AF_UNSPEC
+#define PF_UNIX AF_UNIX
+#define PF_LOCAL AF_LOCAL
+#define PF_INET AF_INET
+#define PF_AX25 AF_AX25
+#define PF_IPX AF_IPX
+#define PF_APPLETALK AF_APPLETALK
+#define PF_NETROM AF_NETROM
+#define PF_BRIDGE AF_BRIDGE
+#define PF_ATMPVC AF_ATMPVC
+#define PF_X25 AF_X25
+#define PF_INET6 AF_INET6
+#define PF_ROSE AF_ROSE
+#define PF_DECnet AF_DECnet
+#define PF_NETBEUI AF_NETBEUI
+#define PF_SECURITY AF_SECURITY
+#define PF_KEY AF_KEY
+#define PF_NETLINK AF_NETLINK
+#define PF_ROUTE AF_ROUTE
+#define PF_PACKET AF_PACKET
+#define PF_ASH AF_ASH
+#define PF_ECONET AF_ECONET
+#define PF_ATMSVC AF_ATMSVC
+#define PF_RDS AF_RDS
+#define PF_SNA AF_SNA
+#define PF_IRDA AF_IRDA
+#define PF_PPPOX AF_PPPOX
+#define PF_WANPIPE AF_WANPIPE
+#define PF_LLC AF_LLC
+#define PF_IB AF_IB
+#define PF_MPLS AF_MPLS
+#define PF_CAN AF_CAN
+#define PF_TIPC AF_TIPC
+#define PF_BLUETOOTH AF_BLUETOOTH
+#define PF_IUCV AF_IUCV
+#define PF_RXRPC AF_RXRPC
+#define PF_ISDN AF_ISDN
+#define PF_PHONET AF_PHONET
+#define PF_IEEE802154 AF_IEEE802154
+#define PF_CAIF AF_CAIF
+#define PF_ALG AF_ALG
+#define PF_NFC AF_NFC
+#define PF_VSOCK AF_VSOCK
+#define PF_KCM AF_KCM
+#define PF_QIPCRTR AF_QIPCRTR
+#define PF_SMC AF_SMC
+#define PF_XDP AF_XDP
+#define PF_MAX AF_MAX
+
+/* Maximum queue length specifiable by listen. */
+#define SOMAXCONN 4096
+
+/* Flags we can use with send/ and recv.
+ Added those for 1003.1g not all are supported yet
+ */
+
+#define MSG_OOB 1
+#define MSG_PEEK 2
+#define MSG_DONTROUTE 4
+#define MSG_TRYHARD 4 /* Synonym for MSG_DONTROUTE for DECnet */
+#define MSG_CTRUNC 8
+#define MSG_PROBE 0x10 /* Do not send. Only probe path f.e. for MTU */
+#define MSG_TRUNC 0x20
+#define MSG_DONTWAIT 0x40 /* Nonblocking io */
+#define MSG_EOR 0x80 /* End of record */
+#define MSG_WAITALL 0x100 /* Wait for a full request */
+#define MSG_FIN 0x200
+#define MSG_SYN 0x400
+#define MSG_CONFIRM 0x800 /* Confirm path validity */
+#define MSG_RST 0x1000
+#define MSG_ERRQUEUE 0x2000 /* Fetch message from error queue */
+#define MSG_NOSIGNAL 0x4000 /* Do not generate SIGPIPE */
+#define MSG_MORE 0x8000 /* Sender will send more */
+#define MSG_WAITFORONE 0x10000 /* recvmmsg(): block until 1+ packets avail */
+#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */
+#define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
+#define MSG_BATCH 0x40000 /* sendmmsg(): more messages coming */
+#define MSG_EOF MSG_FIN
+#define MSG_NO_SHARED_FRAGS 0x80000 /* sendpage() internal : page frags are not shared */
+#define MSG_SENDPAGE_DECRYPTED 0x100000 /* sendpage() internal : page may carry
+ * plain text and require encryption
+ */
+
+#define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */
+#define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */
+#define MSG_CMSG_CLOEXEC 0x40000000 /* Set close_on_exec for file
+ descriptor received through
+ SCM_RIGHTS */
+#if defined(CONFIG_COMPAT)
+#define MSG_CMSG_COMPAT 0x80000000 /* This message needs 32 bit fixups */
+#else
+#define MSG_CMSG_COMPAT 0 /* We never have 32 bit fixups */
+#endif
+
+
+/* Setsockoptions(2) level. Thanks to BSD these must match IPPROTO_xxx */
+#define SOL_IP 0
+/* #define SOL_ICMP 1 No-no-no! Due to Linux :-) we cannot use SOL_ICMP=1 */
+#define SOL_TCP 6
+#define SOL_UDP 17
+#define SOL_IPV6 41
+#define SOL_ICMPV6 58
+#define SOL_SCTP 132
+#define SOL_UDPLITE 136 /* UDP-Lite (RFC 3828) */
+#define SOL_RAW 255
+#define SOL_IPX 256
+#define SOL_AX25 257
+#define SOL_ATALK 258
+#define SOL_NETROM 259
+#define SOL_ROSE 260
+#define SOL_DECNET 261
+#define SOL_X25 262
+#define SOL_PACKET 263
+#define SOL_ATM 264 /* ATM layer (cell level) */
+#define SOL_AAL 265 /* ATM Adaption Layer (packet level) */
+#define SOL_IRDA 266
+#define SOL_NETBEUI 267
+#define SOL_LLC 268
+#define SOL_DCCP 269
+#define SOL_NETLINK 270
+#define SOL_TIPC 271
+#define SOL_RXRPC 272
+#define SOL_PPPOL2TP 273
+#define SOL_BLUETOOTH 274
+#define SOL_PNPIPE 275
+#define SOL_RDS 276
+#define SOL_IUCV 277
+#define SOL_CAIF 278
+#define SOL_ALG 279
+#define SOL_NFC 280
+#define SOL_KCM 281
+#define SOL_TLS 282
+#define SOL_XDP 283
+
+/* IPX options */
+#define IPX_TYPE 1
+
+extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr);
+extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data);
+
+struct timespec64;
+struct __kernel_timespec;
+struct old_timespec32;
+
+struct scm_timestamping_internal {
+ struct timespec64 ts[3];
+};
+
+extern void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_internal *tss);
+extern void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_internal *tss);
+
+/* The __sys_...msg variants allow MSG_CMSG_COMPAT iff
+ * forbid_cmsg_compat==false
+ */
+extern long __sys_recvmsg(int fd, struct user_msghdr __user *msg,
+ unsigned int flags, bool forbid_cmsg_compat);
+extern long __sys_sendmsg(int fd, struct user_msghdr __user *msg,
+ unsigned int flags, bool forbid_cmsg_compat);
+extern int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg,
+ unsigned int vlen, unsigned int flags,
+ struct __kernel_timespec __user *timeout,
+ struct old_timespec32 __user *timeout32);
+extern int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg,
+ unsigned int vlen, unsigned int flags,
+ bool forbid_cmsg_compat);
+extern long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
+ unsigned int flags);
+extern long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user *uaddr,
+ unsigned int flags);
+extern int sendmsg_copy_msghdr(struct msghdr *msg,
+ struct user_msghdr __user *umsg, unsigned flags,
+ struct iovec **iov);
+extern int recvmsg_copy_msghdr(struct msghdr *msg,
+ struct user_msghdr __user *umsg, unsigned flags,
+ struct sockaddr __user **uaddr,
+ struct iovec **iov);
+extern int __copy_msghdr_from_user(struct msghdr *kmsg,
+ struct user_msghdr __user *umsg,
+ struct sockaddr __user **save_addr,
+ struct iovec __user **uiov, size_t *nsegs);
+
+/* helpers which do the actual work for syscalls */
+extern int __sys_recvfrom(int fd, void __user *ubuf, size_t size,
+ unsigned int flags, struct sockaddr __user *addr,
+ int __user *addr_len);
+extern int __sys_sendto(int fd, void __user *buff, size_t len,
+ unsigned int flags, struct sockaddr __user *addr,
+ int addr_len);
+extern int __sys_accept4_file(struct file *file, unsigned file_flags,
+ struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags,
+ unsigned long nofile);
+extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
+ int __user *upeer_addrlen, int flags);
+extern int __sys_socket(int family, int type, int protocol);
+extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
+extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
+ int addrlen, int file_flags);
+extern int __sys_connect(int fd, struct sockaddr __user *uservaddr,
+ int addrlen);
+extern int __sys_listen(int fd, int backlog);
+extern int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+ int __user *usockaddr_len);
+extern int __sys_getpeername(int fd, struct sockaddr __user *usockaddr,
+ int __user *usockaddr_len);
+extern int __sys_socketpair(int family, int type, int protocol,
+ int __user *usockvec);
+extern int __sys_shutdown(int fd, int how);
+
+extern struct ns_common *get_net_ns(struct ns_common *ns);
+#endif /* _LINUX_SOCKET_H */
diff --git a/tools/perf/trace/beauty/sockaddr.c b/tools/perf/trace/beauty/sockaddr.c
index e0c13e6a5788..cd110634ab09 100644
--- a/tools/perf/trace/beauty/sockaddr.c
+++ b/tools/perf/trace/beauty/sockaddr.c
@@ -7,14 +7,7 @@
#include <sys/un.h>
#include <arpa/inet.h>
-static const char *socket_families[] = {
- "UNSPEC", "LOCAL", "INET", "AX25", "IPX", "APPLETALK", "NETROM",
- "BRIDGE", "ATMPVC", "X25", "INET6", "ROSE", "DECnet", "NETBEUI",
- "SECURITY", "KEY", "NETLINK", "PACKET", "ASH", "ECONET", "ATMSVC",
- "RDS", "SNA", "IRDA", "PPPOX", "WANPIPE", "LLC", "IB", "CAN", "TIPC",
- "BLUETOOTH", "IUCV", "RXRPC", "ISDN", "PHONET", "IEEE802154", "CAIF",
- "ALG", "NFC", "VSOCK",
-};
+#include "trace/beauty/generated/socket_arrays.c"
DEFINE_STRARRAY(socket_families, "PF_");
static size_t af_inet__scnprintf(struct sockaddr *sa, char *bf, size_t size)
diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh
new file mode 100755
index 000000000000..3820e5c82293
--- /dev/null
+++ b/tools/perf/trace/beauty/socket.sh
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: LGPL-2.1
+
+# This one uses a copy from the kernel sources headers that is in a
+# place used just for these tools/perf/beauty/ usage, we shouldn't not
+# put it in tools/include/linux otherwise they would be used in the
+# normal compiler building process and would drag needless stuff from the
+# kernel.
+
+# When what these scripts need is already in tools/include/ then use it,
+# otherwise grab and check the copy from the kernel sources just for these
+# string table building scripts.
+
+[ $# -eq 1 ] && header_dir=$1 || header_dir=tools/perf/trace/beauty/include/linux/
+
+printf "static const char *socket_families[] = {\n"
+# #define AF_LOCAL 1 /* POSIX name for AF_UNIX */
+regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*'
+
+egrep $regex ${header_dir}/socket.h | \
+ sed -r "s/$regex/\2 \1/g" | \
+ xargs printf "\t[%s] = \"%s\",\n" | \
+ egrep -v "\"(UNIX|MAX)\""
+printf "};\n"
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index 9023267e5643..bd77825fd5a1 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -209,7 +209,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser)
ui_browser__mark_fused(browser,
pcnt_width + 3 + notes->widths.addr + width,
from - 1,
- to > from ? true : false);
+ to > from);
}
}
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index be9c4c0549bc..a07626f07208 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -3629,8 +3629,8 @@ int perf_evlist__tui_browse_hists(struct evlist *evlist, const char *help,
{
int nr_entries = evlist->core.nr_entries;
-single_entry:
if (perf_evlist__single_entry(evlist)) {
+single_entry: {
struct evsel *first = evlist__first(evlist);
return perf_evsel__hists_browse(first, nr_entries, help,
@@ -3638,6 +3638,7 @@ single_entry:
env, warn_lost_event,
annotation_opts);
}
+ }
if (symbol_conf.event_group) {
struct evsel *pos;
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8d18380ecd10..cd5e41960e64 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -117,6 +117,7 @@ endif
perf-y += parse-branch-options.o
perf-y += dump-insn.o
perf-y += parse-regs-options.o
+perf-y += parse-sublevel-options.o
perf-y += term.o
perf-y += help-unknown-cmd.o
perf-y += mem-events.o
@@ -128,6 +129,7 @@ perf-y += expr-bison.o
perf-y += expr.o
perf-y += branch.o
perf-y += mem2node.o
+perf-y += clockid.o
perf-$(CONFIG_LIBBPF) += bpf-loader.o
perf-$(CONFIG_LIBBPF) += bpf_map.o
@@ -191,36 +193,60 @@ CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
# avoid compiler warnings in 32-bit mode
CFLAGS_genelf_debug.o += -Wno-packed
-$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-flex.h: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
$(call rule_mkdir)
- $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/parse-events-flex.c \
+ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) $<
-$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
+$(OUTPUT)util/parse-events-bison.c $(OUTPUT)util/parse-events-bison.h: util/parse-events.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
-$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c
+$(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-flex.h: util/expr.l $(OUTPUT)util/expr-bison.c
$(call rule_mkdir)
- $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/expr-flex.c \
+ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) $<
-$(OUTPUT)util/expr-bison.c: util/expr.y
+$(OUTPUT)util/expr-bison.c $(OUTPUT)util/expr-bison.h: util/expr.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ -o $(OUTPUT)util/expr-bison.c -p expr_
-$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
+$(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-flex.h: util/pmu.l $(OUTPUT)util/pmu-bison.c
$(call rule_mkdir)
- $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $(OUTPUT)util/pmu-flex.c \
+ --header-file=$(OUTPUT)util/pmu-flex.h $(PARSER_DEBUG_FLEX) $<
-$(OUTPUT)util/pmu-bison.c: util/pmu.y
+$(OUTPUT)util/pmu-bison.c $(OUTPUT)util/pmu-bison.h: util/pmu.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
-
-CFLAGS_parse-events-flex.o += -w
-CFLAGS_pmu-flex.o += -w
-CFLAGS_expr-flex.o += -w
-CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
-CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
-CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+ $(Q)$(call echo-cmd,bison)$(BISON) -v $< -d $(PARSER_DEBUG_BISON) \
+ -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
+
+FLEX_GE_26 := $(shell expr $(shell $(FLEX) --version | sed -e 's/flex \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 26)
+ifeq ($(FLEX_GE_26),1)
+ flex_flags := -Wno-switch-enum -Wno-switch-default -Wno-unused-function -Wno-redundant-decls -Wno-sign-compare -Wno-unused-parameter -Wno-missing-prototypes -Wno-missing-declarations
+ CC_HASNT_MISLEADING_INDENTATION := $(shell echo "int main(void) { return 0 }" | $(CC) -Werror -Wno-misleading-indentation -o /dev/null -xc - 2>&1 | grep -q -- -Wno-misleading-indentation ; echo $$?)
+ ifeq ($(CC_HASNT_MISLEADING_INDENTATION), 1)
+ flex_flags += -Wno-misleading-indentation
+ endif
+else
+ flex_flags := -w
+endif
+CFLAGS_parse-events-flex.o += $(flex_flags)
+CFLAGS_pmu-flex.o += $(flex_flags)
+CFLAGS_expr-flex.o += $(flex_flags)
+
+bison_flags := -DYYENABLE_NLS=0
+BISON_GE_35 := $(shell expr $(shell $(BISON) --version | grep bison | sed -e 's/.\+ \([0-9]\+\).\([0-9]\+\)/\1\2/g') \>\= 35)
+ifeq ($(BISON_GE_35),1)
+ bison_flags += -Wno-unused-parameter -Wno-nested-externs -Wno-implicit-function-declaration -Wno-switch-enum
+else
+ bison_flags += -w
+endif
+CFLAGS_parse-events-bison.o += $(bison_flags)
+CFLAGS_pmu-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
+CFLAGS_expr-bison.o += -DYYLTYPE_IS_TRIVIAL=0 $(bison_flags)
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 76bfb4a9d94e..0a1fcf787538 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -1621,6 +1621,7 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
char *build_id_filename;
char *build_id_path = NULL;
char *pos;
+ int len;
if (dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS &&
!dso__is_kcore(dso))
@@ -1649,10 +1650,16 @@ static int dso__disassemble_filename(struct dso *dso, char *filename, size_t fil
if (pos && strlen(pos) < SBUILD_ID_SIZE - 2)
dirname(build_id_path);
- if (dso__is_kcore(dso) ||
- readlink(build_id_path, linkname, sizeof(linkname)) < 0 ||
- strstr(linkname, DSO__NAME_KALLSYMS) ||
- access(filename, R_OK)) {
+ if (dso__is_kcore(dso))
+ goto fallback;
+
+ len = readlink(build_id_path, linkname, sizeof(linkname) - 1);
+ if (len < 0)
+ goto fallback;
+
+ linkname[len] = '\0';
+ if (strstr(linkname, DSO__NAME_KALLSYMS) ||
+ access(filename, R_OK)) {
fallback:
/*
* If we don't have build-ids or the build-id file isn't in the
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 302a14d0aca9..93e063f22be5 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -182,15 +182,15 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
if (payload & BIT(EV_TLB_ACCESS))
decoder->record.type |= ARM_SPE_TLB_ACCESS;
- if ((idx == 1 || idx == 2 || idx == 3) &&
+ if ((idx == 2 || idx == 4 || idx == 8) &&
(payload & BIT(EV_LLC_MISS)))
decoder->record.type |= ARM_SPE_LLC_MISS;
- if ((idx == 1 || idx == 2 || idx == 3) &&
+ if ((idx == 2 || idx == 4 || idx == 8) &&
(payload & BIT(EV_LLC_ACCESS)))
decoder->record.type |= ARM_SPE_LLC_ACCESS;
- if ((idx == 1 || idx == 2 || idx == 3) &&
+ if ((idx == 2 || idx == 4 || idx == 8) &&
(payload & BIT(EV_REMOTE_ACCESS)))
decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 25c639ac4ad4..42a85c86421d 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -1349,6 +1349,47 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
synth_opts->initial_skip = 0;
}
+static int get_flag(const char **ptr, unsigned int *flags)
+{
+ while (1) {
+ char c = **ptr;
+
+ if (c >= 'a' && c <= 'z') {
+ *flags |= 1 << (c - 'a');
+ ++*ptr;
+ return 0;
+ } else if (c == ' ') {
+ ++*ptr;
+ continue;
+ } else {
+ return -1;
+ }
+ }
+}
+
+static int get_flags(const char **ptr, unsigned int *plus_flags, unsigned int *minus_flags)
+{
+ while (1) {
+ switch (**ptr) {
+ case '+':
+ ++*ptr;
+ if (get_flag(ptr, plus_flags))
+ return -1;
+ break;
+ case '-':
+ ++*ptr;
+ if (get_flag(ptr, minus_flags))
+ return -1;
+ break;
+ case ' ':
+ ++*ptr;
+ break;
+ default:
+ return 0;
+ }
+ }
+}
+
/*
* Please check tools/perf/Documentation/perf-script.txt for information
* about the options parsed here, which is introduced after this cset,
@@ -1436,9 +1477,15 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
break;
case 'e':
synth_opts->errors = true;
+ if (get_flags(&p, &synth_opts->error_plus_flags,
+ &synth_opts->error_minus_flags))
+ goto out_err;
break;
case 'd':
synth_opts->log = true;
+ if (get_flags(&p, &synth_opts->log_plus_flags,
+ &synth_opts->log_minus_flags))
+ goto out_err;
break;
case 'c':
synth_opts->branches = true;
@@ -1507,6 +1554,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
case 'a':
synth_opts->remote_access = true;
break;
+ case 'q':
+ synth_opts->quick += 1;
+ break;
case ' ':
case ',':
break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 142ccf7d34df..951d2d14cf24 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -55,6 +55,11 @@ enum itrace_period_type {
PERF_ITRACE_PERIOD_NANOSECS,
};
+#define AUXTRACE_ERR_FLG_OVERFLOW (1 << ('o' - 'a'))
+#define AUXTRACE_ERR_FLG_DATA_LOST (1 << ('l' - 'a'))
+
+#define AUXTRACE_LOG_FLG_ALL_PERF_EVTS (1 << ('a' - 'a'))
+
/**
* struct itrace_synth_opts - AUX area tracing synthesis options.
* @set: indicates whether or not options have been set
@@ -91,6 +96,11 @@ enum itrace_period_type {
* @cpu_bitmap: CPUs for which to synthesize events, or NULL for all
* @ptime_range: time intervals to trace or NULL
* @range_num: number of time intervals to trace
+ * @error_plus_flags: flags to affect what errors are reported
+ * @error_minus_flags: flags to affect what errors are reported
+ * @log_plus_flags: flags to affect what is logged
+ * @log_minus_flags: flags to affect what is logged
+ * @quick: quicker (less detailed) decoding
*/
struct itrace_synth_opts {
bool set;
@@ -124,6 +134,11 @@ struct itrace_synth_opts {
unsigned long *cpu_bitmap;
struct perf_time_interval *ptime_range;
int range_num;
+ unsigned int error_plus_flags;
+ unsigned int error_minus_flags;
+ unsigned int log_plus_flags;
+ unsigned int log_minus_flags;
+ unsigned int quick;
};
/**
@@ -604,22 +619,32 @@ bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
struct evsel *evsel);
#define ITRACE_HELP \
-" i: synthesize instructions events\n" \
+" i[period]: synthesize instructions events\n" \
" b: synthesize branches events (branch misses for Arm SPE)\n" \
" c: synthesize branches events (calls only)\n" \
" r: synthesize branches events (returns only)\n" \
" x: synthesize transactions events\n" \
" w: synthesize ptwrite events\n" \
" p: synthesize power events\n" \
-" e: synthesize error events\n" \
-" d: create a debug log\n" \
+" o: synthesize other events recorded due to the use\n" \
+" of aux-output (refer to perf record)\n" \
+" e[flags]: synthesize error events\n" \
+" each flag must be preceded by + or -\n" \
+" error flags are: o (overflow)\n" \
+" l (data lost)\n" \
+" d[flags]: create a debug log\n" \
+" each flag must be preceded by + or -\n" \
+" log flags are: a (all perf events)\n" \
" f: synthesize first level cache events\n" \
" m: synthesize last level cache events\n" \
" t: synthesize TLB events\n" \
" a: synthesize remote access events\n" \
" g[len]: synthesize a call chain (use with i or x)\n" \
+" G[len]: synthesize a call chain on existing event records\n" \
" l[len]: synthesize last branch entries (use with i or x)\n" \
+" L[len]: synthesize last branch entries on existing event records\n" \
" sNUMBER: skip initial number of events\n" \
+" q: quicker (less detailed) decoding\n" \
" PERIOD[ns|us|ms|i|t]: specify period to sample stream\n" \
" concatenate multiple options. Default is ibxwpe or cewp\n"
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index c076fc7fe025..31207b6e2066 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -31,6 +31,10 @@
#include "probe-file.h"
#include "strlist.h"
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+#include <elfutils/debuginfod.h>
+#endif
+
#include <linux/ctype.h>
#include <linux/zalloc.h>
@@ -636,6 +640,21 @@ static char *build_id_cache__find_debug(const char *sbuild_id,
if (realname && access(realname, R_OK))
zfree(&realname);
nsinfo__mountns_exit(&nsc);
+
+#ifdef HAVE_DEBUGINFOD_SUPPORT
+ if (realname == NULL) {
+ debuginfod_client* c = debuginfod_begin();
+ if (c != NULL) {
+ int fd = debuginfod_find_debuginfo(c,
+ (const unsigned char*)sbuild_id, 0,
+ &realname);
+ if (fd >= 0)
+ close(fd); /* retaining reference by realname */
+ debuginfod_end(c);
+ }
+ }
+#endif
+
out:
free(debugfile);
return realname;
diff --git a/tools/perf/util/clockid.c b/tools/perf/util/clockid.c
new file mode 100644
index 000000000000..74365a5d99c1
--- /dev/null
+++ b/tools/perf/util/clockid.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <subcmd/parse-options.h>
+#include <stdio.h>
+#include <time.h>
+#include <strings.h>
+#include <linux/time64.h>
+#include "debug.h"
+#include "clockid.h"
+#include "record.h"
+
+struct clockid_map {
+ const char *name;
+ int clockid;
+};
+
+#define CLOCKID_MAP(n, c) \
+ { .name = n, .clockid = (c), }
+
+#define CLOCKID_END { .name = NULL, }
+
+
+/*
+ * Add the missing ones, we need to build on many distros...
+ */
+#ifndef CLOCK_MONOTONIC_RAW
+#define CLOCK_MONOTONIC_RAW 4
+#endif
+#ifndef CLOCK_BOOTTIME
+#define CLOCK_BOOTTIME 7
+#endif
+#ifndef CLOCK_TAI
+#define CLOCK_TAI 11
+#endif
+
+static const struct clockid_map clockids[] = {
+ /* available for all events, NMI safe */
+ CLOCKID_MAP("monotonic", CLOCK_MONOTONIC),
+ CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW),
+
+ /* available for some events */
+ CLOCKID_MAP("realtime", CLOCK_REALTIME),
+ CLOCKID_MAP("boottime", CLOCK_BOOTTIME),
+ CLOCKID_MAP("tai", CLOCK_TAI),
+
+ /* available for the lazy */
+ CLOCKID_MAP("mono", CLOCK_MONOTONIC),
+ CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW),
+ CLOCKID_MAP("real", CLOCK_REALTIME),
+ CLOCKID_MAP("boot", CLOCK_BOOTTIME),
+
+ CLOCKID_END,
+};
+
+static int get_clockid_res(clockid_t clk_id, u64 *res_ns)
+{
+ struct timespec res;
+
+ *res_ns = 0;
+ if (!clock_getres(clk_id, &res))
+ *res_ns = res.tv_nsec + res.tv_sec * NSEC_PER_SEC;
+ else
+ pr_warning("WARNING: Failed to determine specified clock resolution.\n");
+
+ return 0;
+}
+
+int parse_clockid(const struct option *opt, const char *str, int unset)
+{
+ struct record_opts *opts = (struct record_opts *)opt->value;
+ const struct clockid_map *cm;
+ const char *ostr = str;
+
+ if (unset) {
+ opts->use_clockid = 0;
+ return 0;
+ }
+
+ /* no arg passed */
+ if (!str)
+ return 0;
+
+ /* no setting it twice */
+ if (opts->use_clockid)
+ return -1;
+
+ opts->use_clockid = true;
+
+ /* if its a number, we're done */
+ if (sscanf(str, "%d", &opts->clockid) == 1)
+ return get_clockid_res(opts->clockid, &opts->clockid_res_ns);
+
+ /* allow a "CLOCK_" prefix to the name */
+ if (!strncasecmp(str, "CLOCK_", 6))
+ str += 6;
+
+ for (cm = clockids; cm->name; cm++) {
+ if (!strcasecmp(str, cm->name)) {
+ opts->clockid = cm->clockid;
+ return get_clockid_res(opts->clockid,
+ &opts->clockid_res_ns);
+ }
+ }
+
+ opts->use_clockid = false;
+ ui__warning("unknown clockid %s, check man page\n", ostr);
+ return -1;
+}
+
+const char *clockid_name(clockid_t clk_id)
+{
+ const struct clockid_map *cm;
+
+ for (cm = clockids; cm->name; cm++) {
+ if (cm->clockid == clk_id)
+ return cm->name;
+ }
+ return "(not found)";
+}
diff --git a/tools/perf/util/clockid.h b/tools/perf/util/clockid.h
new file mode 100644
index 000000000000..9b49b4711c76
--- /dev/null
+++ b/tools/perf/util/clockid.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __PERF_CLOCKID_H
+#define __PERF_CLOCKID_H
+
+struct option;
+int parse_clockid(const struct option *opt, const char *str, int unset);
+
+const char *clockid_name(clockid_t clk_id);
+
+#endif
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index c283223fb31f..a2a369e2fbb6 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -1344,8 +1344,15 @@ static int cs_etm__synth_events(struct cs_etm_auxtrace *etm,
attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
}
- if (etm->synth_opts.last_branch)
+ if (etm->synth_opts.last_branch) {
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ /*
+ * We don't use the hardware index, but the sample generation
+ * code uses the new format branch_stack with this field,
+ * so the event attributes must indicate that it's present.
+ */
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
if (etm->synth_opts.instructions) {
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 5f36fc6a5578..27c5fef9ad54 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -31,6 +31,9 @@
#include "config.h"
#include <linux/ctype.h>
#include <linux/err.h>
+#include <linux/time64.h>
+#include "util.h"
+#include "clockid.h"
#define pr_N(n, fmt, ...) \
eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
@@ -1381,11 +1384,26 @@ do { \
return 0;
}
-static int ctf_writer__setup_clock(struct ctf_writer *cw)
+static int ctf_writer__setup_clock(struct ctf_writer *cw,
+ struct perf_session *session,
+ bool tod)
{
struct bt_ctf_clock *clock = cw->clock;
+ const char *desc = "perf clock";
+ int64_t offset = 0;
- bt_ctf_clock_set_description(clock, "perf clock");
+ if (tod) {
+ struct perf_env *env = &session->header.env;
+
+ if (!env->clock.enabled) {
+ pr_err("Can't provide --tod time, missing clock data. "
+ "Please record with -k/--clockid option.\n");
+ return -1;
+ }
+
+ desc = clockid_name(env->clock.clockid);
+ offset = env->clock.tod_ns - env->clock.clockid_ns;
+ }
#define SET(__n, __v) \
do { \
@@ -1394,8 +1412,8 @@ do { \
} while (0)
SET(frequency, 1000000000);
- SET(offset_s, 0);
- SET(offset, 0);
+ SET(offset, offset);
+ SET(description, desc);
SET(precision, 10);
SET(is_absolute, 0);
@@ -1481,7 +1499,8 @@ static void ctf_writer__cleanup(struct ctf_writer *cw)
memset(cw, 0, sizeof(*cw));
}
-static int ctf_writer__init(struct ctf_writer *cw, const char *path)
+static int ctf_writer__init(struct ctf_writer *cw, const char *path,
+ struct perf_session *session, bool tod)
{
struct bt_ctf_writer *writer;
struct bt_ctf_stream_class *stream_class;
@@ -1505,7 +1524,7 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
cw->clock = clock;
- if (ctf_writer__setup_clock(cw)) {
+ if (ctf_writer__setup_clock(cw, session, tod)) {
pr("Failed to setup CTF clock.\n");
goto err_cleanup;
}
@@ -1613,17 +1632,15 @@ int bt_convert__perf2ctf(const char *input, const char *path,
if (err)
return err;
- /* CTF writer */
- if (ctf_writer__init(cw, path))
- return -1;
-
err = -1;
/* perf.data session */
session = perf_session__new(&data, 0, &c.tool);
- if (IS_ERR(session)) {
- err = PTR_ERR(session);
- goto free_writer;
- }
+ if (IS_ERR(session))
+ return PTR_ERR(session);
+
+ /* CTF writer */
+ if (ctf_writer__init(cw, path, session, opts->tod))
+ goto free_session;
if (c.queue_size) {
ordered_events__set_alloc_size(&session->ordered_events,
@@ -1632,17 +1649,17 @@ int bt_convert__perf2ctf(const char *input, const char *path,
/* CTF writer env/clock setup */
if (ctf_writer__setup_env(cw, session))
- goto free_session;
+ goto free_writer;
/* CTF events setup */
if (setup_events(cw, session))
- goto free_session;
+ goto free_writer;
if (opts->all && setup_non_sample_events(cw, session))
- goto free_session;
+ goto free_writer;
if (setup_streams(cw, session))
- goto free_session;
+ goto free_writer;
err = perf_session__process_events(session);
if (!err)
@@ -1670,10 +1687,10 @@ int bt_convert__perf2ctf(const char *input, const char *path,
return err;
-free_session:
- perf_session__delete(session);
free_writer:
ctf_writer__cleanup(cw);
+free_session:
+ perf_session__delete(session);
pr_err("Error during conversion setup.\n");
return err;
}
diff --git a/tools/perf/util/data-convert.h b/tools/perf/util/data-convert.h
index af90b6076c06..feab5f114e37 100644
--- a/tools/perf/util/data-convert.h
+++ b/tools/perf/util/data-convert.h
@@ -5,6 +5,7 @@
struct perf_data_convert_opts {
bool force;
bool all;
+ bool tod;
};
#endif /* __DATA_CONVERT_H */
diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c
index adb656745ecc..5cda5565777a 100644
--- a/tools/perf/util/debug.c
+++ b/tools/perf/util/debug.c
@@ -20,6 +20,7 @@
#include "target.h"
#include "ui/helpline.h"
#include "ui/ui.h"
+#include "util/parse-sublevel-options.h"
#include <linux/ctype.h>
@@ -173,65 +174,37 @@ void trace_event(union perf_event *event)
trace_event_printer, event);
}
-static struct debug_variable {
- const char *name;
- int *ptr;
-} debug_variables[] = {
- { .name = "verbose", .ptr = &verbose },
- { .name = "ordered-events", .ptr = &debug_ordered_events},
- { .name = "stderr", .ptr = &redirect_to_stderr},
- { .name = "data-convert", .ptr = &debug_data_convert },
- { .name = "perf-event-open", .ptr = &debug_peo_args },
+static struct sublevel_option debug_opts[] = {
+ { .name = "verbose", .value_ptr = &verbose },
+ { .name = "ordered-events", .value_ptr = &debug_ordered_events},
+ { .name = "stderr", .value_ptr = &redirect_to_stderr},
+ { .name = "data-convert", .value_ptr = &debug_data_convert },
+ { .name = "perf-event-open", .value_ptr = &debug_peo_args },
{ .name = NULL, }
};
int perf_debug_option(const char *str)
{
- struct debug_variable *var = &debug_variables[0];
- char *vstr, *s = strdup(str);
- int v = 1;
-
- vstr = strchr(s, '=');
- if (vstr)
- *vstr++ = 0;
-
- while (var->name) {
- if (!strcmp(s, var->name))
- break;
- var++;
- }
-
- if (!var->name) {
- pr_err("Unknown debug variable name '%s'\n", s);
- free(s);
- return -1;
- }
+ int ret;
- if (vstr) {
- v = atoi(vstr);
- /*
- * Allow only values in range (0, 10),
- * otherwise set 0.
- */
- v = (v < 0) || (v > 10) ? 0 : v;
- }
+ ret = perf_parse_sublevel_options(str, debug_opts);
+ if (ret)
+ return ret;
- if (quiet)
- v = -1;
+ /* Allow only verbose value in range (0, 10), otherwise set 0. */
+ verbose = (verbose < 0) || (verbose > 10) ? 0 : verbose;
- *var->ptr = v;
- free(s);
return 0;
}
int perf_quiet_option(void)
{
- struct debug_variable *var = &debug_variables[0];
+ struct sublevel_option *opt = &debug_opts[0];
/* disable all debug messages */
- while (var->name) {
- *var->ptr = -1;
- var++;
+ while (opt->name) {
+ *opt->value_ptr = -1;
+ opt++;
}
return 0;
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 99f0a39c3c59..5a3b4755f0b3 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -208,6 +208,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
case DSO_BINARY_TYPE__JAVA_JIT:
case DSO_BINARY_TYPE__BPF_PROG_INFO:
case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
case DSO_BINARY_TYPE__NOT_FOUND:
ret = -1;
break;
@@ -898,6 +899,8 @@ static struct dso_cache *dso_cache__populate(struct dso *dso,
if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO)
*ret = bpf_read(dso, cache_offset, cache->data);
+ else if (dso->binary_type == DSO_BINARY_TYPE__OOL)
+ *ret = DSO__DATA_CACHE_SIZE;
else
*ret = file_read(dso, machine, cache_offset, cache->data);
@@ -1262,7 +1265,7 @@ struct dso *dso__new_id(const char *name, struct dso_id *id)
dso->has_build_id = 0;
dso->has_srcline = 1;
dso->a2l_fails = 1;
- dso->kernel = DSO_TYPE_USER;
+ dso->kernel = DSO_SPACE__USER;
dso->needs_swap = DSO_SWAP__UNSET;
dso->comp = COMP_ID__NONE;
RB_CLEAR_NODE(&dso->rb_node);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index d3d03274b0d1..8ad17f395a19 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -42,13 +42,14 @@ enum dso_binary_type {
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
DSO_BINARY_TYPE__BPF_PROG_INFO,
DSO_BINARY_TYPE__BPF_IMAGE,
+ DSO_BINARY_TYPE__OOL,
DSO_BINARY_TYPE__NOT_FOUND,
};
-enum dso_kernel_type {
- DSO_TYPE_USER = 0,
- DSO_TYPE_KERNEL,
- DSO_TYPE_GUEST_KERNEL
+enum dso_space_type {
+ DSO_SPACE__USER = 0,
+ DSO_SPACE__KERNEL,
+ DSO_SPACE__KERNEL_GUEST
};
enum dso_swap_type {
@@ -159,7 +160,7 @@ struct dso {
void *a2l;
char *symsrc_filename;
unsigned int a2l_fails;
- enum dso_kernel_type kernel;
+ enum dso_space_type kernel;
enum dso_swap_type needs_swap;
enum dso_binary_type symtab_type;
enum dso_binary_type binary_type;
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 1ab2682d5d2b..a12972652006 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -77,7 +77,6 @@ struct perf_env {
struct numa_node *numa_nodes;
struct memory_node *memory_nodes;
unsigned long long memory_bsize;
- u64 clockid_res_ns;
/*
* bpf_info_lock protects bpf rbtrees. This is needed because the
@@ -100,6 +99,19 @@ struct perf_env {
/* For fast cpu to numa node lookup via perf_env__numa_node */
int *numa_map;
int nr_numa_map;
+
+ /* For real clock time reference. */
+ struct {
+ u64 tod_ns;
+ u64 clockid_ns;
+ u64 clockid_res_ns;
+ int clockid;
+ /*
+ * enabled is valid for report mode, and is true if above
+ * values are set, it's set in process_clock_data
+ */
+ bool enabled;
+ } clock;
};
enum perf_compress_type {
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index f581550a3015..317a26571845 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -31,6 +31,7 @@
#include "stat.h"
#include "session.h"
#include "bpf-event.h"
+#include "print_binary.h"
#include "tool.h"
#include "../perf.h"
@@ -55,6 +56,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_KSYMBOL] = "KSYMBOL",
[PERF_RECORD_BPF_EVENT] = "BPF_EVENT",
[PERF_RECORD_CGROUP] = "CGROUP",
+ [PERF_RECORD_TEXT_POKE] = "TEXT_POKE",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -267,6 +269,14 @@ int perf_event__process_bpf(struct perf_tool *tool __maybe_unused,
return machine__process_bpf(machine, event, sample);
}
+int perf_event__process_text_poke(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_text_poke(machine, event, sample);
+}
+
size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
{
return fprintf(fp, " %d/%d: [%#" PRI_lx64 "(%#" PRI_lx64 ") @ %#" PRI_lx64 "]: %c %s\n",
@@ -413,7 +423,52 @@ size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp)
event->bpf.type, event->bpf.flags, event->bpf.id);
}
-size_t perf_event__fprintf(union perf_event *event, FILE *fp)
+static int text_poke_printer(enum binary_printer_ops op, unsigned int val,
+ void *extra, FILE *fp)
+{
+ bool old = *(bool *)extra;
+
+ switch ((int)op) {
+ case BINARY_PRINT_LINE_BEGIN:
+ return fprintf(fp, " %s bytes:", old ? "Old" : "New");
+ case BINARY_PRINT_NUM_DATA:
+ return fprintf(fp, " %02x", val);
+ case BINARY_PRINT_LINE_END:
+ return fprintf(fp, "\n");
+ default:
+ return 0;
+ }
+}
+
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine, FILE *fp)
+{
+ struct perf_record_text_poke_event *tp = &event->text_poke;
+ size_t ret;
+ bool old;
+
+ ret = fprintf(fp, " %" PRI_lx64 " ", tp->addr);
+ if (machine) {
+ struct addr_location al;
+
+ al.map = maps__find(&machine->kmaps, tp->addr);
+ if (al.map && map__load(al.map) >= 0) {
+ al.addr = al.map->map_ip(al.map, tp->addr);
+ al.sym = map__find_symbol(al.map, al.addr);
+ if (al.sym)
+ ret += symbol__fprintf_symname_offs(al.sym, &al, fp);
+ }
+ }
+ ret += fprintf(fp, " old len %u new len %u\n", tp->old_len, tp->new_len);
+ old = true;
+ ret += binary__fprintf(tp->bytes, tp->old_len, 16, text_poke_printer,
+ &old, fp);
+ old = false;
+ ret += binary__fprintf(tp->bytes + tp->old_len, tp->new_len, 16,
+ text_poke_printer, &old, fp);
+ return ret;
+}
+
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp)
{
size_t ret = fprintf(fp, "PERF_RECORD_%s",
perf_event__name(event->header.type));
@@ -457,6 +512,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_BPF_EVENT:
ret += perf_event__fprintf_bpf(event, fp);
break;
+ case PERF_RECORD_TEXT_POKE:
+ ret += perf_event__fprintf_text_poke(event, machine, fp);
+ break;
default:
ret += fprintf(fp, "\n");
}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 6ae01c3c2ffa..b828b99176f4 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -351,6 +351,10 @@ int perf_event__process_bpf(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_text_poke(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_event__process(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -385,7 +389,8 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
-size_t perf_event__fprintf(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_text_poke(union perf_event *event, struct machine *machine,FILE *fp);
+size_t perf_event__fprintf(union perf_event *event, struct machine *machine, FILE *fp);
int kallsyms__get_function_start(const char *kallsyms_filename,
const char *symbol_name, u64 *addr);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index ab48be4cf258..c0768c61eb43 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -63,6 +63,9 @@ void evlist__init(struct evlist *evlist, struct perf_cpu_map *cpus,
perf_evlist__set_maps(&evlist->core, cpus, threads);
evlist->workload.pid = -1;
evlist->bkw_mmap_state = BKW_MMAP_NOTREADY;
+ evlist->ctl_fd.fd = -1;
+ evlist->ctl_fd.ack = -1;
+ evlist->ctl_fd.pos = -1;
}
struct evlist *evlist__new(void)
@@ -79,7 +82,7 @@ struct evlist *perf_evlist__new_default(void)
{
struct evlist *evlist = evlist__new();
- if (evlist && perf_evlist__add_default(evlist)) {
+ if (evlist && evlist__add_default(evlist)) {
evlist__delete(evlist);
evlist = NULL;
}
@@ -91,7 +94,7 @@ struct evlist *perf_evlist__new_dummy(void)
{
struct evlist *evlist = evlist__new();
- if (evlist && perf_evlist__add_dummy(evlist)) {
+ if (evlist && evlist__add_dummy(evlist)) {
evlist__delete(evlist);
evlist = NULL;
}
@@ -231,7 +234,7 @@ void perf_evlist__set_leader(struct evlist *evlist)
}
}
-int __perf_evlist__add_default(struct evlist *evlist, bool precise)
+int __evlist__add_default(struct evlist *evlist, bool precise)
{
struct evsel *evsel = evsel__new_cycles(precise);
@@ -242,7 +245,7 @@ int __perf_evlist__add_default(struct evlist *evlist, bool precise)
return 0;
}
-int perf_evlist__add_dummy(struct evlist *evlist)
+int evlist__add_dummy(struct evlist *evlist)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
@@ -258,8 +261,7 @@ int perf_evlist__add_dummy(struct evlist *evlist)
return 0;
}
-static int evlist__add_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs, size_t nr_attrs)
+static int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
{
struct evsel *evsel, *n;
LIST_HEAD(head);
@@ -282,8 +284,7 @@ out_delete_partial_list:
return -1;
}
-int __perf_evlist__add_default_attrs(struct evlist *evlist,
- struct perf_event_attr *attrs, size_t nr_attrs)
+int __evlist__add_default_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
{
size_t i;
@@ -322,8 +323,7 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
return NULL;
}
-int perf_evlist__add_newtp(struct evlist *evlist,
- const char *sys, const char *name, void *handler)
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler)
{
struct evsel *evsel = evsel__newtp(sys, name);
@@ -500,7 +500,7 @@ int perf_evlist__enable_event_idx(struct evlist *evlist,
int evlist__add_pollfd(struct evlist *evlist, int fd)
{
- return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN);
+ return perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN, fdarray_flag__default);
}
int evlist__filter_pollfd(struct evlist *evlist, short revents_and_mask)
@@ -540,7 +540,7 @@ struct evsel *perf_evlist__id2evsel(struct evlist *evlist, u64 id)
if (sid)
return container_of(sid->evsel, struct evsel, core);
- if (!perf_evlist__sample_id_all(evlist))
+ if (!evlist__sample_id_all(evlist))
return evlist__first(evlist);
return NULL;
@@ -946,6 +946,10 @@ int perf_evlist__create_maps(struct evlist *evlist, struct target *target)
perf_evlist__set_maps(&evlist->core, cpus, threads);
+ /* as evlist now has references, put count here */
+ perf_cpu_map__put(cpus);
+ perf_thread_map__put(threads);
+
return 0;
out_delete_threads:
@@ -1088,7 +1092,7 @@ int perf_evlist__append_tp_filter_pid(struct evlist *evlist, pid_t pid)
return perf_evlist__append_tp_filter_pids(evlist, 1, &pid);
}
-bool perf_evlist__valid_sample_type(struct evlist *evlist)
+bool evlist__valid_sample_type(struct evlist *evlist)
{
struct evsel *pos;
@@ -1107,7 +1111,7 @@ bool perf_evlist__valid_sample_type(struct evlist *evlist)
return true;
}
-u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
+u64 __evlist__combined_sample_type(struct evlist *evlist)
{
struct evsel *evsel;
@@ -1120,13 +1124,13 @@ u64 __perf_evlist__combined_sample_type(struct evlist *evlist)
return evlist->combined_sample_type;
}
-u64 perf_evlist__combined_sample_type(struct evlist *evlist)
+u64 evlist__combined_sample_type(struct evlist *evlist)
{
evlist->combined_sample_type = 0;
- return __perf_evlist__combined_sample_type(evlist);
+ return __evlist__combined_sample_type(evlist);
}
-u64 perf_evlist__combined_branch_type(struct evlist *evlist)
+u64 evlist__combined_branch_type(struct evlist *evlist)
{
struct evsel *evsel;
u64 branch_type = 0;
@@ -1191,7 +1195,7 @@ out:
return size;
}
-bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
+bool evlist__valid_sample_id_all(struct evlist *evlist)
{
struct evsel *first = evlist__first(evlist), *pos = first;
@@ -1203,7 +1207,7 @@ bool perf_evlist__valid_sample_id_all(struct evlist *evlist)
return true;
}
-bool perf_evlist__sample_id_all(struct evlist *evlist)
+bool evlist__sample_id_all(struct evlist *evlist)
{
struct evsel *first = evlist__first(evlist);
return first->core.attr.sample_id_all;
@@ -1273,11 +1277,12 @@ static int perf_evlist__create_syswide_maps(struct evlist *evlist)
goto out_put;
perf_evlist__set_maps(&evlist->core, cpus, threads);
-out:
- return err;
+
+ perf_thread_map__put(threads);
out_put:
perf_cpu_map__put(cpus);
- goto out;
+out:
+ return err;
}
int evlist__open(struct evlist *evlist)
@@ -1464,8 +1469,7 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
return evsel__parse_sample_timestamp(evsel, event, timestamp);
}
-int perf_evlist__strerror_open(struct evlist *evlist,
- int err, char *buf, size_t size)
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size)
{
int printed, value;
char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
@@ -1518,7 +1522,7 @@ out_default:
return 0;
}
-int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size)
{
char sbuf[STRERR_BUFSIZE], *emsg = str_error_r(err, sbuf, sizeof(sbuf));
int pages_attempted = evlist->core.mmap_len / 1024, pages_max_per_user, printed = 0;
@@ -1727,3 +1731,143 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
}
return leader;
}
+
+int evlist__initialize_ctlfd(struct evlist *evlist, int fd, int ack)
+{
+ if (fd == -1) {
+ pr_debug("Control descriptor is not initialized\n");
+ return 0;
+ }
+
+ evlist->ctl_fd.pos = perf_evlist__add_pollfd(&evlist->core, fd, NULL, POLLIN,
+ fdarray_flag__nonfilterable);
+ if (evlist->ctl_fd.pos < 0) {
+ evlist->ctl_fd.pos = -1;
+ pr_err("Failed to add ctl fd entry: %m\n");
+ return -1;
+ }
+
+ evlist->ctl_fd.fd = fd;
+ evlist->ctl_fd.ack = ack;
+
+ return 0;
+}
+
+bool evlist__ctlfd_initialized(struct evlist *evlist)
+{
+ return evlist->ctl_fd.pos >= 0;
+}
+
+int evlist__finalize_ctlfd(struct evlist *evlist)
+{
+ struct pollfd *entries = evlist->core.pollfd.entries;
+
+ if (!evlist__ctlfd_initialized(evlist))
+ return 0;
+
+ entries[evlist->ctl_fd.pos].fd = -1;
+ entries[evlist->ctl_fd.pos].events = 0;
+ entries[evlist->ctl_fd.pos].revents = 0;
+
+ evlist->ctl_fd.pos = -1;
+ evlist->ctl_fd.ack = -1;
+ evlist->ctl_fd.fd = -1;
+
+ return 0;
+}
+
+static int evlist__ctlfd_recv(struct evlist *evlist, enum evlist_ctl_cmd *cmd,
+ char *cmd_data, size_t data_size)
+{
+ int err;
+ char c;
+ size_t bytes_read = 0;
+
+ memset(cmd_data, 0, data_size);
+ data_size--;
+
+ do {
+ err = read(evlist->ctl_fd.fd, &c, 1);
+ if (err > 0) {
+ if (c == '\n' || c == '\0')
+ break;
+ cmd_data[bytes_read++] = c;
+ if (bytes_read == data_size)
+ break;
+ } else {
+ if (err == -1)
+ pr_err("Failed to read from ctlfd %d: %m\n", evlist->ctl_fd.fd);
+ break;
+ }
+ } while (1);
+
+ pr_debug("Message from ctl_fd: \"%s%s\"\n", cmd_data,
+ bytes_read == data_size ? "" : c == '\n' ? "\\n" : "\\0");
+
+ if (err > 0) {
+ if (!strncmp(cmd_data, EVLIST_CTL_CMD_ENABLE_TAG,
+ (sizeof(EVLIST_CTL_CMD_ENABLE_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_ENABLE;
+ } else if (!strncmp(cmd_data, EVLIST_CTL_CMD_DISABLE_TAG,
+ (sizeof(EVLIST_CTL_CMD_DISABLE_TAG)-1))) {
+ *cmd = EVLIST_CTL_CMD_DISABLE;
+ }
+ }
+
+ return err;
+}
+
+static int evlist__ctlfd_ack(struct evlist *evlist)
+{
+ int err;
+
+ if (evlist->ctl_fd.ack == -1)
+ return 0;
+
+ err = write(evlist->ctl_fd.ack, EVLIST_CTL_CMD_ACK_TAG,
+ sizeof(EVLIST_CTL_CMD_ACK_TAG));
+ if (err == -1)
+ pr_err("failed to write to ctl_ack_fd %d: %m\n", evlist->ctl_fd.ack);
+
+ return err;
+}
+
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd)
+{
+ int err = 0;
+ char cmd_data[EVLIST_CTL_CMD_MAX_LEN];
+ int ctlfd_pos = evlist->ctl_fd.pos;
+ struct pollfd *entries = evlist->core.pollfd.entries;
+
+ if (!evlist__ctlfd_initialized(evlist) || !entries[ctlfd_pos].revents)
+ return 0;
+
+ if (entries[ctlfd_pos].revents & POLLIN) {
+ err = evlist__ctlfd_recv(evlist, cmd, cmd_data,
+ EVLIST_CTL_CMD_MAX_LEN);
+ if (err > 0) {
+ switch (*cmd) {
+ case EVLIST_CTL_CMD_ENABLE:
+ evlist__enable(evlist);
+ break;
+ case EVLIST_CTL_CMD_DISABLE:
+ evlist__disable(evlist);
+ break;
+ case EVLIST_CTL_CMD_ACK:
+ case EVLIST_CTL_CMD_UNSUPPORTED:
+ default:
+ pr_debug("ctlfd: unsupported %d\n", *cmd);
+ break;
+ }
+ if (!(*cmd == EVLIST_CTL_CMD_ACK || *cmd == EVLIST_CTL_CMD_UNSUPPORTED))
+ evlist__ctlfd_ack(evlist);
+ }
+ }
+
+ if (entries[ctlfd_pos].revents & (POLLHUP | POLLERR))
+ evlist__finalize_ctlfd(evlist);
+ else
+ entries[ctlfd_pos].revents = 0;
+
+ return err;
+}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index a8081dfc19cf..c73f7f7f120b 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -74,6 +74,11 @@ struct evlist {
pthread_t th;
volatile int done;
} thread;
+ struct {
+ int fd; /* control file descriptor */
+ int ack; /* ack file descriptor for control commands */
+ int pos; /* index at evlist core object to check signals */
+ } ctl_fd;
};
struct evsel_str_handler {
@@ -92,20 +97,20 @@ void evlist__delete(struct evlist *evlist);
void evlist__add(struct evlist *evlist, struct evsel *entry);
void evlist__remove(struct evlist *evlist, struct evsel *evsel);
-int __perf_evlist__add_default(struct evlist *evlist, bool precise);
+int __evlist__add_default(struct evlist *evlist, bool precise);
-static inline int perf_evlist__add_default(struct evlist *evlist)
+static inline int evlist__add_default(struct evlist *evlist)
{
- return __perf_evlist__add_default(evlist, true);
+ return __evlist__add_default(evlist, true);
}
-int __perf_evlist__add_default_attrs(struct evlist *evlist,
+int __evlist__add_default_attrs(struct evlist *evlist,
struct perf_event_attr *attrs, size_t nr_attrs);
-#define perf_evlist__add_default_attrs(evlist, array) \
- __perf_evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
+#define evlist__add_default_attrs(evlist, array) \
+ __evlist__add_default_attrs(evlist, array, ARRAY_SIZE(array))
-int perf_evlist__add_dummy(struct evlist *evlist);
+int evlist__add_dummy(struct evlist *evlist);
int perf_evlist__add_sb_event(struct evlist *evlist,
struct perf_event_attr *attr,
@@ -116,8 +121,7 @@ int perf_evlist__start_sb_thread(struct evlist *evlist,
struct target *target);
void perf_evlist__stop_sb_thread(struct evlist *evlist);
-int perf_evlist__add_newtp(struct evlist *evlist,
- const char *sys, const char *name, void *handler);
+int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler);
int __evlist__set_tracepoints_handlers(struct evlist *evlist,
const struct evsel_str_handler *assocs,
@@ -219,10 +223,10 @@ int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
void __perf_evlist__set_leader(struct list_head *list);
void perf_evlist__set_leader(struct evlist *evlist);
-u64 __perf_evlist__combined_sample_type(struct evlist *evlist);
-u64 perf_evlist__combined_sample_type(struct evlist *evlist);
-u64 perf_evlist__combined_branch_type(struct evlist *evlist);
-bool perf_evlist__sample_id_all(struct evlist *evlist);
+u64 __evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_sample_type(struct evlist *evlist);
+u64 evlist__combined_branch_type(struct evlist *evlist);
+bool evlist__sample_id_all(struct evlist *evlist);
u16 perf_evlist__id_hdr_size(struct evlist *evlist);
int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
@@ -232,8 +236,8 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
union perf_event *event,
u64 *timestamp);
-bool perf_evlist__valid_sample_type(struct evlist *evlist);
-bool perf_evlist__valid_sample_id_all(struct evlist *evlist);
+bool evlist__valid_sample_type(struct evlist *evlist);
+bool evlist__valid_sample_id_all(struct evlist *evlist);
bool perf_evlist__valid_read_format(struct evlist *evlist);
void perf_evlist__splice_list_tail(struct evlist *evlist,
@@ -258,8 +262,8 @@ static inline struct evsel *evlist__last(struct evlist *evlist)
return container_of(evsel, struct evsel, core);
}
-int perf_evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
-int perf_evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
+int evlist__strerror_open(struct evlist *evlist, int err, char *buf, size_t size);
+int evlist__strerror_mmap(struct evlist *evlist, int err, char *buf, size_t size);
bool perf_evlist__can_select_event(struct evlist *evlist, const char *str);
void perf_evlist__to_front(struct evlist *evlist,
@@ -356,4 +360,25 @@ void perf_evlist__force_leader(struct evlist *evlist);
struct evsel *perf_evlist__reset_weak_group(struct evlist *evlist,
struct evsel *evsel,
bool close);
+#define EVLIST_CTL_CMD_ENABLE_TAG "enable"
+#define EVLIST_CTL_CMD_DISABLE_TAG "disable"
+#define EVLIST_CTL_CMD_ACK_TAG "ack\n"
+
+#define EVLIST_CTL_CMD_MAX_LEN 64
+
+enum evlist_ctl_cmd {
+ EVLIST_CTL_CMD_UNSUPPORTED = 0,
+ EVLIST_CTL_CMD_ENABLE,
+ EVLIST_CTL_CMD_DISABLE,
+ EVLIST_CTL_CMD_ACK
+};
+
+int evlist__initialize_ctlfd(struct evlist *evlist, int ctl_fd, int ctl_fd_ack);
+int evlist__finalize_ctlfd(struct evlist *evlist);
+bool evlist__ctlfd_initialized(struct evlist *evlist);
+int evlist__ctlfd_process(struct evlist *evlist, enum evlist_ctl_cmd *cmd);
+
+#define EVLIST_ENABLED_MSG "Events enabled\n"
+#define EVLIST_DISABLED_MSG "Events disabled\n"
+
#endif /* __PERF_EVLIST_H */
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ef802f6d40c1..459b51e90063 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -976,16 +976,20 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
* We default some events to have a default interval. But keep
* it a weak assumption overridable by the user.
*/
- if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
- opts->user_interval != ULLONG_MAX)) {
+ if (!attr->sample_period) {
if (opts->freq) {
- evsel__set_sample_bit(evsel, PERIOD);
attr->freq = 1;
attr->sample_freq = opts->freq;
} else {
attr->sample_period = opts->default_interval;
}
}
+ /*
+ * If attr->freq was set (here or earlier), ask for period
+ * to be sampled.
+ */
+ if (attr->freq)
+ evsel__set_sample_bit(evsel, PERIOD);
if (opts->no_samples)
attr->sample_freq = 0;
@@ -1014,12 +1018,14 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
if (callchain && callchain->enabled && !evsel->no_aux_samples)
evsel__config_callchain(evsel, opts, callchain);
- if (opts->sample_intr_regs && !evsel->no_aux_samples) {
+ if (opts->sample_intr_regs && !evsel->no_aux_samples &&
+ !evsel__is_dummy_event(evsel)) {
attr->sample_regs_intr = opts->sample_intr_regs;
evsel__set_sample_bit(evsel, REGS_INTR);
}
- if (opts->sample_user_regs && !evsel->no_aux_samples) {
+ if (opts->sample_user_regs && !evsel->no_aux_samples &&
+ !evsel__is_dummy_event(evsel)) {
attr->sample_regs_user |= opts->sample_user_regs;
evsel__set_sample_bit(evsel, REGS_USER);
}
@@ -1064,7 +1070,12 @@ void evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->mmap = track;
attr->mmap2 = track && !perf_missing_features.mmap2;
attr->comm = track;
- attr->ksymbol = track && !perf_missing_features.ksymbol;
+ /*
+ * ksymbol is tracked separately with text poke because it needs to be
+ * system wide and enabled immediately.
+ */
+ if (!opts->text_poke)
+ attr->ksymbol = track && !perf_missing_features.ksymbol;
attr->bpf_event = track && !opts->no_bpf_event && !perf_missing_features.bpf;
if (opts->record_namespaces)
@@ -2495,8 +2506,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg + printed, size - printed,
"Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
- "access to performance monitoring and observability operations for users\n"
- "without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.\n"
+ "access to performance monitoring and observability operations for processes\n"
+ "without CAP_PERFMON, CAP_SYS_PTRACE or CAP_SYS_ADMIN Linux capability.\n"
+ "More information can be found at 'Perf events and tool security' document:\n"
+ "https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html\n"
"perf_event_paranoid setting is %d:\n"
" -1: Allow use of (almost) all events by all users\n"
" Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
@@ -2528,6 +2541,10 @@ int evsel__open_strerror(struct evsel *evsel, struct target *target,
"No such device - did you specify an out-of-range profile CPU?");
break;
case EOPNOTSUPP:
+ if (evsel->core.attr.aux_output)
+ return scnprintf(msg, size,
+ "%s: PMU Hardware doesn't support 'aux_output' feature",
+ evsel__name(evsel));
if (evsel->core.attr.sample_period != 0)
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index f64ab91c432b..53482ef53c41 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -1,10 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
#include <stdbool.h>
#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include "metricgroup.h"
+#include "debug.h"
#include "expr.h"
#include "expr-bison.h"
#include "expr-flex.h"
#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include <ctype.h>
#ifdef PARSER_DEBUG
extern int expr_debug;
@@ -30,35 +37,144 @@ static bool key_equal(const void *key1, const void *key2,
}
/* Caller must make sure id is allocated */
-int expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val)
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id)
{
- double *val_ptr = NULL, *old_val = NULL;
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
char *old_key = NULL;
int ret;
- if (val != 0.0) {
- val_ptr = malloc(sizeof(double));
- if (!val_ptr)
- return -ENOMEM;
- *val_ptr = val;
+ data_ptr = malloc(sizeof(*data_ptr));
+ if (!data_ptr)
+ return -ENOMEM;
+
+ data_ptr->parent = ctx->parent;
+
+ ret = hashmap__set(&ctx->ids, id, data_ptr,
+ (const void **)&old_key, (void **)&old_data);
+ if (ret)
+ free(data_ptr);
+ free(old_key);
+ free(old_data);
+ return ret;
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val)
+{
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+ char *old_key = NULL;
+ int ret;
+
+ data_ptr = malloc(sizeof(*data_ptr));
+ if (!data_ptr)
+ return -ENOMEM;
+ data_ptr->val = val;
+ data_ptr->is_ref = false;
+
+ ret = hashmap__set(&ctx->ids, id, data_ptr,
+ (const void **)&old_key, (void **)&old_data);
+ if (ret)
+ free(data_ptr);
+ free(old_key);
+ free(old_data);
+ return ret;
+}
+
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref)
+{
+ struct expr_id_data *data_ptr = NULL, *old_data = NULL;
+ char *old_key = NULL;
+ char *name, *p;
+ int ret;
+
+ data_ptr = zalloc(sizeof(*data_ptr));
+ if (!data_ptr)
+ return -ENOMEM;
+
+ name = strdup(ref->metric_name);
+ if (!name) {
+ free(data_ptr);
+ return -ENOMEM;
}
- ret = hashmap__set(&ctx->ids, name, val_ptr,
- (const void **)&old_key, (void **)&old_val);
+
+ /*
+ * The jevents tool converts all metric expressions
+ * to lowercase, including metric references, hence
+ * we need to add lowercase name for metric, so it's
+ * properly found.
+ */
+ for (p = name; *p; p++)
+ *p = tolower(*p);
+
+ /*
+ * Intentionally passing just const char pointers,
+ * originally from 'struct pmu_event' object.
+ * We don't need to change them, so there's no
+ * need to create our own copy.
+ */
+ data_ptr->ref.metric_name = ref->metric_name;
+ data_ptr->ref.metric_expr = ref->metric_expr;
+ data_ptr->ref.counted = false;
+ data_ptr->is_ref = true;
+
+ ret = hashmap__set(&ctx->ids, name, data_ptr,
+ (const void **)&old_key, (void **)&old_data);
+ if (ret)
+ free(data_ptr);
+
+ pr_debug2("adding ref metric %s: %s\n",
+ ref->metric_name, ref->metric_expr);
+
free(old_key);
- free(old_val);
+ free(old_data);
return ret;
}
-int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr)
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **data)
+{
+ return hashmap__find(&ctx->ids, id, (void **)data) ? 0 : -1;
+}
+
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **datap)
{
- double *data;
+ struct expr_id_data *data;
- if (!hashmap__find(&ctx->ids, id, (void **)&data))
+ if (expr__get_id(ctx, id, datap) || !*datap) {
+ pr_debug("%s not found\n", id);
return -1;
- *val_ptr = (data == NULL) ? 0.0 : *data;
+ }
+
+ data = *datap;
+
+ pr_debug2("lookup: is_ref %d, counted %d, val %f: %s\n",
+ data->is_ref, data->ref.counted, data->val, id);
+
+ if (data->is_ref && !data->ref.counted) {
+ data->ref.counted = true;
+ pr_debug("processing metric: %s ENTRY\n", id);
+ if (expr__parse(&data->val, ctx, data->ref.metric_expr, 1)) {
+ pr_debug("%s failed to count\n", id);
+ return -1;
+ }
+ pr_debug("processing metric: %s EXIT: %f\n", id, data->val);
+ }
+
return 0;
}
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id)
+{
+ struct expr_id_data *old_val = NULL;
+ char *old_key = NULL;
+
+ hashmap__delete(&ctx->ids, id,
+ (const void **)&old_key, (void **)&old_val);
+ free(old_key);
+ free(old_val);
+}
+
void expr__ctx_init(struct expr_parse_ctx *ctx)
{
hashmap__init(&ctx->ids, key_hash, key_equal, NULL);
@@ -88,6 +204,8 @@ __expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
void *scanner;
int ret;
+ pr_debug2("parsing metric: %s\n", expr);
+
ret = expr_lex_init_extra(&scanner_ctx, &scanner);
if (ret)
return ret;
@@ -116,16 +234,10 @@ int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
int expr__find_other(const char *expr, const char *one,
struct expr_parse_ctx *ctx, int runtime)
{
- double *old_val = NULL;
- char *old_key = NULL;
int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime);
- if (one) {
- hashmap__delete(&ctx->ids, one,
- (const void **)&old_key, (void **)&old_val);
- free(old_key);
- free(old_val);
- }
+ if (one)
+ expr__del_id(ctx, one);
return ret;
}
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 8a2c1074f90f..fc2b5e824a66 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -11,8 +11,30 @@
#include "util/hashmap.h"
//#endif
+struct metric_ref;
+
+struct expr_id {
+ char *id;
+ struct expr_id *parent;
+};
+
struct expr_parse_ctx {
- struct hashmap ids;
+ struct hashmap ids;
+ struct expr_id *parent;
+};
+
+struct expr_id_data {
+ union {
+ double val;
+ struct {
+ const char *metric_name;
+ const char *metric_expr;
+ bool counted;
+ } ref;
+ struct expr_id *parent;
+ };
+
+ bool is_ref;
};
struct expr_scanner_ctx {
@@ -22,8 +44,14 @@ struct expr_scanner_ctx {
void expr__ctx_init(struct expr_parse_ctx *ctx);
void expr__ctx_clear(struct expr_parse_ctx *ctx);
-int expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val);
-int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr);
+void expr__del_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id);
+int expr__add_id_val(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__add_ref(struct expr_parse_ctx *ctx, struct metric_ref *ref);
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **data);
+int expr__resolve_id(struct expr_parse_ctx *ctx, const char *id,
+ struct expr_id_data **datap);
int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
const char *expr, int runtime);
int expr__find_other(const char *expr, const char *one,
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
index f397bf8b1a48..13e5e3c75f56 100644
--- a/tools/perf/util/expr.l
+++ b/tools/perf/util/expr.l
@@ -100,6 +100,7 @@ symbol ({spec}|{sym})+
}
}
+d_ratio { return D_RATIO; }
max { return MAX; }
min { return MIN; }
if { return IF; }
@@ -110,6 +111,8 @@ else { return ELSE; }
"|" { return '|'; }
"^" { return '^'; }
"&" { return '&'; }
+"<" { return '<'; }
+">" { return '>'; }
"-" { return '-'; }
"+" { return '+'; }
"*" { return '*'; }
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index bf3e898e3055..d34b370391c6 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -10,6 +10,14 @@
#include "smt.h"
#include <string.h>
+static double d_ratio(double val0, double val1)
+{
+ if (val1 == 0) {
+ return 0;
+ }
+ return val0 / val1;
+}
+
%}
%define api.pure full
@@ -28,11 +36,12 @@
%token <num> NUMBER
%token <str> ID
%destructor { free ($$); } <str>
-%token MIN MAX IF ELSE SMT_ON
+%token MIN MAX IF ELSE SMT_ON D_RATIO
%left MIN MAX IF
%left '|'
%left '^'
%left '&'
+%left '<' '>'
%left '-' '+'
%left '*' '/' '%'
%left NEG NOT
@@ -60,11 +69,12 @@ all_other: all_other other
other: ID
{
- expr__add_id(ctx, $1, 0.0);
+ expr__add_id(ctx, $1);
}
|
MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ','
-
+|
+'<' | '>' | D_RATIO
all_expr: if_expr { *final_val = $1; }
;
@@ -75,16 +85,22 @@ if_expr:
;
expr: NUMBER
- | ID { if (expr__get_id(ctx, $1, &$$)) {
- pr_debug("%s not found\n", $1);
+ | ID {
+ struct expr_id_data *data;
+
+ if (expr__resolve_id(ctx, $1, &data)) {
+ free($1);
+ YYABORT;
+ }
+
+ $$ = data->val;
free($1);
- YYABORT;
- }
- free($1);
}
| expr '|' expr { $$ = (long)$1 | (long)$3; }
| expr '&' expr { $$ = (long)$1 & (long)$3; }
| expr '^' expr { $$ = (long)$1 ^ (long)$3; }
+ | expr '<' expr { $$ = $1 < $3; }
+ | expr '>' expr { $$ = $1 > $3; }
| expr '+' expr { $$ = $1 + $3; }
| expr '-' expr { $$ = $1 - $3; }
| expr '*' expr { $$ = $1 * $3; }
@@ -105,6 +121,7 @@ expr: NUMBER
| MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; }
| MAX '(' expr ',' expr ')' { $$ = $3 > $5 ? $3 : $5; }
| SMT_ON { $$ = smt_on() > 0; }
+ | D_RATIO '(' expr ',' expr ')' { $$ = d_ratio($3,$5); }
;
%%
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 7a67d017d72c..9cf4efdcbbbd 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -46,6 +46,7 @@
#include "util/util.h" // perf_exe()
#include "cputopo.h"
#include "bpf-event.h"
+#include "clockid.h"
#include <linux/ctype.h>
#include <internal/lib.h>
@@ -891,8 +892,42 @@ static int write_auxtrace(struct feat_fd *ff,
static int write_clockid(struct feat_fd *ff,
struct evlist *evlist __maybe_unused)
{
- return do_write(ff, &ff->ph->env.clockid_res_ns,
- sizeof(ff->ph->env.clockid_res_ns));
+ return do_write(ff, &ff->ph->env.clock.clockid_res_ns,
+ sizeof(ff->ph->env.clock.clockid_res_ns));
+}
+
+static int write_clock_data(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ u64 *data64;
+ u32 data32;
+ int ret;
+
+ /* version */
+ data32 = 1;
+
+ ret = do_write(ff, &data32, sizeof(data32));
+ if (ret < 0)
+ return ret;
+
+ /* clockid */
+ data32 = ff->ph->env.clock.clockid;
+
+ ret = do_write(ff, &data32, sizeof(data32));
+ if (ret < 0)
+ return ret;
+
+ /* TOD ref time */
+ data64 = &ff->ph->env.clock.tod_ns;
+
+ ret = do_write(ff, data64, sizeof(*data64));
+ if (ret < 0)
+ return ret;
+
+ /* clockid ref time */
+ data64 = &ff->ph->env.clock.clockid_ns;
+
+ return do_write(ff, data64, sizeof(*data64));
}
static int write_dir_format(struct feat_fd *ff,
@@ -1546,7 +1581,50 @@ static void print_cpu_topology(struct feat_fd *ff, FILE *fp)
static void print_clockid(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# clockid frequency: %"PRIu64" MHz\n",
- ff->ph->env.clockid_res_ns * 1000);
+ ff->ph->env.clock.clockid_res_ns * 1000);
+}
+
+static void print_clock_data(struct feat_fd *ff, FILE *fp)
+{
+ struct timespec clockid_ns;
+ char tstr[64], date[64];
+ struct timeval tod_ns;
+ clockid_t clockid;
+ struct tm ltime;
+ u64 ref;
+
+ if (!ff->ph->env.clock.enabled) {
+ fprintf(fp, "# reference time disabled\n");
+ return;
+ }
+
+ /* Compute TOD time. */
+ ref = ff->ph->env.clock.tod_ns;
+ tod_ns.tv_sec = ref / NSEC_PER_SEC;
+ ref -= tod_ns.tv_sec * NSEC_PER_SEC;
+ tod_ns.tv_usec = ref / NSEC_PER_USEC;
+
+ /* Compute clockid time. */
+ ref = ff->ph->env.clock.clockid_ns;
+ clockid_ns.tv_sec = ref / NSEC_PER_SEC;
+ ref -= clockid_ns.tv_sec * NSEC_PER_SEC;
+ clockid_ns.tv_nsec = ref;
+
+ clockid = ff->ph->env.clock.clockid;
+
+ if (localtime_r(&tod_ns.tv_sec, &ltime) == NULL)
+ snprintf(tstr, sizeof(tstr), "<error>");
+ else {
+ strftime(date, sizeof(date), "%F %T", &ltime);
+ scnprintf(tstr, sizeof(tstr), "%s.%06d",
+ date, (int) tod_ns.tv_usec);
+ }
+
+ fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid);
+ fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n",
+ tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec,
+ clockid_ns.tv_sec, clockid_ns.tv_nsec,
+ clockid_name(clockid));
}
static void print_dir_format(struct feat_fd *ff, FILE *fp)
@@ -1978,7 +2056,7 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
struct machine *machine;
u16 cpumode;
struct dso *dso;
- enum dso_kernel_type dso_type;
+ enum dso_space_type dso_space;
machine = perf_session__findnew_machine(session, bev->pid);
if (!machine)
@@ -1988,14 +2066,14 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
switch (cpumode) {
case PERF_RECORD_MISC_KERNEL:
- dso_type = DSO_TYPE_KERNEL;
+ dso_space = DSO_SPACE__KERNEL;
break;
case PERF_RECORD_MISC_GUEST_KERNEL:
- dso_type = DSO_TYPE_GUEST_KERNEL;
+ dso_space = DSO_SPACE__KERNEL_GUEST;
break;
case PERF_RECORD_MISC_USER:
case PERF_RECORD_MISC_GUEST_USER:
- dso_type = DSO_TYPE_USER;
+ dso_space = DSO_SPACE__USER;
break;
default:
goto out;
@@ -2007,14 +2085,13 @@ static int __event_process_build_id(struct perf_record_header_build_id *bev,
dso__set_build_id(dso, &bev->build_id);
- if (dso_type != DSO_TYPE_USER) {
+ if (dso_space != DSO_SPACE__USER) {
struct kmod_path m = { .name = NULL, };
if (!kmod_path__parse_name(&m, filename) && m.kmod)
dso__set_module_info(dso, &m, machine);
- else
- dso->kernel = dso_type;
+ dso->kernel = dso_space;
free(m.name);
}
@@ -2732,9 +2809,43 @@ out:
static int process_clockid(struct feat_fd *ff,
void *data __maybe_unused)
{
- if (do_read_u64(ff, &ff->ph->env.clockid_res_ns))
+ if (do_read_u64(ff, &ff->ph->env.clock.clockid_res_ns))
+ return -1;
+
+ return 0;
+}
+
+static int process_clock_data(struct feat_fd *ff,
+ void *_data __maybe_unused)
+{
+ u32 data32;
+ u64 data64;
+
+ /* version */
+ if (do_read_u32(ff, &data32))
+ return -1;
+
+ if (data32 != 1)
+ return -1;
+
+ /* clockid */
+ if (do_read_u32(ff, &data32))
+ return -1;
+
+ ff->ph->env.clock.clockid = data32;
+
+ /* TOD ref time */
+ if (do_read_u64(ff, &data64))
+ return -1;
+
+ ff->ph->env.clock.tod_ns = data64;
+
+ /* clockid ref time */
+ if (do_read_u64(ff, &data64))
return -1;
+ ff->ph->env.clock.clockid_ns = data64;
+ ff->ph->env.clock.enabled = true;
return 0;
}
@@ -3008,6 +3119,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(BPF_BTF, bpf_btf, false),
FEAT_OPR(COMPRESSED, compressed, false),
FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false),
+ FEAT_OPR(CLOCK_DATA, clock_data, false),
};
struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 650bd1c7a99b..2aca71763ecf 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -44,6 +44,7 @@ enum {
HEADER_BPF_BTF,
HEADER_COMPRESSED,
HEADER_CPU_PMU_CAPS,
+ HEADER_CLOCK_DATA,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index f8ccfd6be0ee..697513f35154 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -55,6 +55,7 @@ enum intel_pt_pkt_state {
INTEL_PT_STATE_TIP_PGD,
INTEL_PT_STATE_FUP,
INTEL_PT_STATE_FUP_NO_TIP,
+ INTEL_PT_STATE_RESAMPLE,
};
static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
@@ -65,6 +66,7 @@ static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state)
case INTEL_PT_STATE_ERR_RESYNC:
case INTEL_PT_STATE_IN_SYNC:
case INTEL_PT_STATE_TNT_CONT:
+ case INTEL_PT_STATE_RESAMPLE:
return true;
case INTEL_PT_STATE_TNT:
case INTEL_PT_STATE_TIP:
@@ -109,6 +111,9 @@ struct intel_pt_decoder {
bool fixup_last_mtc;
bool have_last_ip;
bool in_psb;
+ bool hop;
+ bool hop_psb_fup;
+ bool leap;
enum intel_pt_param_flags flags;
uint64_t pos;
uint64_t last_ip;
@@ -235,6 +240,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
decoder->data = params->data;
decoder->return_compression = params->return_compression;
decoder->branch_enable = params->branch_enable;
+ decoder->hop = params->quick >= 1;
+ decoder->leap = params->quick >= 2;
decoder->flags = params->flags;
@@ -275,6 +282,9 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
intel_pt_log("timestamp: tsc_ctc_mult %u\n", decoder->tsc_ctc_mult);
intel_pt_log("timestamp: tsc_slip %#x\n", decoder->tsc_slip);
+ if (decoder->hop)
+ intel_pt_log("Hop mode: decoding FUP and TIPs, but not TNT\n");
+
return decoder;
}
@@ -1164,6 +1174,7 @@ static int intel_pt_walk_fup(struct intel_pt_decoder *decoder)
return 0;
if (err == -EAGAIN ||
intel_pt_fup_with_nlip(decoder, &intel_pt_insn, ip, err)) {
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
if (intel_pt_fup_event(decoder))
return 0;
return -EAGAIN;
@@ -1729,8 +1740,14 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
case INTEL_PT_FUP:
decoder->pge = true;
- if (decoder->packet.count)
+ if (decoder->packet.count) {
intel_pt_set_last_ip(decoder);
+ if (decoder->hop) {
+ /* Act on FUP at PSBEND */
+ decoder->ip = decoder->last_ip;
+ decoder->hop_psb_fup = true;
+ }
+ }
break;
case INTEL_PT_MODE_TSX:
@@ -1874,6 +1891,127 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
}
}
+static int intel_pt_resample(struct intel_pt_decoder *decoder)
+{
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return 0;
+}
+
+#define HOP_PROCESS 0
+#define HOP_IGNORE 1
+#define HOP_RETURN 2
+#define HOP_AGAIN 3
+
+static int intel_pt_scan_for_psb(struct intel_pt_decoder *decoder);
+
+/* Hop mode: Ignore TNT, do not walk code, but get ip from FUPs and TIPs */
+static int intel_pt_hop_trace(struct intel_pt_decoder *decoder, bool *no_tip, int *err)
+{
+ /* Leap from PSB to PSB, getting ip from FUP within PSB+ */
+ if (decoder->leap && !decoder->in_psb && decoder->packet.type != INTEL_PT_PSB) {
+ *err = intel_pt_scan_for_psb(decoder);
+ if (*err)
+ return HOP_RETURN;
+ }
+
+ switch (decoder->packet.type) {
+ case INTEL_PT_TNT:
+ return HOP_IGNORE;
+
+ case INTEL_PT_TIP_PGD:
+ if (!decoder->packet.count)
+ return HOP_IGNORE;
+ intel_pt_set_ip(decoder);
+ decoder->state.type |= INTEL_PT_TRACE_END;
+ decoder->state.from_ip = 0;
+ decoder->state.to_ip = decoder->ip;
+ return HOP_RETURN;
+
+ case INTEL_PT_TIP:
+ if (!decoder->packet.count)
+ return HOP_IGNORE;
+ intel_pt_set_ip(decoder);
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return HOP_RETURN;
+
+ case INTEL_PT_FUP:
+ if (!decoder->packet.count)
+ return HOP_IGNORE;
+ intel_pt_set_ip(decoder);
+ if (intel_pt_fup_event(decoder))
+ return HOP_RETURN;
+ if (!decoder->branch_enable)
+ *no_tip = true;
+ if (*no_tip) {
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return HOP_RETURN;
+ }
+ *err = intel_pt_walk_fup_tip(decoder);
+ if (!*err)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ return HOP_RETURN;
+
+ case INTEL_PT_PSB:
+ decoder->last_ip = 0;
+ decoder->have_last_ip = true;
+ decoder->hop_psb_fup = false;
+ *err = intel_pt_walk_psbend(decoder);
+ if (*err == -EAGAIN)
+ return HOP_AGAIN;
+ if (*err)
+ return HOP_RETURN;
+ if (decoder->hop_psb_fup) {
+ decoder->hop_psb_fup = false;
+ decoder->state.type = INTEL_PT_INSTRUCTION;
+ decoder->state.from_ip = decoder->ip;
+ decoder->state.to_ip = 0;
+ return HOP_RETURN;
+ }
+ if (decoder->cbr != decoder->cbr_seen) {
+ decoder->state.type = 0;
+ return HOP_RETURN;
+ }
+ return HOP_IGNORE;
+
+ case INTEL_PT_BAD:
+ case INTEL_PT_PAD:
+ case INTEL_PT_TIP_PGE:
+ case INTEL_PT_TSC:
+ case INTEL_PT_TMA:
+ case INTEL_PT_MODE_EXEC:
+ case INTEL_PT_MODE_TSX:
+ case INTEL_PT_MTC:
+ case INTEL_PT_CYC:
+ case INTEL_PT_VMCS:
+ case INTEL_PT_PSBEND:
+ case INTEL_PT_CBR:
+ case INTEL_PT_TRACESTOP:
+ case INTEL_PT_PIP:
+ case INTEL_PT_OVF:
+ case INTEL_PT_MNT:
+ case INTEL_PT_PTWRITE:
+ case INTEL_PT_PTWRITE_IP:
+ case INTEL_PT_EXSTOP:
+ case INTEL_PT_EXSTOP_IP:
+ case INTEL_PT_MWAIT:
+ case INTEL_PT_PWRE:
+ case INTEL_PT_PWRX:
+ case INTEL_PT_BBP:
+ case INTEL_PT_BIP:
+ case INTEL_PT_BEP:
+ case INTEL_PT_BEP_IP:
+ default:
+ return HOP_PROCESS;
+ }
+}
+
static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
{
bool no_tip = false;
@@ -1884,6 +2022,19 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
if (err)
return err;
next:
+ if (decoder->hop) {
+ switch (intel_pt_hop_trace(decoder, &no_tip, &err)) {
+ case HOP_IGNORE:
+ continue;
+ case HOP_RETURN:
+ return err;
+ case HOP_AGAIN:
+ goto next;
+ default:
+ break;
+ }
+ }
+
switch (decoder->packet.type) {
case INTEL_PT_TNT:
if (!decoder->packet.count)
@@ -1913,6 +2064,12 @@ next:
decoder->state.from_ip = 0;
decoder->state.to_ip = decoder->ip;
decoder->state.type |= INTEL_PT_TRACE_BEGIN;
+ /*
+ * In hop mode, resample to get the to_ip as an
+ * "instruction" sample.
+ */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
return 0;
}
@@ -1942,17 +2099,13 @@ next:
}
if (decoder->set_fup_mwait)
no_tip = true;
+ if (no_tip)
+ decoder->pkt_state = INTEL_PT_STATE_FUP_NO_TIP;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_FUP;
err = intel_pt_walk_fup(decoder);
- if (err != -EAGAIN) {
- if (err)
- return err;
- if (no_tip)
- decoder->pkt_state =
- INTEL_PT_STATE_FUP_NO_TIP;
- else
- decoder->pkt_state = INTEL_PT_STATE_FUP;
- return 0;
- }
+ if (err != -EAGAIN)
+ return err;
if (no_tip) {
no_tip = false;
break;
@@ -1980,8 +2133,10 @@ next:
* possibility of another CBR change that gets caught up
* in the PSB+.
*/
- if (decoder->cbr != decoder->cbr_seen)
+ if (decoder->cbr != decoder->cbr_seen) {
+ decoder->state.type = 0;
return 0;
+ }
break;
case INTEL_PT_PIP:
@@ -2022,8 +2177,10 @@ next:
case INTEL_PT_CBR:
intel_pt_calc_cbr(decoder);
- if (decoder->cbr != decoder->cbr_seen)
+ if (decoder->cbr != decoder->cbr_seen) {
+ decoder->state.type = 0;
return 0;
+ }
break;
case INTEL_PT_MODE_EXEC:
@@ -2032,7 +2189,7 @@ next:
case INTEL_PT_MODE_TSX:
/* MODE_TSX need not be followed by FUP */
- if (!decoder->pge) {
+ if (!decoder->pge || decoder->in_psb) {
intel_pt_update_in_tx(decoder);
break;
}
@@ -2423,7 +2580,11 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
if (err)
return err;
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ /* In hop mode, resample to get the to_ip as an "instruction" sample */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
decoder->overflow = false;
decoder->state.from_ip = 0;
@@ -2531,6 +2692,7 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
decoder->ip = 0;
intel_pt_clear_stack(&decoder->stack);
+leap:
err = intel_pt_scan_for_psb(decoder);
if (err)
return err;
@@ -2544,7 +2706,20 @@ static int intel_pt_sync(struct intel_pt_decoder *decoder)
if (decoder->ip) {
decoder->state.type = 0; /* Do not have a sample */
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ /*
+ * In hop mode, resample to get the PSB FUP ip as an
+ * "instruction" sample.
+ */
+ if (decoder->hop)
+ decoder->pkt_state = INTEL_PT_STATE_RESAMPLE;
+ else
+ decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
+ } else if (decoder->leap) {
+ /*
+ * In leap mode, only PSB+ is decoded, so keeping leaping to the
+ * next PSB until there is an ip.
+ */
+ goto leap;
} else {
return intel_pt_sync_ip(decoder);
}
@@ -2599,19 +2774,18 @@ const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder)
err = intel_pt_walk_tip(decoder);
break;
case INTEL_PT_STATE_FUP:
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
err = intel_pt_walk_fup(decoder);
if (err == -EAGAIN)
err = intel_pt_walk_fup_tip(decoder);
- else if (!err)
- decoder->pkt_state = INTEL_PT_STATE_FUP;
break;
case INTEL_PT_STATE_FUP_NO_TIP:
- decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
err = intel_pt_walk_fup(decoder);
if (err == -EAGAIN)
err = intel_pt_walk_trace(decoder);
break;
+ case INTEL_PT_STATE_RESAMPLE:
+ err = intel_pt_resample(decoder);
+ break;
default:
err = intel_pt_bug(decoder);
break;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index e289e463d635..8645fc265481 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -250,6 +250,7 @@ struct intel_pt_params {
uint32_t tsc_ctc_ratio_n;
uint32_t tsc_ctc_ratio_d;
enum intel_pt_param_flags flags;
+ unsigned int quick;
};
struct intel_pt_decoder;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index cb3c1e569a2d..0af4e81c46e2 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -236,7 +236,7 @@ static void intel_pt_log_event(union perf_event *event)
if (!intel_pt_enable_logging || !f)
return;
- perf_event__fprintf(event, f);
+ perf_event__fprintf(event, NULL, f);
}
static void intel_pt_dump_sample(struct perf_session *session,
@@ -249,6 +249,24 @@ static void intel_pt_dump_sample(struct perf_session *session,
intel_pt_dump(pt, sample->aux_sample.data, sample->aux_sample.size);
}
+static bool intel_pt_log_events(struct intel_pt *pt, u64 tm)
+{
+ struct perf_time_interval *range = pt->synth_opts.ptime_range;
+ int n = pt->synth_opts.range_num;
+
+ if (pt->synth_opts.log_plus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+ return true;
+
+ if (pt->synth_opts.log_minus_flags & AUXTRACE_LOG_FLG_ALL_PERF_EVTS)
+ return false;
+
+ /* perf_time__ranges_skip_sample does not work if time is zero */
+ if (!tm)
+ tm = 1;
+
+ return !n || !perf_time__ranges_skip_sample(range, n, tm);
+}
+
static int intel_pt_do_fix_overlap(struct intel_pt *pt, struct auxtrace_buffer *a,
struct auxtrace_buffer *b)
{
@@ -520,6 +538,17 @@ intel_pt_cache_lookup(struct dso *dso, struct machine *machine, u64 offset)
return auxtrace_cache__lookup(dso->auxtrace_cache, offset);
}
+static void intel_pt_cache_invalidate(struct dso *dso, struct machine *machine,
+ u64 offset)
+{
+ struct auxtrace_cache *c = intel_pt_cache(dso, machine);
+
+ if (!c)
+ return;
+
+ auxtrace_cache__remove(dso->auxtrace_cache, offset);
+}
+
static inline u8 intel_pt_cpumode(struct intel_pt *pt, uint64_t ip)
{
return ip >= pt->kernel_start ?
@@ -1001,6 +1030,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
params.mtc_period = intel_pt_mtc_period(pt);
params.tsc_ctc_ratio_n = pt->tsc_ctc_ratio_n;
params.tsc_ctc_ratio_d = pt->tsc_ctc_ratio_d;
+ params.quick = pt->synth_opts.quick;
if (pt->filts.cnt > 0)
params.pgd_ip = intel_pt_pgd_ip;
@@ -1394,7 +1424,10 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
sample.id = ptq->pt->instructions_id;
sample.stream_id = ptq->pt->instructions_id;
- sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
+ if (pt->synth_opts.quick)
+ sample.period = 1;
+ else
+ sample.period = ptq->state->tot_insn_cnt - ptq->last_insn_cnt;
sample.cyc_cnt = ptq->ipc_cyc_cnt - ptq->last_in_cyc_cnt;
if (sample.cyc_cnt) {
@@ -1852,6 +1885,15 @@ static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
char msg[MAX_AUXTRACE_ERROR_MSG];
int err;
+ if (pt->synth_opts.error_minus_flags) {
+ if (code == INTEL_PT_ERR_OVR &&
+ pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_OVERFLOW)
+ return 0;
+ if (code == INTEL_PT_ERR_LOST &&
+ pt->synth_opts.error_minus_flags & AUXTRACE_ERR_FLG_DATA_LOST)
+ return 0;
+ }
+
intel_pt__strerror(code, msg, MAX_AUXTRACE_ERROR_MSG);
auxtrace_synth_error(&event.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
@@ -2566,10 +2608,6 @@ static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
return -EINVAL;
}
- intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
- cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
- &pt->tc));
-
ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
if (ret <= 0)
return ret;
@@ -2594,6 +2632,67 @@ static int intel_pt_process_itrace_start(struct intel_pt *pt,
event->itrace_start.tid);
}
+static int intel_pt_find_map(struct thread *thread, u8 cpumode, u64 addr,
+ struct addr_location *al)
+{
+ if (!al->map || addr < al->map->start || addr >= al->map->end) {
+ if (!thread__find_map(thread, cpumode, addr, al))
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Invalidate all instruction cache entries that overlap the text poke */
+static int intel_pt_text_poke(struct intel_pt *pt, union perf_event *event)
+{
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ u64 addr = event->text_poke.addr + event->text_poke.new_len - 1;
+ /* Assume text poke begins in a basic block no more than 4096 bytes */
+ int cnt = 4096 + event->text_poke.new_len;
+ struct thread *thread = pt->unknown_thread;
+ struct addr_location al = { .map = NULL };
+ struct machine *machine = pt->machine;
+ struct intel_pt_cache_entry *e;
+ u64 offset;
+
+ if (!event->text_poke.new_len)
+ return 0;
+
+ for (; cnt; cnt--, addr--) {
+ if (intel_pt_find_map(thread, cpumode, addr, &al)) {
+ if (addr < event->text_poke.addr)
+ return 0;
+ continue;
+ }
+
+ if (!al.map->dso || !al.map->dso->auxtrace_cache)
+ continue;
+
+ offset = al.map->map_ip(al.map, addr);
+
+ e = intel_pt_cache_lookup(al.map->dso, machine, offset);
+ if (!e)
+ continue;
+
+ if (addr + e->byte_cnt + e->length <= event->text_poke.addr) {
+ /*
+ * No overlap. Working backwards there cannot be another
+ * basic block that overlaps the text poke if there is a
+ * branch instruction before the text poke address.
+ */
+ if (e->branch != INTEL_PT_BR_NO_BRANCH)
+ return 0;
+ } else {
+ intel_pt_cache_invalidate(al.map->dso, machine, offset);
+ intel_pt_log("Invalidated instruction cache for %s at %#"PRIx64"\n",
+ al.map->dso->long_name, addr);
+ }
+ }
+
+ return 0;
+}
+
static int intel_pt_process_event(struct perf_session *session,
union perf_event *event,
struct perf_sample *sample,
@@ -2662,9 +2761,14 @@ static int intel_pt_process_event(struct perf_session *session,
event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
err = intel_pt_context_switch(pt, event, sample);
- intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
- event->header.type, sample->cpu, sample->time, timestamp);
- intel_pt_log_event(event);
+ if (!err && event->header.type == PERF_RECORD_TEXT_POKE)
+ err = intel_pt_text_poke(pt, event);
+
+ if (intel_pt_enable_logging && intel_pt_log_events(pt, sample->time)) {
+ intel_pt_log("event %u: cpu %d time %"PRIu64" tsc %#"PRIx64" ",
+ event->header.type, sample->cpu, sample->time, timestamp);
+ intel_pt_log_event(event);
+ }
return err;
}
@@ -2913,8 +3017,15 @@ static int intel_pt_synth_events(struct intel_pt *pt,
if (pt->synth_opts.callchain)
attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
- if (pt->synth_opts.last_branch)
+ if (pt->synth_opts.last_branch) {
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ /*
+ * We don't use the hardware index, but the sample generation
+ * code uses the new format branch_stack with this field,
+ * so the event attributes must indicate that it's present.
+ */
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
if (pt->synth_opts.instructions) {
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index 32bb05e03fb2..0804308ef285 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -26,6 +26,7 @@
#include "jit.h"
#include "jitdump.h"
#include "genelf.h"
+#include "thread.h"
#include <linux/ctype.h>
#include <linux/zalloc.h>
@@ -749,6 +750,28 @@ jit_detect(char *mmap_name, pid_t pid)
return 0;
}
+static void jit_add_pid(struct machine *machine, pid_t pid)
+{
+ struct thread *thread = machine__findnew_thread(machine, pid, pid);
+
+ if (!thread) {
+ pr_err("%s: thread %d not found or created\n", __func__, pid);
+ return;
+ }
+
+ thread->priv = (void *)1;
+}
+
+static bool jit_has_pid(struct machine *machine, pid_t pid)
+{
+ struct thread *thread = machine__find_thread(machine, pid, pid);
+
+ if (!thread)
+ return 0;
+
+ return (bool)thread->priv;
+}
+
int
jit_process(struct perf_session *session,
struct perf_data *output,
@@ -764,8 +787,13 @@ jit_process(struct perf_session *session,
/*
* first, detect marker mmap (i.e., the jitdump mmap)
*/
- if (jit_detect(filename, pid))
+ if (jit_detect(filename, pid)) {
+ // Strip //anon* mmaps if we processed a jitdump for this pid
+ if (jit_has_pid(machine, pid) && (strncmp(filename, "//anon", 6) == 0))
+ return 1;
+
return 0;
+ }
memset(&jd, 0, sizeof(jd));
@@ -784,6 +812,7 @@ jit_process(struct perf_session *session,
ret = jit_inject(&jd, filename);
if (!ret) {
+ jit_add_pid(machine, pid);
*nbytes = jd.bytes_written;
ret = 1;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index d5384807372b..85587de027a5 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -703,7 +703,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
dso__set_module_info(dso, m, machine);
dso__set_long_name(dso, strdup(filename), true);
- dso->kernel = DSO_TYPE_KERNEL;
+ dso->kernel = DSO_SPACE__KERNEL;
}
dso__get(dso);
@@ -736,12 +736,6 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
return 0;
}
-static int is_bpf_image(const char *name)
-{
- return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 ||
- strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0;
-}
-
static int machine__process_ksymbol_register(struct machine *machine,
union perf_event *event,
struct perf_sample *sample __maybe_unused)
@@ -753,7 +747,7 @@ static int machine__process_ksymbol_register(struct machine *machine,
struct dso *dso = dso__new(event->ksymbol.name);
if (dso) {
- dso->kernel = DSO_TYPE_KERNEL;
+ dso->kernel = DSO_SPACE__KERNEL;
map = map__new2(0, dso);
}
@@ -762,6 +756,12 @@ static int machine__process_ksymbol_register(struct machine *machine,
return -ENOMEM;
}
+ if (event->ksymbol.ksym_type == PERF_RECORD_KSYMBOL_TYPE_OOL) {
+ map->dso->binary_type = DSO_BINARY_TYPE__OOL;
+ map->dso->data.file_size = event->ksymbol.len;
+ dso__set_loaded(map->dso);
+ }
+
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
maps__insert(&machine->kmaps, map);
@@ -808,6 +808,47 @@ int machine__process_ksymbol(struct machine *machine __maybe_unused,
return machine__process_ksymbol_register(machine, event, sample);
}
+int machine__process_text_poke(struct machine *machine, union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct map *map = maps__find(&machine->kmaps, event->text_poke.addr);
+ u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+
+ if (dump_trace)
+ perf_event__fprintf_text_poke(event, machine, stdout);
+
+ if (!event->text_poke.new_len)
+ return 0;
+
+ if (cpumode != PERF_RECORD_MISC_KERNEL) {
+ pr_debug("%s: unsupported cpumode - ignoring\n", __func__);
+ return 0;
+ }
+
+ if (map && map->dso) {
+ u8 *new_bytes = event->text_poke.bytes + event->text_poke.old_len;
+ int ret;
+
+ /*
+ * Kernel maps might be changed when loading symbols so loading
+ * must be done prior to using kernel maps.
+ */
+ map__load(map);
+ ret = dso__data_write_cache_addr(map->dso, map, machine,
+ event->text_poke.addr,
+ new_bytes,
+ event->text_poke.new_len);
+ if (ret != event->text_poke.new_len)
+ pr_debug("Failed to write kernel text poke at %#" PRI_lx64 "\n",
+ event->text_poke.addr);
+ } else {
+ pr_debug("Failed to find kernel text poke address map for %#" PRI_lx64 "\n",
+ event->text_poke.addr);
+ }
+
+ return 0;
+}
+
static struct map *machine__addnew_module_map(struct machine *machine, u64 start,
const char *filename)
{
@@ -924,14 +965,14 @@ static struct dso *machine__get_kernel(struct machine *machine)
vmlinux_name = symbol_conf.vmlinux_name;
kernel = machine__findnew_kernel(machine, vmlinux_name,
- "[kernel]", DSO_TYPE_KERNEL);
+ "[kernel]", DSO_SPACE__KERNEL);
} else {
if (symbol_conf.default_guest_vmlinux_name)
vmlinux_name = symbol_conf.default_guest_vmlinux_name;
kernel = machine__findnew_kernel(machine, vmlinux_name,
"[guest.kernel]",
- DSO_TYPE_GUEST_KERNEL);
+ DSO_SPACE__KERNEL_GUEST);
}
if (kernel != NULL && (!kernel->has_build_id))
@@ -1559,7 +1600,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
union perf_event *event)
{
struct map *map;
- enum dso_kernel_type kernel_type;
+ enum dso_space_type dso_space;
bool is_kernel_mmap;
/* If we have maps from kcore then we do not need or want any others */
@@ -1567,9 +1608,9 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
return 0;
if (machine__is_host(machine))
- kernel_type = DSO_TYPE_KERNEL;
+ dso_space = DSO_SPACE__KERNEL;
else
- kernel_type = DSO_TYPE_GUEST_KERNEL;
+ dso_space = DSO_SPACE__KERNEL_GUEST;
is_kernel_mmap = memcmp(event->mmap.filename,
machine->mmap_name,
@@ -1629,7 +1670,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
if (kernel == NULL)
goto out_problem;
- kernel->kernel = kernel_type;
+ kernel->kernel = dso_space;
if (__machine__create_kernel_maps(machine, kernel) < 0) {
dso__put(kernel);
goto out_problem;
@@ -1930,6 +1971,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_ksymbol(machine, event, sample); break;
case PERF_RECORD_BPF_EVENT:
ret = machine__process_bpf(machine, event, sample); break;
+ case PERF_RECORD_TEXT_POKE:
+ ret = machine__process_text_poke(machine, event, sample); break;
default:
ret = -1;
break;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index fa1be9ea00fa..062c36a8433c 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -138,6 +138,9 @@ int machine__process_mmap2_event(struct machine *machine, union perf_event *even
int machine__process_ksymbol(struct machine *machine,
union perf_event *event,
struct perf_sample *sample);
+int machine__process_text_poke(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
int machine__process_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 53d96611e6a6..cc0faf8f1321 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -267,6 +267,27 @@ bool __map__is_bpf_prog(const struct map *map)
return name && (strstr(name, "bpf_prog_") == name);
}
+bool __map__is_bpf_image(const struct map *map)
+{
+ const char *name;
+
+ if (map->dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE)
+ return true;
+
+ /*
+ * If PERF_RECORD_KSYMBOL is not included, the dso will not have
+ * type of DSO_BINARY_TYPE__BPF_IMAGE. In such cases, we can
+ * guess the type based on name.
+ */
+ name = map->dso->short_name;
+ return name && is_bpf_image(name);
+}
+
+bool __map__is_ool(const struct map *map)
+{
+ return map->dso && map->dso->binary_type == DSO_BINARY_TYPE__OOL;
+}
+
bool map__has_symbols(const struct map *map)
{
return dso__has_symbols(map->dso);
@@ -481,7 +502,7 @@ u64 map__rip_2objdump(struct map *map, u64 rip)
* kernel modules also have DSO_TYPE_USER in dso->kernel,
* but all kernel modules are ET_REL, so won't get here.
*/
- if (map->dso->kernel == DSO_TYPE_USER)
+ if (map->dso->kernel == DSO_SPACE__USER)
return rip + map->dso->text_offset;
return map->unmap_ip(map, rip) - map->reloc;
@@ -511,7 +532,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
* kernel modules also have DSO_TYPE_USER in dso->kernel,
* but all kernel modules are ET_REL, so won't get here.
*/
- if (map->dso->kernel == DSO_TYPE_USER)
+ if (map->dso->kernel == DSO_SPACE__USER)
return map->unmap_ip(map, ip - map->dso->text_offset);
return ip + map->reloc;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 067036e8970c..c2f5d28fe73a 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -147,11 +147,14 @@ int map__set_kallsyms_ref_reloc_sym(struct map *map, const char *symbol_name,
bool __map__is_kernel(const struct map *map);
bool __map__is_extra_kernel_map(const struct map *map);
bool __map__is_bpf_prog(const struct map *map);
+bool __map__is_bpf_image(const struct map *map);
+bool __map__is_ool(const struct map *map);
static inline bool __map__is_kmodule(const struct map *map)
{
return !__map__is_kernel(map) && !__map__is_extra_kernel_map(map) &&
- !__map__is_bpf_prog(map);
+ !__map__is_bpf_prog(map) && !__map__is_ool(map) &&
+ !__map__is_bpf_image(map);
}
bool map__has_symbols(const struct map *map);
@@ -163,4 +166,9 @@ static inline bool is_entry_trampoline(const char *name)
return !strcmp(name, ENTRY_TRAMPOLINE_NAME);
}
+static inline bool is_bpf_image(const char *name)
+{
+ return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 ||
+ strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0;
+}
#endif /* __PERF_MAP_H */
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 9e21aa767e41..ab5030fcfed4 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -24,6 +24,7 @@
#include <subcmd/parse-options.h>
#include <api/fs/fs.h>
#include "util.h"
+#include <asm/bug.h>
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
@@ -76,23 +77,79 @@ static struct rb_node *metric_event_new(struct rblist *rblist __maybe_unused,
return &me->nd;
}
+static void metric_event_delete(struct rblist *rblist __maybe_unused,
+ struct rb_node *rb_node)
+{
+ struct metric_event *me = container_of(rb_node, struct metric_event, nd);
+ struct metric_expr *expr, *tmp;
+
+ list_for_each_entry_safe(expr, tmp, &me->head, nd) {
+ free(expr->metric_refs);
+ free(expr->metric_events);
+ free(expr);
+ }
+
+ free(me);
+}
+
static void metricgroup__rblist_init(struct rblist *metric_events)
{
rblist__init(metric_events);
metric_events->node_cmp = metric_event_cmp;
metric_events->node_new = metric_event_new;
+ metric_events->node_delete = metric_event_delete;
}
-struct egroup {
+void metricgroup__rblist_exit(struct rblist *metric_events)
+{
+ rblist__exit(metric_events);
+}
+
+/*
+ * A node in the list of referenced metrics. metric_expr
+ * is held as a convenience to avoid a search through the
+ * metric list.
+ */
+struct metric_ref_node {
+ const char *metric_name;
+ const char *metric_expr;
+ struct list_head list;
+};
+
+struct metric {
struct list_head nd;
struct expr_parse_ctx pctx;
const char *metric_name;
const char *metric_expr;
const char *metric_unit;
+ struct list_head metric_refs;
+ int metric_refs_cnt;
int runtime;
bool has_constraint;
};
+#define RECURSION_ID_MAX 1000
+
+struct expr_ids {
+ struct expr_id id[RECURSION_ID_MAX];
+ int cnt;
+};
+
+static struct expr_id *expr_ids__alloc(struct expr_ids *ids)
+{
+ if (ids->cnt >= RECURSION_ID_MAX)
+ return NULL;
+ return &ids->id[ids->cnt++];
+}
+
+static void expr_ids__exit(struct expr_ids *ids)
+{
+ int i;
+
+ for (i = 0; i < ids->cnt; i++)
+ free(ids->id[i].id);
+}
+
/**
* Find a group of events in perf_evlist that correpond to those from a parsed
* metric expression. Note, as find_evsel_group is called in the same order as
@@ -119,7 +176,7 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
unsigned long *evlist_used)
{
struct evsel *ev, *current_leader = NULL;
- double *val_ptr;
+ struct expr_id_data *val_ptr;
int i = 0, matched_events = 0, events_to_match;
const int idnum = (int)hashmap__size(&pctx->ids);
@@ -206,7 +263,7 @@ static int metricgroup__setup_events(struct list_head *groups,
struct metric_expr *expr;
int i = 0;
int ret = 0;
- struct egroup *eg;
+ struct metric *m;
struct evsel *evsel, *tmp;
unsigned long *evlist_used;
@@ -214,22 +271,23 @@ static int metricgroup__setup_events(struct list_head *groups,
if (!evlist_used)
return -ENOMEM;
- list_for_each_entry (eg, groups, nd) {
+ list_for_each_entry (m, groups, nd) {
struct evsel **metric_events;
+ struct metric_ref *metric_refs = NULL;
metric_events = calloc(sizeof(void *),
- hashmap__size(&eg->pctx.ids) + 1);
+ hashmap__size(&m->pctx.ids) + 1);
if (!metric_events) {
ret = -ENOMEM;
break;
}
- evsel = find_evsel_group(perf_evlist, &eg->pctx,
+ evsel = find_evsel_group(perf_evlist, &m->pctx,
metric_no_merge,
- eg->has_constraint, metric_events,
+ m->has_constraint, metric_events,
evlist_used);
if (!evsel) {
pr_debug("Cannot resolve %s: %s\n",
- eg->metric_name, eg->metric_expr);
+ m->metric_name, m->metric_expr);
free(metric_events);
continue;
}
@@ -247,11 +305,42 @@ static int metricgroup__setup_events(struct list_head *groups,
free(metric_events);
break;
}
- expr->metric_expr = eg->metric_expr;
- expr->metric_name = eg->metric_name;
- expr->metric_unit = eg->metric_unit;
+
+ /*
+ * Collect and store collected nested expressions
+ * for metric processing.
+ */
+ if (m->metric_refs_cnt) {
+ struct metric_ref_node *ref;
+
+ metric_refs = zalloc(sizeof(struct metric_ref) * (m->metric_refs_cnt + 1));
+ if (!metric_refs) {
+ ret = -ENOMEM;
+ free(metric_events);
+ free(expr);
+ break;
+ }
+
+ i = 0;
+ list_for_each_entry(ref, &m->metric_refs, list) {
+ /*
+ * Intentionally passing just const char pointers,
+ * originally from 'struct pmu_event' object.
+ * We don't need to change them, so there's no
+ * need to create our own copy.
+ */
+ metric_refs[i].metric_name = ref->metric_name;
+ metric_refs[i].metric_expr = ref->metric_expr;
+ i++;
+ }
+ };
+
+ expr->metric_refs = metric_refs;
+ expr->metric_expr = m->metric_expr;
+ expr->metric_name = m->metric_name;
+ expr->metric_unit = m->metric_unit;
expr->metric_events = metric_events;
- expr->runtime = eg->runtime;
+ expr->runtime = m->runtime;
list_add(&expr->nd, &me->head);
}
@@ -443,6 +532,9 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
continue;
strlist__add(me->metrics, s);
}
+
+ if (!raw)
+ free(s);
}
free(omg);
}
@@ -552,123 +644,349 @@ int __weak arch_get_runtimeparam(void)
return 1;
}
-static int __metricgroup__add_metric(struct list_head *group_list,
- struct pmu_event *pe,
- bool metric_no_group,
- int runtime)
+static int __add_metric(struct list_head *metric_list,
+ struct pmu_event *pe,
+ bool metric_no_group,
+ int runtime,
+ struct metric **mp,
+ struct expr_id *parent,
+ struct expr_ids *ids)
{
- struct egroup *eg;
+ struct metric_ref_node *ref;
+ struct metric *m;
- eg = malloc(sizeof(*eg));
- if (!eg)
- return -ENOMEM;
+ if (*mp == NULL) {
+ /*
+ * We got in here for the parent group,
+ * allocate it and put it on the list.
+ */
+ m = zalloc(sizeof(*m));
+ if (!m)
+ return -ENOMEM;
+
+ expr__ctx_init(&m->pctx);
+ m->metric_name = pe->metric_name;
+ m->metric_expr = pe->metric_expr;
+ m->metric_unit = pe->unit;
+ m->runtime = runtime;
+ m->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
+ INIT_LIST_HEAD(&m->metric_refs);
+ m->metric_refs_cnt = 0;
+
+ parent = expr_ids__alloc(ids);
+ if (!parent) {
+ free(m);
+ return -EINVAL;
+ }
- expr__ctx_init(&eg->pctx);
- eg->metric_name = pe->metric_name;
- eg->metric_expr = pe->metric_expr;
- eg->metric_unit = pe->unit;
- eg->runtime = runtime;
- eg->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
+ parent->id = strdup(pe->metric_name);
+ if (!parent->id) {
+ free(m);
+ return -ENOMEM;
+ }
+ *mp = m;
+ } else {
+ /*
+ * We got here for the referenced metric, via the
+ * recursive metricgroup__add_metric call, add
+ * it to the parent group.
+ */
+ m = *mp;
+
+ ref = malloc(sizeof(*ref));
+ if (!ref)
+ return -ENOMEM;
+
+ /*
+ * Intentionally passing just const char pointers,
+ * from 'pe' object, so they never go away. We don't
+ * need to change them, so there's no need to create
+ * our own copy.
+ */
+ ref->metric_name = pe->metric_name;
+ ref->metric_expr = pe->metric_expr;
- if (expr__find_other(pe->metric_expr, NULL, &eg->pctx, runtime) < 0) {
- expr__ctx_clear(&eg->pctx);
- free(eg);
+ list_add(&ref->list, &m->metric_refs);
+ m->metric_refs_cnt++;
+ }
+
+ /* Force all found IDs in metric to have us as parent ID. */
+ WARN_ON_ONCE(!parent);
+ m->pctx.parent = parent;
+
+ /*
+ * For both the parent and referenced metrics, we parse
+ * all the metric's IDs and add it to the parent context.
+ */
+ if (expr__find_other(pe->metric_expr, NULL, &m->pctx, runtime) < 0) {
+ if (m->metric_refs_cnt == 0) {
+ expr__ctx_clear(&m->pctx);
+ free(m);
+ *mp = NULL;
+ }
return -EINVAL;
}
- if (list_empty(group_list))
- list_add(&eg->nd, group_list);
+ /*
+ * We add new group only in the 'parent' call,
+ * so bail out for referenced metric case.
+ */
+ if (m->metric_refs_cnt)
+ return 0;
+
+ if (list_empty(metric_list))
+ list_add(&m->nd, metric_list);
else {
struct list_head *pos;
/* Place the largest groups at the front. */
- list_for_each_prev(pos, group_list) {
- struct egroup *old = list_entry(pos, struct egroup, nd);
+ list_for_each_prev(pos, metric_list) {
+ struct metric *old = list_entry(pos, struct metric, nd);
- if (hashmap__size(&eg->pctx.ids) <=
+ if (hashmap__size(&m->pctx.ids) <=
hashmap__size(&old->pctx.ids))
break;
}
- list_add(&eg->nd, pos);
+ list_add(&m->nd, pos);
}
return 0;
}
+#define map_for_each_event(__pe, __idx, __map) \
+ for (__idx = 0, __pe = &__map->table[__idx]; \
+ __pe->name || __pe->metric_group || __pe->metric_name; \
+ __pe = &__map->table[++__idx])
+
+#define map_for_each_metric(__pe, __idx, __map, __metric) \
+ map_for_each_event(__pe, __idx, __map) \
+ if (__pe->metric_expr && \
+ (match_metric(__pe->metric_group, __metric) || \
+ match_metric(__pe->metric_name, __metric)))
+
+static struct pmu_event *find_metric(const char *metric, struct pmu_events_map *map)
+{
+ struct pmu_event *pe;
+ int i;
+
+ map_for_each_event(pe, i, map) {
+ if (match_metric(pe->metric_name, metric))
+ return pe;
+ }
+
+ return NULL;
+}
+
+static int recursion_check(struct metric *m, const char *id, struct expr_id **parent,
+ struct expr_ids *ids)
+{
+ struct expr_id_data *data;
+ struct expr_id *p;
+ int ret;
+
+ /*
+ * We get the parent referenced by 'id' argument and
+ * traverse through all the parent object IDs to check
+ * if we already processed 'id', if we did, it's recursion
+ * and we fail.
+ */
+ ret = expr__get_id(&m->pctx, id, &data);
+ if (ret)
+ return ret;
+
+ p = data->parent;
+
+ while (p->parent) {
+ if (!strcmp(p->id, id)) {
+ pr_err("failed: recursion detected for %s\n", id);
+ return -1;
+ }
+ p = p->parent;
+ }
+
+ /*
+ * If we are over the limit of static entris, the metric
+ * is too difficult/nested to process, fail as well.
+ */
+ p = expr_ids__alloc(ids);
+ if (!p) {
+ pr_err("failed: too many nested metrics\n");
+ return -EINVAL;
+ }
+
+ p->id = strdup(id);
+ p->parent = data->parent;
+ *parent = p;
+
+ return p->id ? 0 : -ENOMEM;
+}
+
+static int add_metric(struct list_head *metric_list,
+ struct pmu_event *pe,
+ bool metric_no_group,
+ struct metric **mp,
+ struct expr_id *parent,
+ struct expr_ids *ids);
+
+static int __resolve_metric(struct metric *m,
+ bool metric_no_group,
+ struct list_head *metric_list,
+ struct pmu_events_map *map,
+ struct expr_ids *ids)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+ bool all;
+ int ret;
+
+ /*
+ * Iterate all the parsed IDs and if there's metric,
+ * add it to the context.
+ */
+ do {
+ all = true;
+ hashmap__for_each_entry((&m->pctx.ids), cur, bkt) {
+ struct expr_id *parent;
+ struct pmu_event *pe;
+
+ pe = find_metric(cur->key, map);
+ if (!pe)
+ continue;
+
+ ret = recursion_check(m, cur->key, &parent, ids);
+ if (ret)
+ return ret;
+
+ all = false;
+ /* The metric key itself needs to go out.. */
+ expr__del_id(&m->pctx, cur->key);
+
+ /* ... and it gets resolved to the parent context. */
+ ret = add_metric(metric_list, pe, metric_no_group, &m, parent, ids);
+ if (ret)
+ return ret;
+
+ /*
+ * We added new metric to hashmap, so we need
+ * to break the iteration and start over.
+ */
+ break;
+ }
+ } while (!all);
+
+ return 0;
+}
+
+static int resolve_metric(bool metric_no_group,
+ struct list_head *metric_list,
+ struct pmu_events_map *map,
+ struct expr_ids *ids)
+{
+ struct metric *m;
+ int err;
+
+ list_for_each_entry(m, metric_list, nd) {
+ err = __resolve_metric(m, metric_no_group, metric_list, map, ids);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static int add_metric(struct list_head *metric_list,
+ struct pmu_event *pe,
+ bool metric_no_group,
+ struct metric **m,
+ struct expr_id *parent,
+ struct expr_ids *ids)
+{
+ struct metric *orig = *m;
+ int ret = 0;
+
+ pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
+
+ if (!strstr(pe->metric_expr, "?")) {
+ ret = __add_metric(metric_list, pe, metric_no_group, 1, m, parent, ids);
+ } else {
+ int j, count;
+
+ count = arch_get_runtimeparam();
+
+ /* This loop is added to create multiple
+ * events depend on count value and add
+ * those events to metric_list.
+ */
+
+ for (j = 0; j < count && !ret; j++, *m = orig)
+ ret = __add_metric(metric_list, pe, metric_no_group, j, m, parent, ids);
+ }
+
+ return ret;
+}
+
static int metricgroup__add_metric(const char *metric, bool metric_no_group,
struct strbuf *events,
- struct list_head *group_list)
+ struct list_head *metric_list,
+ struct pmu_events_map *map)
{
- struct pmu_events_map *map = perf_pmu__find_map(NULL);
+ struct expr_ids ids = { .cnt = 0, };
struct pmu_event *pe;
- struct egroup *eg;
+ struct metric *m;
+ LIST_HEAD(list);
int i, ret;
bool has_match = false;
- if (!map)
- return 0;
+ map_for_each_metric(pe, i, map, metric) {
+ has_match = true;
+ m = NULL;
- for (i = 0; ; i++) {
- pe = &map->table[i];
+ ret = add_metric(&list, pe, metric_no_group, &m, NULL, &ids);
+ if (ret)
+ goto out;
- if (!pe->name && !pe->metric_group && !pe->metric_name) {
- /* End of pmu events. */
- if (!has_match)
- return -EINVAL;
- break;
- }
- if (!pe->metric_expr)
- continue;
- if (match_metric(pe->metric_group, metric) ||
- match_metric(pe->metric_name, metric)) {
- has_match = true;
- pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
-
- if (!strstr(pe->metric_expr, "?")) {
- ret = __metricgroup__add_metric(group_list,
- pe,
- metric_no_group,
- 1);
- if (ret)
- return ret;
- } else {
- int j, count;
-
- count = arch_get_runtimeparam();
-
- /* This loop is added to create multiple
- * events depend on count value and add
- * those events to group_list.
- */
+ /*
+ * Process any possible referenced metrics
+ * included in the expression.
+ */
+ ret = resolve_metric(metric_no_group,
+ &list, map, &ids);
+ if (ret)
+ goto out;
+ }
- for (j = 0; j < count; j++) {
- ret = __metricgroup__add_metric(
- group_list, pe,
- metric_no_group, j);
- if (ret)
- return ret;
- }
- }
- }
+ /* End of pmu events. */
+ if (!has_match) {
+ ret = -EINVAL;
+ goto out;
}
- list_for_each_entry(eg, group_list, nd) {
+
+ list_for_each_entry(m, &list, nd) {
if (events->len > 0)
strbuf_addf(events, ",");
- if (eg->has_constraint) {
+ if (m->has_constraint) {
metricgroup__add_metric_non_group(events,
- &eg->pctx);
+ &m->pctx);
} else {
metricgroup__add_metric_weak_group(events,
- &eg->pctx);
+ &m->pctx);
}
}
- return 0;
+
+out:
+ /*
+ * add to metric_list so that they can be released
+ * even if it's failed
+ */
+ list_splice(&list, metric_list);
+ expr_ids__exit(&ids);
+ return ret;
}
static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
struct strbuf *events,
- struct list_head *group_list)
+ struct list_head *metric_list,
+ struct pmu_events_map *map)
{
char *llist, *nlist, *p;
int ret = -EINVAL;
@@ -683,7 +1001,7 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
while ((p = strsep(&llist, ",")) != NULL) {
ret = metricgroup__add_metric(p, metric_no_group, events,
- group_list);
+ metric_list, map);
if (ret == -EINVAL) {
fprintf(stderr, "Cannot find metric or group `%s'\n",
p);
@@ -698,50 +1016,88 @@ static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
return ret;
}
-static void metricgroup__free_egroups(struct list_head *group_list)
+static void metric__free_refs(struct metric *metric)
{
- struct egroup *eg, *egtmp;
+ struct metric_ref_node *ref, *tmp;
- list_for_each_entry_safe (eg, egtmp, group_list, nd) {
- expr__ctx_clear(&eg->pctx);
- list_del_init(&eg->nd);
- free(eg);
+ list_for_each_entry_safe(ref, tmp, &metric->metric_refs, list) {
+ list_del(&ref->list);
+ free(ref);
}
}
-int metricgroup__parse_groups(const struct option *opt,
- const char *str,
- bool metric_no_group,
- bool metric_no_merge,
- struct rblist *metric_events)
+static void metricgroup__free_metrics(struct list_head *metric_list)
+{
+ struct metric *m, *tmp;
+
+ list_for_each_entry_safe (m, tmp, metric_list, nd) {
+ metric__free_refs(m);
+ expr__ctx_clear(&m->pctx);
+ list_del_init(&m->nd);
+ free(m);
+ }
+}
+
+static int parse_groups(struct evlist *perf_evlist, const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct perf_pmu *fake_pmu,
+ struct rblist *metric_events,
+ struct pmu_events_map *map)
{
struct parse_events_error parse_error;
- struct evlist *perf_evlist = *(struct evlist **)opt->value;
struct strbuf extra_events;
- LIST_HEAD(group_list);
+ LIST_HEAD(metric_list);
int ret;
if (metric_events->nr_entries == 0)
metricgroup__rblist_init(metric_events);
ret = metricgroup__add_metric_list(str, metric_no_group,
- &extra_events, &group_list);
+ &extra_events, &metric_list, map);
if (ret)
- return ret;
+ goto out;
pr_debug("adding %s\n", extra_events.buf);
bzero(&parse_error, sizeof(parse_error));
- ret = parse_events(perf_evlist, extra_events.buf, &parse_error);
+ ret = __parse_events(perf_evlist, extra_events.buf, &parse_error, fake_pmu);
if (ret) {
parse_events_print_error(&parse_error, extra_events.buf);
goto out;
}
- strbuf_release(&extra_events);
- ret = metricgroup__setup_events(&group_list, metric_no_merge,
+ ret = metricgroup__setup_events(&metric_list, metric_no_merge,
perf_evlist, metric_events);
out:
- metricgroup__free_egroups(&group_list);
+ metricgroup__free_metrics(&metric_list);
+ strbuf_release(&extra_events);
return ret;
}
+int metricgroup__parse_groups(const struct option *opt,
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events)
+{
+ struct evlist *perf_evlist = *(struct evlist **)opt->value;
+ struct pmu_events_map *map = perf_pmu__find_map(NULL);
+
+ if (!map)
+ return 0;
+
+ return parse_groups(perf_evlist, str, metric_no_group,
+ metric_no_merge, NULL, metric_events, map);
+}
+
+int metricgroup__parse_groups_test(struct evlist *evlist,
+ struct pmu_events_map *map,
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events)
+{
+ return parse_groups(evlist, str, metric_no_group,
+ metric_no_merge, &perf_pmu__fake, metric_events, map);
+}
+
bool metricgroup__has_metric(const char *metric)
{
struct pmu_events_map *map = perf_pmu__find_map(NULL);
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 287850bcdeca..62623a39cbec 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -7,8 +7,10 @@
#include <stdbool.h>
struct evsel;
+struct evlist;
struct option;
struct rblist;
+struct pmu_events_map;
struct metric_event {
struct rb_node nd;
@@ -16,12 +18,18 @@ struct metric_event {
struct list_head head; /* list of metric_expr */
};
+struct metric_ref {
+ const char *metric_name;
+ const char *metric_expr;
+};
+
struct metric_expr {
struct list_head nd;
const char *metric_expr;
const char *metric_name;
const char *metric_unit;
struct evsel **metric_events;
+ struct metric_ref *metric_refs;
int runtime;
};
@@ -34,8 +42,16 @@ int metricgroup__parse_groups(const struct option *opt,
bool metric_no_merge,
struct rblist *metric_events);
+int metricgroup__parse_groups_test(struct evlist *evlist,
+ struct pmu_events_map *map,
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events);
+
void metricgroup__print(bool metrics, bool groups, char *filter,
bool raw, bool details);
bool metricgroup__has_metric(const char *metric);
int arch_get_runtimeparam(void);
+void metricgroup__rblist_exit(struct rblist *metric_events);
#endif
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 3decbb203846..667cbca1547a 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -37,6 +37,7 @@
#include "util/evsel_config.h"
#include "util/event.h"
#include "util/pfm.h"
+#include "perf.h"
#define MAX_NAME_LEN 100
@@ -410,7 +411,7 @@ static int add_event_tool(struct list_head *list, int *idx,
return -ENOMEM;
evsel->tool_event = tool_event;
if (tool_event == PERF_TOOL_DURATION_TIME)
- evsel->unit = strdup("ns");
+ evsel->unit = "ns";
return 0;
}
@@ -767,8 +768,8 @@ int parse_events_load_bpf_obj(struct parse_events_state *parse_state,
return 0;
errout:
- parse_state->error->help = strdup("(add -v to see detail)");
- parse_state->error->str = strdup(errbuf);
+ parse_events__handle_error(parse_state->error, 0,
+ strdup(errbuf), strdup("(add -v to see detail)"));
return err;
}
@@ -784,36 +785,38 @@ parse_events_config_bpf(struct parse_events_state *parse_state,
return 0;
list_for_each_entry(term, head_config, list) {
- char errbuf[BUFSIZ];
int err;
if (term->type_term != PARSE_EVENTS__TERM_TYPE_USER) {
- snprintf(errbuf, sizeof(errbuf),
- "Invalid config term for BPF object");
- errbuf[BUFSIZ - 1] = '\0';
-
- parse_state->error->idx = term->err_term;
- parse_state->error->str = strdup(errbuf);
+ parse_events__handle_error(parse_state->error, term->err_term,
+ strdup("Invalid config term for BPF object"),
+ NULL);
return -EINVAL;
}
err = bpf__config_obj(obj, term, parse_state->evlist, &error_pos);
if (err) {
+ char errbuf[BUFSIZ];
+ int idx;
+
bpf__strerror_config_obj(obj, term, parse_state->evlist,
&error_pos, err, errbuf,
sizeof(errbuf));
- parse_state->error->help = strdup(
+
+ if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
+ idx = term->err_val;
+ else
+ idx = term->err_term + error_pos;
+
+ parse_events__handle_error(parse_state->error, idx,
+ strdup(errbuf),
+ strdup(
"Hint:\tValid config terms:\n"
" \tmap:[<arraymap>].value<indices>=[value]\n"
" \tmap:[<eventmap>].event<indices>=[event]\n"
"\n"
" \twhere <indices> is something like [0,3...5] or [all]\n"
-" \t(add -v to see detail)");
- parse_state->error->str = strdup(errbuf);
- if (err == -BPF_LOADER_ERRNO__OBJCONF_MAP_VALUE)
- parse_state->error->idx = term->err_val;
- else
- parse_state->error->idx = term->err_term + error_pos;
+" \t(add -v to see detail)"));
return err;
}
}
@@ -877,8 +880,8 @@ int parse_events_load_bpf(struct parse_events_state *parse_state,
-err, errbuf,
sizeof(errbuf));
- parse_state->error->help = strdup("(add -v to see detail)");
- parse_state->error->str = strdup(errbuf);
+ parse_events__handle_error(parse_state->error, 0,
+ strdup(errbuf), strdup("(add -v to see detail)"));
return err;
}
@@ -1450,7 +1453,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
fprintf(stderr, "' that may result in non-fatal errors\n");
}
- pmu = perf_pmu__find(name);
+ pmu = parse_state->fake_pmu ?: perf_pmu__find(name);
if (!pmu) {
char *err_str;
@@ -1483,7 +1486,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
}
}
- if (perf_pmu__check_alias(pmu, head_config, &info))
+ if (!parse_state->fake_pmu && perf_pmu__check_alias(pmu, head_config, &info))
return -EINVAL;
if (verbose > 1) {
@@ -1516,7 +1519,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
if (pmu->default_config && get_config_chgs(pmu, head_config, &config_terms))
return -ENOMEM;
- if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
+ if (!parse_state->fake_pmu && perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
struct evsel_config_term *pos, *tmp;
list_for_each_entry_safe(pos, tmp, &config_terms, list) {
@@ -1531,19 +1534,23 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel = __add_event(list, &parse_state->idx, &attr, true,
get_config_name(head_config), pmu,
&config_terms, auto_merge_stats, NULL);
- if (evsel) {
- evsel->unit = info.unit;
- evsel->scale = info.scale;
- evsel->per_pkg = info.per_pkg;
- evsel->snapshot = info.snapshot;
- evsel->metric_expr = info.metric_expr;
- evsel->metric_name = info.metric_name;
- evsel->pmu_name = name ? strdup(name) : NULL;
- evsel->use_uncore_alias = use_uncore_alias;
- evsel->percore = config_term_percore(&evsel->config_terms);
- }
+ if (!evsel)
+ return -ENOMEM;
- return evsel ? 0 : -ENOMEM;
+ evsel->pmu_name = name ? strdup(name) : NULL;
+ evsel->use_uncore_alias = use_uncore_alias;
+ evsel->percore = config_term_percore(&evsel->config_terms);
+
+ if (parse_state->fake_pmu)
+ return 0;
+
+ evsel->unit = info.unit;
+ evsel->scale = info.scale;
+ evsel->per_pkg = info.per_pkg;
+ evsel->snapshot = info.snapshot;
+ evsel->metric_expr = info.metric_expr;
+ evsel->metric_name = info.metric_name;
+ return 0;
}
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
@@ -1792,6 +1799,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str,
if (*str == 'u') {
if (!exclude)
exclude = eu = ek = eh = 1;
+ if (!exclude_GH && !perf_guest)
+ eG = 1;
eu = 0;
} else if (*str == 'k') {
if (!exclude)
@@ -2017,6 +2026,32 @@ err:
perf_pmu__parse_cleanup();
}
+/*
+ * This function injects special term in
+ * perf_pmu_events_list so the test code
+ * can check on this functionality.
+ */
+int perf_pmu__test_parse_init(void)
+{
+ struct perf_pmu_event_symbol *list;
+
+ list = malloc(sizeof(*list) * 1);
+ if (!list)
+ return -ENOMEM;
+
+ list->type = PMU_EVENT_SYMBOL;
+ list->symbol = strdup("read");
+
+ if (!list->symbol) {
+ free(list);
+ return -ENOMEM;
+ }
+
+ perf_pmu_events_list = list;
+ perf_pmu_events_list_num = 1;
+ return 0;
+}
+
enum perf_pmu_event_symbol_type
perf_pmu__parse_check(const char *name)
{
@@ -2078,6 +2113,8 @@ int parse_events_terms(struct list_head *terms, const char *str)
int ret;
ret = parse_events__scanner(str, &parse_state);
+ perf_pmu__parse_cleanup();
+
if (!ret) {
list_splice(parse_state.terms, terms);
zfree(&parse_state.terms);
@@ -2088,15 +2125,16 @@ int parse_events_terms(struct list_head *terms, const char *str)
return ret;
}
-int parse_events(struct evlist *evlist, const char *str,
- struct parse_events_error *err)
+int __parse_events(struct evlist *evlist, const char *str,
+ struct parse_events_error *err, struct perf_pmu *fake_pmu)
{
struct parse_events_state parse_state = {
- .list = LIST_HEAD_INIT(parse_state.list),
- .idx = evlist->core.nr_entries,
- .error = err,
- .evlist = evlist,
- .stoken = PE_START_EVENTS,
+ .list = LIST_HEAD_INIT(parse_state.list),
+ .idx = evlist->core.nr_entries,
+ .error = err,
+ .evlist = evlist,
+ .stoken = PE_START_EVENTS,
+ .fake_pmu = fake_pmu,
};
int ret;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 1fe23a2f9b36..00cde7d2e30c 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -33,8 +33,15 @@ const char *event_type(int type);
int parse_events_option(const struct option *opt, const char *str, int unset);
int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset);
-int parse_events(struct evlist *evlist, const char *str,
- struct parse_events_error *error);
+int __parse_events(struct evlist *evlist, const char *str, struct parse_events_error *error,
+ struct perf_pmu *fake_pmu);
+
+static inline int parse_events(struct evlist *evlist, const char *str,
+ struct parse_events_error *err)
+{
+ return __parse_events(evlist, str, err, NULL);
+}
+
int parse_events_terms(struct list_head *terms, const char *str);
int parse_filter(const struct option *opt, const char *str, int unset);
int exclude_perf(const struct option *opt, const char *arg, int unset);
@@ -127,9 +134,10 @@ struct parse_events_state {
int idx;
int nr_groups;
struct parse_events_error *error;
- struct evlist *evlist;
+ struct evlist *evlist;
struct list_head *terms;
int stoken;
+ struct perf_pmu *fake_pmu;
};
void parse_events__handle_error(struct parse_events_error *err, int idx,
@@ -253,4 +261,6 @@ static inline bool is_sdt_event(char *str __maybe_unused)
}
#endif /* HAVE_LIBELF_SUPPORT */
+int perf_pmu__test_parse_init(void);
+
#endif /* __PERF_PARSE_EVENTS_H */
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 002802e17059..3ca5fd2829ca 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -41,14 +41,6 @@ static int value(yyscan_t scanner, int base)
return __value(yylval, text, base, PE_VALUE);
}
-static int raw(yyscan_t scanner)
-{
- YYSTYPE *yylval = parse_events_get_lval(scanner);
- char *text = parse_events_get_text(scanner);
-
- return __value(yylval, text + 1, 16, PE_RAW);
-}
-
static int str(yyscan_t scanner, int token)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
@@ -72,6 +64,17 @@ static int str(yyscan_t scanner, int token)
return token;
}
+static int raw(yyscan_t scanner)
+{
+ YYSTYPE *yylval = parse_events_get_lval(scanner);
+ char *text = parse_events_get_text(scanner);
+
+ if (perf_pmu__parse_check(text) == PMU_EVENT_SYMBOL)
+ return str(scanner, PE_NAME);
+
+ return __value(yylval, text + 1, 16, PE_RAW);
+}
+
static bool isbpf_suffix(char *text)
{
int len = strlen(text);
@@ -129,12 +132,16 @@ do { \
yyless(0); \
} while (0)
-static int pmu_str_check(yyscan_t scanner)
+static int pmu_str_check(yyscan_t scanner, struct parse_events_state *parse_state)
{
YYSTYPE *yylval = parse_events_get_lval(scanner);
char *text = parse_events_get_text(scanner);
yylval->str = strdup(text);
+
+ if (parse_state->fake_pmu)
+ return PE_PMU_EVENT_FAKE;
+
switch (perf_pmu__parse_check(text)) {
case PMU_EVENT_SYMBOL_PREFIX:
return PE_PMU_EVENT_PRE;
@@ -289,6 +296,7 @@ percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
r{num_raw_hex} { return raw(yyscanner); }
+r0x{num_raw_hex} { return raw(yyscanner); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
@@ -376,7 +384,7 @@ r{num_raw_hex} { return raw(yyscanner); }
{modifier_event} { return str(yyscanner, PE_MODIFIER_EVENT); }
{bpf_object} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_OBJECT); }
{bpf_source} { if (!isbpf(yyscanner)) { USER_REJECT }; return str(yyscanner, PE_BPF_SOURCE); }
-{name} { return pmu_str_check(yyscanner); }
+{name} { return pmu_str_check(yyscanner, _parse_state); }
{name_tag} { return str(yyscanner, PE_NAME); }
"/" { BEGIN(config); return '/'; }
- { return '-'; }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index acef87d9af58..645bf4f1859f 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -69,7 +69,7 @@ static void inc_group_count(struct list_head *list,
%token PE_NAME_CACHE_TYPE PE_NAME_CACHE_OP_RESULT
%token PE_PREFIX_MEM PE_PREFIX_RAW PE_PREFIX_GROUP
%token PE_ERROR
-%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%token PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%token PE_ARRAY_ALL PE_ARRAY_RANGE
%token PE_DRV_CFG_TERM
%type <num> PE_VALUE
@@ -87,7 +87,7 @@ static void inc_group_count(struct list_head *list,
%type <str> PE_MODIFIER_EVENT
%type <str> PE_MODIFIER_BP
%type <str> PE_EVENT_NAME
-%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT
+%type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
%type <str> PE_DRV_CFG_TERM
%destructor { free ($$); } <str>
%type <term> event_term
@@ -356,6 +356,43 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
YYABORT;
$$ = list;
}
+|
+PE_PMU_EVENT_FAKE sep_dc
+{
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYABORT;
+
+ err = parse_events_add_pmu(_parse_state, list, $1, NULL, false, false);
+ free($1);
+ if (err < 0) {
+ free(list);
+ YYABORT;
+ }
+ $$ = list;
+}
+|
+PE_PMU_EVENT_FAKE opt_pmu_config
+{
+ struct list_head *list;
+ int err;
+
+ list = alloc_list();
+ if (!list)
+ YYABORT;
+
+ err = parse_events_add_pmu(_parse_state, list, $1, $2, false, false);
+ free($1);
+ parse_events_terms__delete($2);
+ if (err < 0) {
+ free(list);
+ YYABORT;
+ }
+ $$ = list;
+}
value_sym:
PE_VALUE_SYM_HW
@@ -474,7 +511,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE ':' PE_MODIFIER_BP sep_dc
list = alloc_list();
ABORT_ON(!list);
err = parse_events_add_breakpoint(list, &parse_state->idx,
- (void *) $2, $6, $4);
+ (void *)(uintptr_t) $2, $6, $4);
free($6);
if (err) {
free(list);
@@ -491,7 +528,7 @@ PE_PREFIX_MEM PE_VALUE '/' PE_VALUE sep_dc
list = alloc_list();
ABORT_ON(!list);
if (parse_events_add_breakpoint(list, &parse_state->idx,
- (void *) $2, NULL, $4)) {
+ (void *)(uintptr_t) $2, NULL, $4)) {
free(list);
YYABORT;
}
@@ -507,7 +544,7 @@ PE_PREFIX_MEM PE_VALUE ':' PE_MODIFIER_BP sep_dc
list = alloc_list();
ABORT_ON(!list);
err = parse_events_add_breakpoint(list, &parse_state->idx,
- (void *) $2, $4, 0);
+ (void *)(uintptr_t) $2, $4, 0);
free($4);
if (err) {
free(list);
@@ -524,7 +561,7 @@ PE_PREFIX_MEM PE_VALUE sep_dc
list = alloc_list();
ABORT_ON(!list);
if (parse_events_add_breakpoint(list, &parse_state->idx,
- (void *) $2, NULL, 0)) {
+ (void *)(uintptr_t) $2, NULL, 0)) {
free(list);
YYABORT;
}
diff --git a/tools/perf/util/parse-sublevel-options.c b/tools/perf/util/parse-sublevel-options.c
new file mode 100644
index 000000000000..a841d17ffd57
--- /dev/null
+++ b/tools/perf/util/parse-sublevel-options.c
@@ -0,0 +1,70 @@
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "util/debug.h"
+#include "util/parse-sublevel-options.h"
+
+static int parse_one_sublevel_option(const char *str,
+ struct sublevel_option *opts)
+{
+ struct sublevel_option *opt = opts;
+ char *vstr, *s = strdup(str);
+ int v = 1;
+
+ if (!s) {
+ pr_err("no memory\n");
+ return -1;
+ }
+
+ vstr = strchr(s, '=');
+ if (vstr)
+ *vstr++ = 0;
+
+ while (opt->name) {
+ if (!strcmp(s, opt->name))
+ break;
+ opt++;
+ }
+
+ if (!opt->name) {
+ pr_err("Unknown option name '%s'\n", s);
+ free(s);
+ return -1;
+ }
+
+ if (vstr)
+ v = atoi(vstr);
+
+ *opt->value_ptr = v;
+ free(s);
+ return 0;
+}
+
+/* parse options like --foo a=<n>,b,c... */
+int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts)
+{
+ char *s = strdup(str);
+ char *p = NULL;
+ int ret;
+
+ if (!s) {
+ pr_err("no memory\n");
+ return -1;
+ }
+
+ p = strtok(s, ",");
+ while (p) {
+ ret = parse_one_sublevel_option(p, opts);
+ if (ret) {
+ free(s);
+ return ret;
+ }
+
+ p = strtok(NULL, ",");
+ }
+
+ free(s);
+ return 0;
+}
diff --git a/tools/perf/util/parse-sublevel-options.h b/tools/perf/util/parse-sublevel-options.h
new file mode 100644
index 000000000000..9b9efcc2aaad
--- /dev/null
+++ b/tools/perf/util/parse-sublevel-options.h
@@ -0,0 +1,11 @@
+#ifndef _PERF_PARSE_SUBLEVEL_OPTIONS_H
+#define _PERF_PARSE_SUBLEVEL_OPTIONS_H
+
+struct sublevel_option {
+ const char *name;
+ int *value_ptr;
+};
+
+int perf_parse_sublevel_options(const char *str, struct sublevel_option *opts);
+
+#endif \ No newline at end of file
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
index 1337965673d7..3840d02f0f7b 100644
--- a/tools/perf/util/perf_api_probe.c
+++ b/tools/perf/util/perf_api_probe.c
@@ -93,6 +93,11 @@ static void perf_probe_context_switch(struct evsel *evsel)
evsel->core.attr.context_switch = 1;
}
+static void perf_probe_text_poke(struct evsel *evsel)
+{
+ evsel->core.attr.text_poke = 1;
+}
+
bool perf_can_sample_identifier(void)
{
return perf_probe_api(perf_probe_sample_identifier);
@@ -108,6 +113,11 @@ bool perf_can_record_switch_events(void)
return perf_probe_api(perf_probe_context_switch);
}
+bool perf_can_record_text_poke_events(void)
+{
+ return perf_probe_api(perf_probe_text_poke);
+}
+
bool perf_can_record_cpu_wide(void)
{
struct perf_event_attr attr = {
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
index 706c3c6426e2..d5506a983a94 100644
--- a/tools/perf/util/perf_api_probe.h
+++ b/tools/perf/util/perf_api_probe.h
@@ -9,6 +9,7 @@ bool perf_can_aux_sample(void);
bool perf_can_comm_exec(void);
bool perf_can_record_cpu_wide(void);
bool perf_can_record_switch_events(void);
+bool perf_can_record_text_poke_events(void);
bool perf_can_sample_identifier(void);
#endif // __PERF_API_PROBE_H
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index b94fa07f5d32..e67a227c0ce7 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -147,6 +147,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(aux_watermark, p_unsigned);
PRINT_ATTRf(sample_max_stack, p_unsigned);
PRINT_ATTRf(aux_sample_size, p_unsigned);
+ PRINT_ATTRf(text_poke, p_unsigned);
return ret;
}
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 93fe72a9dc0b..d41caeb35cf6 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -26,6 +26,8 @@
#include "strbuf.h"
#include "fncache.h"
+struct perf_pmu perf_pmu__fake;
+
struct perf_pmu_format {
char *name;
int value;
@@ -272,7 +274,7 @@ static void perf_pmu_update_alias(struct perf_pmu_alias *old,
}
/* Delete an alias entry. */
-static void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
+void perf_pmu_free_alias(struct perf_pmu_alias *newalias)
{
zfree(&newalias->name);
zfree(&newalias->desc);
@@ -1352,6 +1354,17 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to)
set_bit(b, bits);
}
+void perf_pmu__del_formats(struct list_head *formats)
+{
+ struct perf_pmu_format *fmt, *tmp;
+
+ list_for_each_entry_safe(fmt, tmp, formats, list) {
+ list_del(&fmt->list);
+ free(fmt->name);
+ free(fmt);
+ }
+}
+
static int sub_non_neg(int a, int b)
{
if (b > a)
@@ -1400,6 +1413,7 @@ struct sevent {
char *pmu;
char *metric_expr;
char *metric_name;
+ int is_cpu;
};
static int cmp_sevent(const void *a, const void *b)
@@ -1416,6 +1430,11 @@ static int cmp_sevent(const void *a, const void *b)
if (n)
return n;
}
+
+ /* Order CPU core events to be first */
+ if (as->is_cpu != bs->is_cpu)
+ return bs->is_cpu - as->is_cpu;
+
return strcmp(as->name, bs->name);
}
@@ -1475,7 +1494,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
list_for_each_entry(alias, &pmu->aliases, list) {
char *name = alias->desc ? alias->name :
format_alias(buf, sizeof(buf), pmu, alias);
- bool is_cpu = !strcmp(pmu->name, "cpu");
+ bool is_cpu = is_pmu_core(pmu->name);
if (alias->deprecated && !deprecated)
continue;
@@ -1507,6 +1526,7 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
aliases[j].pmu = pmu->name;
aliases[j].metric_expr = alias->metric_expr;
aliases[j].metric_name = alias->metric_name;
+ aliases[j].is_cpu = is_cpu;
j++;
}
if (pmu->selectable &&
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index f971d9aa4570..a64e9c9ce731 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -43,6 +43,8 @@ struct perf_pmu {
struct list_head list; /* ELEM */
};
+extern struct perf_pmu perf_pmu__fake;
+
struct perf_pmu_info {
const char *unit;
const char *metric_expr;
@@ -92,6 +94,7 @@ int perf_pmu__new_format(struct list_head *list, char *name,
int config, unsigned long *bits);
void perf_pmu__set_format(unsigned long *bits, long from, long to);
int perf_pmu__format_parse(char *dir, struct list_head *head);
+void perf_pmu__del_formats(struct list_head *formats);
struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
@@ -111,6 +114,7 @@ void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu,
struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
+void perf_pmu_free_alias(struct perf_pmu_alias *alias);
int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index df713a5d1e26..99d36ac77c08 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -375,9 +375,13 @@ static int find_alternative_probe_point(struct debuginfo *dinfo,
/* Find the address of given function */
map__for_each_symbol_by_name(map, pp->function, sym) {
- if (uprobes)
+ if (uprobes) {
address = sym->start;
- else
+ if (sym->type == STT_GNU_IFUNC)
+ pr_warning("Warning: The probe function (%s) is a GNU indirect function.\n"
+ "Consider identifying the final function used at run time and set the probe directly on that.\n",
+ pp->function);
+ } else
address = map->unmap_ip(map, sym->start) - map->reloc;
break;
}
@@ -2968,6 +2972,16 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
for (j = 0; j < num_matched_functions; j++) {
sym = syms[j];
+ /* There can be duplicated symbols in the map */
+ for (i = 0; i < j; i++)
+ if (sym->start == syms[i]->start) {
+ pr_debug("Found duplicated symbol %s @ %" PRIx64 "\n",
+ sym->name, sym->start);
+ break;
+ }
+ if (i != j)
+ continue;
+
tev = (*tevs) + ret;
tp = &tev->point;
if (ret == num_matched_functions) {
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 55924255c535..659024342e9a 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1408,6 +1408,9 @@ static int fill_empty_trace_arg(struct perf_probe_event *pev,
char *type;
int i, j, ret;
+ if (!ntevs)
+ return -ENOENT;
+
for (i = 0; i < pev->nargs; i++) {
type = NULL;
for (j = 0; j < ntevs; j++) {
@@ -1464,7 +1467,7 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
if (ret >= 0 && tf.pf.skip_empty_arg)
ret = fill_empty_trace_arg(pev, tf.tevs, tf.ntevs);
- if (ret < 0) {
+ if (ret < 0 || tf.ntevs == 0) {
for (i = 0; i < tf.ntevs; i++)
clear_probe_trace_event(&tf.tevs[i]);
zfree(tevs);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index a4cc11592f6b..ea9aa1d7cf50 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -2,6 +2,7 @@
#include "debug.h"
#include "evlist.h"
#include "evsel.h"
+#include "evsel_config.h"
#include "parse-events.h"
#include <errno.h>
#include <limits.h>
@@ -33,11 +34,24 @@ static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evl
return leader;
}
+static u64 evsel__config_term_mask(struct evsel *evsel)
+{
+ struct evsel_config_term *term;
+ struct list_head *config_terms = &evsel->config_terms;
+ u64 term_types = 0;
+
+ list_for_each_entry(term, config_terms, list) {
+ term_types |= 1 << term->type;
+ }
+ return term_types;
+}
+
static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist)
{
struct perf_event_attr *attr = &evsel->core.attr;
struct evsel *leader = evsel->leader;
struct evsel *read_sampler;
+ u64 term_types, freq_mask;
if (!leader->sample_read)
return;
@@ -47,16 +61,20 @@ static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *ev
if (evsel == read_sampler)
return;
+ term_types = evsel__config_term_mask(evsel);
/*
- * Disable sampling for all group members other than the leader in
- * case the leader 'leads' the sampling, except when the leader is an
- * AUX area event, in which case the 2nd event in the group is the one
- * that 'leads' the sampling.
+ * Disable sampling for all group members except those with explicit
+ * config terms or the leader. In the case of an AUX area event, the 2nd
+ * event in the group is the one that 'leads' the sampling.
*/
- attr->freq = 0;
- attr->sample_freq = 0;
- attr->sample_period = 0;
- attr->write_backward = 0;
+ freq_mask = (1 << EVSEL__CONFIG_TERM_FREQ) | (1 << EVSEL__CONFIG_TERM_PERIOD);
+ if ((term_types & freq_mask) == 0) {
+ attr->freq = 0;
+ attr->sample_freq = 0;
+ attr->sample_period = 0;
+ }
+ if ((term_types & (1 << EVSEL__CONFIG_TERM_OVERWRITE)) == 0)
+ attr->write_backward = 0;
/*
* We don't get a sample for slave events, we make them when delivering
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 39d1de4b2a36..03678ff25539 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -48,6 +48,7 @@ struct record_opts {
bool sample_id;
bool no_bpf_event;
bool kcore;
+ bool text_poke;
unsigned int freq;
unsigned int mmap_pages;
unsigned int auxtrace_mmap_pages;
@@ -61,7 +62,7 @@ struct record_opts {
const char *auxtrace_snapshot_opts;
const char *auxtrace_sample_opts;
bool sample_transaction;
- unsigned initial_delay;
+ int initial_delay;
bool use_clockid;
clockid_t clockid;
u64 clockid_res_ns;
@@ -70,6 +71,8 @@ struct record_opts {
int mmap_flush;
unsigned int comp_level;
unsigned int nr_threads_synthesize;
+ int ctl_fd;
+ int ctl_fd_ack;
};
extern const char * const *record_usage;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1a157e84a04a..7a5f03764702 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -87,7 +87,7 @@ static int perf_session__process_compressed_event(struct perf_session *session,
session->decomp_last = decomp;
}
- pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size);
+ pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);
return 0;
}
@@ -115,12 +115,12 @@ static int perf_session__open(struct perf_session *session)
if (perf_header__has_feat(&session->header, HEADER_STAT))
return 0;
- if (!perf_evlist__valid_sample_type(session->evlist)) {
+ if (!evlist__valid_sample_type(session->evlist)) {
pr_err("non matching sample_type\n");
return -1;
}
- if (!perf_evlist__valid_sample_id_all(session->evlist)) {
+ if (!evlist__valid_sample_id_all(session->evlist)) {
pr_err("non matching sample_id_all\n");
return -1;
}
@@ -252,10 +252,10 @@ struct perf_session *perf_session__new(struct perf_data *data,
/*
* In pipe-mode, evlist is empty until PERF_RECORD_HEADER_ATTR is
- * processed, so perf_evlist__sample_id_all is not meaningful here.
+ * processed, so evlist__sample_id_all is not meaningful here.
*/
if ((!data || !data->is_pipe) && tool && tool->ordering_requires_timestamps &&
- tool->ordered_events && !perf_evlist__sample_id_all(session->evlist)) {
+ tool->ordered_events && !evlist__sample_id_all(session->evlist)) {
dump_printf("WARNING: No sample_id_all support, falling back to unordered processing\n");
tool->ordered_events = false;
}
@@ -490,6 +490,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->ksymbol = perf_event__process_ksymbol;
if (tool->bpf == NULL)
tool->bpf = perf_event__process_bpf;
+ if (tool->text_poke == NULL)
+ tool->text_poke = perf_event__process_text_poke;
if (tool->read == NULL)
tool->read = process_event_sample_stub;
if (tool->throttle == NULL)
@@ -659,6 +661,24 @@ static void perf_event__switch_swap(union perf_event *event, bool sample_id_all)
swap_sample_id_all(event, &event->context_switch + 1);
}
+static void perf_event__text_poke_swap(union perf_event *event, bool sample_id_all)
+{
+ event->text_poke.addr = bswap_64(event->text_poke.addr);
+ event->text_poke.old_len = bswap_16(event->text_poke.old_len);
+ event->text_poke.new_len = bswap_16(event->text_poke.new_len);
+
+ if (sample_id_all) {
+ size_t len = sizeof(event->text_poke.old_len) +
+ sizeof(event->text_poke.new_len) +
+ event->text_poke.old_len +
+ event->text_poke.new_len;
+ void *data = &event->text_poke.old_len;
+
+ data += PERF_ALIGN(len, sizeof(u64));
+ swap_sample_id_all(event, data);
+ }
+}
+
static void perf_event__throttle_swap(union perf_event *event,
bool sample_id_all)
{
@@ -932,6 +952,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
[PERF_RECORD_SWITCH] = perf_event__switch_swap,
[PERF_RECORD_SWITCH_CPU_WIDE] = perf_event__switch_swap,
[PERF_RECORD_NAMESPACES] = perf_event__namespaces_swap,
+ [PERF_RECORD_TEXT_POKE] = perf_event__text_poke_swap,
[PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap,
[PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap,
[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -1160,10 +1181,10 @@ static void perf_evlist__print_tstamp(struct evlist *evlist,
union perf_event *event,
struct perf_sample *sample)
{
- u64 sample_type = __perf_evlist__combined_sample_type(evlist);
+ u64 sample_type = __evlist__combined_sample_type(evlist);
if (event->header.type != PERF_RECORD_SAMPLE &&
- !perf_evlist__sample_id_all(evlist)) {
+ !evlist__sample_id_all(evlist)) {
fputs("-1 -1 ", stdout);
return;
}
@@ -1474,6 +1495,8 @@ static int machines__deliver_event(struct machines *machines,
return tool->ksymbol(tool, event, sample, machine);
case PERF_RECORD_BPF_EVENT:
return tool->bpf(tool, event, sample, machine);
+ case PERF_RECORD_TEXT_POKE:
+ return tool->text_poke(tool, event, sample, machine);
default:
++evlist->stats.nr_unknown_events;
return -1;
@@ -1655,7 +1678,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
return -1;
if (session->header.needs_swap)
- event_swap(event, perf_evlist__sample_id_all(session->evlist));
+ event_swap(event, evlist__sample_id_all(session->evlist));
out_parse_sample:
@@ -1704,7 +1727,7 @@ static s64 perf_session__process_event(struct perf_session *session,
int ret;
if (session->header.needs_swap)
- event_swap(event, perf_evlist__sample_id_all(evlist));
+ event_swap(event, evlist__sample_id_all(evlist));
if (event->header.type >= PERF_RECORD_HEADER_MAX)
return -EINVAL;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 57d0706e1330..493ec372fdec 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -117,7 +117,7 @@ static void aggr_printout(struct perf_stat_config *config,
cpu_map__id_to_die(id),
config->csv_output ? 0 : -3,
cpu_map__id_to_cpu(id), config->csv_sep);
- } else {
+ } else if (id > -1) {
fprintf(config->output, "CPU%*d%s",
config->csv_output ? 0 : -7,
evsel__cpus(evsel)->map[id],
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index a7c13a88ecb9..924b54d15d54 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -517,7 +517,7 @@ static void print_l1_dcache_misses(struct perf_stat_config *config,
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
- out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache hits", ratio);
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-dcache accesses", ratio);
}
static void print_l1_icache_misses(struct perf_stat_config *config,
@@ -538,7 +538,7 @@ static void print_l1_icache_misses(struct perf_stat_config *config,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
- out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache hits", ratio);
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all L1-icache accesses", ratio);
}
static void print_dtlb_cache_misses(struct perf_stat_config *config,
@@ -558,7 +558,7 @@ static void print_dtlb_cache_misses(struct perf_stat_config *config,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
- out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache hits", ratio);
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all dTLB cache accesses", ratio);
}
static void print_itlb_cache_misses(struct perf_stat_config *config,
@@ -578,7 +578,7 @@ static void print_itlb_cache_misses(struct perf_stat_config *config,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
- out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache hits", ratio);
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all iTLB cache accesses", ratio);
}
static void print_ll_cache_misses(struct perf_stat_config *config,
@@ -598,7 +598,7 @@ static void print_ll_cache_misses(struct perf_stat_config *config,
ratio = avg / total * 100.0;
color = get_ratio_color(GRC_CACHE_MISSES, ratio);
- out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache hits", ratio);
+ out->print_metric(config, out->ctx, color, "%7.2f%%", "of all LL-cache accesses", ratio);
}
/*
@@ -730,25 +730,17 @@ static void print_smi_cost(struct perf_stat_config *config,
out->print_metric(config, out->ctx, NULL, "%4.0f", "SMI#", smi_num);
}
-static void generic_metric(struct perf_stat_config *config,
- const char *metric_expr,
- struct evsel **metric_events,
- char *name,
- const char *metric_name,
- const char *metric_unit,
- int runtime,
- int cpu,
- struct perf_stat_output_ctx *out,
- struct runtime_stat *st)
+static int prepare_metric(struct evsel **metric_events,
+ struct metric_ref *metric_refs,
+ struct expr_parse_ctx *pctx,
+ int cpu,
+ struct runtime_stat *st)
{
- print_metric_t print_metric = out->print_metric;
- struct expr_parse_ctx pctx;
- double ratio, scale;
- int i;
- void *ctxp = out->ctx;
+ double scale;
char *n, *pn;
+ int i, j, ret;
- expr__ctx_init(&pctx);
+ expr__ctx_init(pctx);
for (i = 0; metric_events[i]; i++) {
struct saved_value *v;
struct stats *stats;
@@ -771,7 +763,7 @@ static void generic_metric(struct perf_stat_config *config,
n = strdup(metric_events[i]->name);
if (!n)
- return;
+ return -ENOMEM;
/*
* This display code with --no-merge adds [cpu] postfixes.
* These are not supported by the parser. Remove everything
@@ -782,11 +774,42 @@ static void generic_metric(struct perf_stat_config *config,
*pn = 0;
if (metric_total)
- expr__add_id(&pctx, n, metric_total);
+ expr__add_id_val(pctx, n, metric_total);
else
- expr__add_id(&pctx, n, avg_stats(stats)*scale);
+ expr__add_id_val(pctx, n, avg_stats(stats)*scale);
}
+ for (j = 0; metric_refs && metric_refs[j].metric_name; j++) {
+ ret = expr__add_ref(pctx, &metric_refs[j]);
+ if (ret)
+ return ret;
+ }
+
+ return i;
+}
+
+static void generic_metric(struct perf_stat_config *config,
+ const char *metric_expr,
+ struct evsel **metric_events,
+ struct metric_ref *metric_refs,
+ char *name,
+ const char *metric_name,
+ const char *metric_unit,
+ int runtime,
+ int cpu,
+ struct perf_stat_output_ctx *out,
+ struct runtime_stat *st)
+{
+ print_metric_t print_metric = out->print_metric;
+ struct expr_parse_ctx pctx;
+ double ratio, scale;
+ int i;
+ void *ctxp = out->ctx;
+
+ i = prepare_metric(metric_events, metric_refs, &pctx, cpu, st);
+ if (i < 0)
+ return;
+
if (!metric_events[i]) {
if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
char *unit;
@@ -827,6 +850,22 @@ static void generic_metric(struct perf_stat_config *config,
expr__ctx_clear(&pctx);
}
+double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st)
+{
+ struct expr_parse_ctx pctx;
+ double ratio = 0.0;
+
+ if (prepare_metric(mexp->metric_events, mexp->metric_refs, &pctx, cpu, st) < 0)
+ goto out;
+
+ if (expr__parse(&ratio, &pctx, mexp->metric_expr, 1))
+ ratio = 0.0;
+
+out:
+ expr__ctx_clear(&pctx);
+ return ratio;
+}
+
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct evsel *evsel,
double avg, int cpu,
@@ -881,7 +920,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0)
print_l1_dcache_misses(config, cpu, evsel, avg, out, st);
else
- print_metric(config, ctxp, NULL, NULL, "of all L1-dcache hits", 0);
+ print_metric(config, ctxp, NULL, NULL, "of all L1-dcache accesses", 0);
} else if (
evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_L1I |
@@ -891,7 +930,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0)
print_l1_icache_misses(config, cpu, evsel, avg, out, st);
else
- print_metric(config, ctxp, NULL, NULL, "of all L1-icache hits", 0);
+ print_metric(config, ctxp, NULL, NULL, "of all L1-icache accesses", 0);
} else if (
evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_DTLB |
@@ -901,7 +940,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0)
print_dtlb_cache_misses(config, cpu, evsel, avg, out, st);
else
- print_metric(config, ctxp, NULL, NULL, "of all dTLB cache hits", 0);
+ print_metric(config, ctxp, NULL, NULL, "of all dTLB cache accesses", 0);
} else if (
evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_ITLB |
@@ -911,7 +950,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0)
print_itlb_cache_misses(config, cpu, evsel, avg, out, st);
else
- print_metric(config, ctxp, NULL, NULL, "of all iTLB cache hits", 0);
+ print_metric(config, ctxp, NULL, NULL, "of all iTLB cache accesses", 0);
} else if (
evsel->core.attr.type == PERF_TYPE_HW_CACHE &&
evsel->core.attr.config == ( PERF_COUNT_HW_CACHE_LL |
@@ -921,7 +960,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0)
print_ll_cache_misses(config, cpu, evsel, avg, out, st);
else
- print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
+ print_metric(config, ctxp, NULL, NULL, "of all LL-cache accesses", 0);
} else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
@@ -1035,8 +1074,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
else
print_metric(config, ctxp, NULL, NULL, name, 0);
} else if (evsel->metric_expr) {
- generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
- evsel->metric_name, NULL, 1, cpu, out, st);
+ generic_metric(config, evsel->metric_expr, evsel->metric_events, NULL,
+ evsel->name, evsel->metric_name, NULL, 1, cpu, out, st);
} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
char unit = 'M';
char unit_buf[10];
@@ -1064,7 +1103,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
if (num++ > 0)
out->new_line(config, ctxp);
generic_metric(config, mexp->metric_expr, mexp->metric_events,
- evsel->name, mexp->metric_name,
+ mexp->metric_refs, evsel->name, mexp->metric_name,
mexp->metric_unit, mexp->runtime, cpu, out, st);
}
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index f75ae679eb28..aa3bed48511b 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -113,10 +113,11 @@ struct perf_stat_config {
bool summary;
bool metric_no_group;
bool metric_no_merge;
+ bool stop_read_counter;
FILE *output;
unsigned int interval;
unsigned int timeout;
- unsigned int initial_delay;
+ int initial_delay;
unsigned int unit_width;
unsigned int metric_only_len;
int times;
@@ -133,6 +134,8 @@ struct perf_stat_config {
struct perf_cpu_map *cpus_aggr_map;
u64 *walltime_run;
struct rblist metric_events;
+ int ctl_fd;
+ int ctl_fd_ack;
};
void perf_stat__set_big_num(int set);
@@ -230,4 +233,7 @@ perf_evlist__print_counters(struct evlist *evlist,
struct target *_target,
struct timespec *ts,
int argc, const char **argv);
+
+struct metric_expr;
+double test_generic_metric(struct metric_expr *mexp, int cpu, struct runtime_stat *st);
#endif
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 5e43054bffea..8cc4b0059fb0 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -789,7 +789,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
if (ss->opdshdr.sh_type != SHT_PROGBITS)
ss->opdsec = NULL;
- if (dso->kernel == DSO_TYPE_USER)
+ if (dso->kernel == DSO_SPACE__USER)
ss->adjust_symbols = true;
else
ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
@@ -872,7 +872,7 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map,
* kallsyms and identity maps. Overwrite it to
* map to the kernel dso.
*/
- if (*remap_kernel && dso->kernel) {
+ if (*remap_kernel && dso->kernel && !kmodule) {
*remap_kernel = false;
map->start = shdr->sh_addr + ref_reloc(kmap);
map->end = map->start + shdr->sh_size;
@@ -1068,7 +1068,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
* Initial kernel and module mappings do not map to the dso.
* Flag the fixups.
*/
- if (dso->kernel || kmodule) {
+ if (dso->kernel) {
remap_kernel = true;
adjust_kernel_syms = dso->adjust_symbols;
}
@@ -1130,7 +1130,7 @@ int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss,
(sym.st_value & 1))
--sym.st_value;
- if (dso->kernel || kmodule) {
+ if (dso->kernel) {
if (dso__process_kernel_symbol(dso, map, &sym, &shdr, kmaps, kmap, &curr_dso, &curr_map,
section_name, adjust_kernel_syms, kmodule, &remap_kernel))
goto out_elf_end;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 5ddf84dcbae7..5151a8c0b791 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -663,9 +663,12 @@ static bool symbol__is_idle(const char *name)
"exit_idle",
"mwait_idle",
"mwait_idle_with_hints",
+ "mwait_idle_with_hints.constprop.0",
"poll_idle",
"ppc64_runlatch_off",
"pseries_dedicated_idle_sleep",
+ "psw_idle",
+ "psw_idle_exit",
NULL
};
int i;
@@ -806,7 +809,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
if (strcmp(curr_map->dso->short_name, module)) {
if (curr_map != initial_map &&
- dso->kernel == DSO_TYPE_GUEST_KERNEL &&
+ dso->kernel == DSO_SPACE__KERNEL_GUEST &&
machine__is_default_guest(machine)) {
/*
* We assume all symbols of a module are
@@ -863,7 +866,7 @@ static int maps__split_kallsyms(struct maps *kmaps, struct dso *dso, u64 delta,
goto add_symbol;
}
- if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
snprintf(dso_name, sizeof(dso_name),
"[guest.kernel].%d",
kernel_range++);
@@ -907,7 +910,7 @@ discard_symbol:
}
if (curr_map != initial_map &&
- dso->kernel == DSO_TYPE_GUEST_KERNEL &&
+ dso->kernel == DSO_SPACE__KERNEL_GUEST &&
machine__is_default_guest(kmaps->machine)) {
dso__set_loaded(curr_map->dso);
}
@@ -1385,7 +1388,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
* Set the data type and long name so that kcore can be read via
* dso__data_read_addr().
*/
- if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
dso->binary_type = DSO_BINARY_TYPE__GUEST_KCORE;
else
dso->binary_type = DSO_BINARY_TYPE__KCORE;
@@ -1449,7 +1452,7 @@ int __dso__load_kallsyms(struct dso *dso, const char *filename,
symbols__fixup_end(&dso->symbols);
symbols__fixup_duplicate(&dso->symbols);
- if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
dso->symtab_type = DSO_BINARY_TYPE__GUEST_KALLSYMS;
else
dso->symtab_type = DSO_BINARY_TYPE__KALLSYMS;
@@ -1535,17 +1538,17 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
- return !kmod && dso->kernel == DSO_TYPE_USER;
+ return !kmod && dso->kernel == DSO_SPACE__USER;
case DSO_BINARY_TYPE__KALLSYMS:
case DSO_BINARY_TYPE__VMLINUX:
case DSO_BINARY_TYPE__KCORE:
- return dso->kernel == DSO_TYPE_KERNEL;
+ return dso->kernel == DSO_SPACE__KERNEL;
case DSO_BINARY_TYPE__GUEST_KALLSYMS:
case DSO_BINARY_TYPE__GUEST_VMLINUX:
case DSO_BINARY_TYPE__GUEST_KCORE:
- return dso->kernel == DSO_TYPE_GUEST_KERNEL;
+ return dso->kernel == DSO_SPACE__KERNEL_GUEST;
case DSO_BINARY_TYPE__GUEST_KMODULE:
case DSO_BINARY_TYPE__GUEST_KMODULE_COMP:
@@ -1563,6 +1566,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
case DSO_BINARY_TYPE__BPF_PROG_INFO:
case DSO_BINARY_TYPE__BPF_IMAGE:
+ case DSO_BINARY_TYPE__OOL:
case DSO_BINARY_TYPE__NOT_FOUND:
default:
return false;
@@ -1647,9 +1651,9 @@ int dso__load(struct dso *dso, struct map *map)
dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE_COMP;
if (dso->kernel && !kmod) {
- if (dso->kernel == DSO_TYPE_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL)
ret = dso__load_kernel_sym(dso, map);
- else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ else if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
ret = dso__load_guest_kernel_sym(dso, map);
machine = map__kmaps(map)->machine;
@@ -1879,7 +1883,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
else
symbol__join_symfs(symfs_vmlinux, vmlinux);
- if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
symtab_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
else
symtab_type = DSO_BINARY_TYPE__VMLINUX;
@@ -1891,7 +1895,7 @@ int dso__load_vmlinux(struct dso *dso, struct map *map,
symsrc__destroy(&ss);
if (err > 0) {
- if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+ if (dso->kernel == DSO_SPACE__KERNEL_GUEST)
dso->binary_type = DSO_BINARY_TYPE__GUEST_VMLINUX;
else
dso->binary_type = DSO_BINARY_TYPE__VMLINUX;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 3fb67bd31e4a..bbbc0dcd461f 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -57,7 +57,8 @@ struct perf_tool {
throttle,
unthrottle,
ksymbol,
- bpf;
+ bpf,
+ text_poke;
event_attr_op attr;
event_attr_op event_update;
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
index d2202392ffdb..48dd2b018c47 100644
--- a/tools/perf/util/zstd.c
+++ b/tools/perf/util/zstd.c
@@ -99,7 +99,7 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size
while (input.pos < input.size) {
ret = ZSTD_decompressStream(data->dstream, &output, &input);
if (ZSTD_isError(ret)) {
- pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n",
+ pr_err("failed to decompress (B): %zd -> %zd, dst_size %zd : %s\n",
src_size, output.size, dst_size, ZSTD_getErrorName(ret));
break;
}
diff --git a/tools/power/acpi/Makefile b/tools/power/acpi/Makefile
index ebd3e1a1c28e..a249c50ebf55 100644
--- a/tools/power/acpi/Makefile
+++ b/tools/power/acpi/Makefile
@@ -7,6 +7,8 @@
include ../../scripts/Makefile.include
+.NOTPARALLEL:
+
all: acpidbg acpidump ec
clean: acpidbg_clean acpidump_clean ec_clean
install: acpidbg_install acpidump_install ec_install
diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
index dd38c2b2e1b4..11c5046dce16 100644
--- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
+++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c
@@ -110,7 +110,7 @@ u32 gbl_table_count = 0;
*
* RETURN: Status; Converted from errno.
*
- * DESCRIPTION: Get last errno and conver it to acpi_status.
+ * DESCRIPTION: Get last errno and convert it to acpi_status.
*
*****************************************************************************/
diff --git a/tools/power/cpupower/utils/cpufreq-set.c b/tools/power/cpupower/utils/cpufreq-set.c
index 6ed82fba5aaa..7b2164e07057 100644
--- a/tools/power/cpupower/utils/cpufreq-set.c
+++ b/tools/power/cpupower/utils/cpufreq-set.c
@@ -99,13 +99,17 @@ static unsigned long string_to_frequency(const char *str)
continue;
if (str[cp] == '.') {
- while (power > -1 && isdigit(str[cp+1]))
- cp++, power--;
+ while (power > -1 && isdigit(str[cp+1])) {
+ cp++;
+ power--;
+ }
}
- if (power >= -1) /* not enough => pad */
+ if (power >= -1) { /* not enough => pad */
pad = power + 1;
- else /* to much => strip */
- pad = 0, cp += power + 1;
+ } else { /* too much => strip */
+ pad = 0;
+ cp += power + 1;
+ }
/* check bounds */
if (cp <= 0 || cp + pad > NORM_FREQ_LEN - 1)
return 0;
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 46ff97e909c6..1bc36a1db14f 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -171,7 +171,7 @@ class SystemValues:
tracefuncs = {
'sys_sync': {},
'ksys_sync': {},
- '__pm_notifier_call_chain': {},
+ 'pm_notifier_call_chain_robust': {},
'pm_prepare_console': {},
'pm_notifier_call_chain': {},
'freeze_processes': {},
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 9f4b190f1d74..cd089a505859 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.5";
+static const char *version_str = "v1.6";
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -545,20 +545,23 @@ static void set_cpu_present_cpu_mask(void)
}
}
-int get_core_count(int pkg_id, int die_id)
+int get_max_punit_core_id(int pkg_id, int die_id)
{
- int cnt = 0;
+ int max_id = 0;
+ int i;
- if (pkg_id < MAX_PACKAGE_COUNT && die_id < MAX_DIE_PER_PACKAGE) {
- int i;
+ for (i = 0; i < topo_max_cpus; ++i)
+ {
+ if (!CPU_ISSET_S(i, present_cpumask_size, present_cpumask))
+ continue;
- for (i = 0; i < sizeof(long long) * 8; ++i) {
- if (core_mask[pkg_id][die_id] & (1ULL << i))
- cnt++;
- }
+ if (cpu_map[i].pkg_id == pkg_id &&
+ cpu_map[i].die_id == die_id &&
+ cpu_map[i].punit_cpu_core > max_id)
+ max_id = cpu_map[i].punit_cpu_core;
}
- return cnt;
+ return max_id;
}
int get_cpu_count(int pkg_id, int die_id)
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index a7f4337c5777..1d7ecb54352e 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -396,7 +396,7 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info)
{
struct isst_pkg_ctdp_level_info ctdp_level;
struct isst_pkg_ctdp pkg_dev;
- int i, ret, core_cnt, max;
+ int i, ret, max_punit_core, max_mask_index;
unsigned int req, resp;
ret = isst_get_ctdp_levels(cpu, &pkg_dev);
@@ -421,10 +421,10 @@ int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info)
pbf_info->core_cpumask_size = alloc_cpu_set(&pbf_info->core_cpumask);
- core_cnt = get_core_count(get_physical_package_id(cpu), get_physical_die_id(cpu));
- max = core_cnt > 32 ? 2 : 1;
+ max_punit_core = get_max_punit_core_id(get_physical_package_id(cpu), get_physical_die_id(cpu));
+ max_mask_index = max_punit_core > 32 ? 2 : 1;
- for (i = 0; i < max; ++i) {
+ for (i = 0; i < max_mask_index; ++i) {
unsigned long long mask;
int count;
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index 094ba4589a9c..29715e9c2e06 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -170,7 +170,7 @@ struct isst_pkg_ctdp {
extern int get_topo_max_cpus(void);
extern int get_cpu_count(int pkg_id, int die_id);
-extern int get_core_count(int pkg_id, int die_id);
+extern int get_max_punit_core_id(int pkg_id, int die_id);
/* Common interfaces */
FILE *get_output_file(void);
diff --git a/tools/testing/ktest/examples/README b/tools/testing/ktest/examples/README
index a12d295a09d8..4f048789b260 100644
--- a/tools/testing/ktest/examples/README
+++ b/tools/testing/ktest/examples/README
@@ -11,7 +11,7 @@ crosstests.conf - this config shows an example of testing a git repo against
lots of different architectures. It only does build tests, but makes
it easy to compile test different archs. You can download the arch
cross compilers from:
- http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+ https://kernel.org/pub/tools/crosstool/files/bin/x86_64/
test.conf - A generic example of a config. This is based on an actual config
used to perform real testing.
diff --git a/tools/testing/ktest/examples/crosstests.conf b/tools/testing/ktest/examples/crosstests.conf
index 6907f32590b2..3b15e85f26bd 100644
--- a/tools/testing/ktest/examples/crosstests.conf
+++ b/tools/testing/ktest/examples/crosstests.conf
@@ -3,7 +3,7 @@
#
# In this config, it is expected that the tool chains from:
#
-# http://kernel.org/pub/tools/crosstool/files/bin/x86_64/
+# https://kernel.org/pub/tools/crosstool/files/bin/x86_64/
#
# running on a x86_64 system have been downloaded and installed into:
#
diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl
index 7570e36d636d..cb16d2aac51c 100755
--- a/tools/testing/ktest/ktest.pl
+++ b/tools/testing/ktest/ktest.pl
@@ -11,6 +11,7 @@ use File::Path qw(mkpath);
use File::Copy qw(cp);
use FileHandle;
use FindBin;
+use IO::Handle;
my $VERSION = "0.2";
@@ -81,6 +82,8 @@ my %default = (
"IGNORE_UNUSED" => 0,
);
+my $test_log_start = 0;
+
my $ktest_config = "ktest.conf";
my $version;
my $have_version = 0;
@@ -98,6 +101,7 @@ my $final_post_ktest;
my $pre_ktest;
my $post_ktest;
my $pre_test;
+my $pre_test_die;
my $post_test;
my $pre_build;
my $post_build;
@@ -223,6 +227,7 @@ my $dirname = $FindBin::Bin;
my $mailto;
my $mailer;
my $mail_path;
+my $mail_max_size;
my $mail_command;
my $email_on_error;
my $email_when_finished;
@@ -259,6 +264,7 @@ my %option_map = (
"MAILTO" => \$mailto,
"MAILER" => \$mailer,
"MAIL_PATH" => \$mail_path,
+ "MAIL_MAX_SIZE" => \$mail_max_size,
"MAIL_COMMAND" => \$mail_command,
"EMAIL_ON_ERROR" => \$email_on_error,
"EMAIL_WHEN_FINISHED" => \$email_when_finished,
@@ -273,6 +279,7 @@ my %option_map = (
"PRE_KTEST" => \$pre_ktest,
"POST_KTEST" => \$post_ktest,
"PRE_TEST" => \$pre_test,
+ "PRE_TEST_DIE" => \$pre_test_die,
"POST_TEST" => \$post_test,
"BUILD_TYPE" => \$build_type,
"BUILD_OPTIONS" => \$build_options,
@@ -507,9 +514,7 @@ EOF
sub _logit {
if (defined($opt{"LOG_FILE"})) {
- open(OUT, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
- print OUT @_;
- close(OUT);
+ print LOG @_;
}
}
@@ -909,6 +914,12 @@ sub process_expression {
}
}
+ if ($val =~ s/^\s*NOT\s+(.*)//) {
+ my $express = $1;
+ my $ret = process_expression($name, $express);
+ return !$ret;
+ }
+
if ($val =~ /^\s*0\s*$/) {
return 0;
} elsif ($val =~ /^\s*\d+\s*$/) {
@@ -1485,8 +1496,32 @@ sub dodie {
if ($email_on_error) {
my $name = get_test_name;
+ my $log_file;
+
+ if (defined($opt{"LOG_FILE"})) {
+ my $whence = 0; # beginning of file
+ my $pos = $test_log_start;
+
+ if (defined($mail_max_size)) {
+ my $log_size = tell LOG;
+ $log_size -= $test_log_start;
+ if ($log_size > $mail_max_size) {
+ $whence = 2; # end of file
+ $pos = - $mail_max_size;
+ }
+ }
+ $log_file = "$tmpdir/log";
+ open (L, "$opt{LOG_FILE}") or die "Can't open $opt{LOG_FILE} to read)";
+ open (O, "> $tmpdir/log") or die "Can't open $tmpdir/log\n";
+ seek(L, $pos, $whence);
+ while (<L>) {
+ print O;
+ }
+ close O;
+ close L;
+ }
send_email("KTEST: critical failure for test $i [$name]",
- "Your test started at $script_start_time has failed with:\n@_\n");
+ "Your test started at $script_start_time has failed with:\n@_\n", $log_file);
}
if ($monitor_cnt) {
@@ -1508,7 +1543,7 @@ sub create_pty {
my $TIOCGPTN = 0x80045430;
sysopen($ptm, "/dev/ptmx", O_RDWR | O_NONBLOCK) or
- dodie "Cant open /dev/ptmx";
+ dodie "Can't open /dev/ptmx";
# unlockpt()
$tmp = pack("i", 0);
@@ -1772,8 +1807,6 @@ sub run_command {
(fail "unable to exec $command" and return 0);
if (defined($opt{"LOG_FILE"})) {
- open(LOG, ">>$opt{LOG_FILE}") or
- dodie "failed to write to log";
$dolog = 1;
}
@@ -1821,7 +1854,6 @@ sub run_command {
}
close(CMD);
- close(LOG) if ($dolog);
close(RD) if ($dord);
$end_time = time;
@@ -3188,6 +3220,8 @@ sub config_bisect_end {
doprint "***************************************\n\n";
}
+my $pass = 1;
+
sub run_config_bisect {
my ($good, $bad, $last_result) = @_;
my $reset = "";
@@ -3210,11 +3244,15 @@ sub run_config_bisect {
$ret = run_config_bisect_test $config_bisect_type;
if ($ret) {
- doprint "NEW GOOD CONFIG\n";
+ doprint "NEW GOOD CONFIG ($pass)\n";
+ system("cp $output_config $tmpdir/good_config.tmp.$pass");
+ $pass++;
# Return 3 for good config
return 3;
} else {
- doprint "NEW BAD CONFIG\n";
+ doprint "NEW BAD CONFIG ($pass)\n";
+ system("cp $output_config $tmpdir/bad_config.tmp.$pass");
+ $pass++;
# Return 4 for bad config
return 4;
}
@@ -4077,8 +4115,12 @@ if ($#new_configs >= 0) {
}
}
-if ($opt{"CLEAR_LOG"} && defined($opt{"LOG_FILE"})) {
- unlink $opt{"LOG_FILE"};
+if (defined($opt{"LOG_FILE"})) {
+ if ($opt{"CLEAR_LOG"}) {
+ unlink $opt{"LOG_FILE"};
+ }
+ open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
+ LOG->autoflush(1);
}
doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
@@ -4171,7 +4213,7 @@ sub find_mailer {
}
sub do_send_mail {
- my ($subject, $message) = @_;
+ my ($subject, $message, $file) = @_;
if (!defined($mail_path)) {
# find the mailer
@@ -4181,16 +4223,30 @@ sub do_send_mail {
}
}
+ my $header_file = "$tmpdir/header";
+ open (HEAD, ">$header_file") or die "Can not create $header_file\n";
+ print HEAD "To: $mailto\n";
+ print HEAD "Subject: $subject\n\n";
+ print HEAD "$message\n";
+ close HEAD;
+
if (!defined($mail_command)) {
if ($mailer eq "mail" || $mailer eq "mailx") {
- $mail_command = "\$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO <<< \'\$MESSAGE\'";
+ $mail_command = "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -s \'\$SUBJECT\' \$MAILTO";
} elsif ($mailer eq "sendmail" ) {
- $mail_command = "echo \'Subject: \$SUBJECT\n\n\$MESSAGE\' | \$MAIL_PATH/\$MAILER -t \$MAILTO";
+ $mail_command = "cat \$HEADER_FILE \$BODY_FILE | \$MAIL_PATH/\$MAILER -t \$MAILTO";
} else {
die "\nYour mailer: $mailer is not supported.\n";
}
}
+ if (defined($file)) {
+ $mail_command =~ s/\$BODY_FILE/$file/g;
+ } else {
+ $mail_command =~ s/\$BODY_FILE//g;
+ }
+
+ $mail_command =~ s/\$HEADER_FILE/$header_file/g;
$mail_command =~ s/\$MAILER/$mailer/g;
$mail_command =~ s/\$MAIL_PATH/$mail_path/g;
$mail_command =~ s/\$MAILTO/$mailto/g;
@@ -4338,10 +4394,19 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) {
}
doprint "\n\n";
+
+ if (defined($opt{"LOG_FILE"})) {
+ $test_log_start = tell(LOG);
+ }
+
doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n";
if (defined($pre_test)) {
- run_command $pre_test;
+ my $ret = run_command $pre_test;
+ if (!$ret && defined($pre_test_die) &&
+ $pre_test_die) {
+ dodie "failed to pre_test\n";
+ }
}
unlink $dmesg;
@@ -4441,4 +4506,10 @@ if ($email_when_finished) {
send_email("KTEST: Your test has finished!",
"$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
}
+
+if (defined($opt{"LOG_FILE"})) {
+ print "\n See $opt{LOG_FILE} for the record of results.\n\n";
+ close LOG;
+}
+
exit 0;
diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf
index 27666b8007ed..5e7d1d729752 100644
--- a/tools/testing/ktest/sample.conf
+++ b/tools/testing/ktest/sample.conf
@@ -442,6 +442,19 @@
# Users can cancel the test by Ctrl^C
# (default 0)
#EMAIL_WHEN_CANCELED = 1
+#
+# If a test ends with an error and EMAIL_ON_ERROR is set as well
+# as a LOG_FILE is defined, then the log of the failing test will
+# be included in the email that is sent.
+# It is possible that the log may be very large, in which case,
+# only the last amount of the log should be sent. To limit how
+# much of the log is sent, set MAIL_MAX_SIZE. This will be the
+# size in bytes of the last portion of the log of the failed
+# test file. That is, if this is set to 100000, then only the
+# last 100 thousand bytes of the log file will be included in
+# the email.
+# (default undef)
+#MAIL_MAX_SIZE = 1000000
# Start a test setup. If you leave this off, all options
# will be default and the test will run once.
@@ -557,6 +570,11 @@
# default (undefined)
#PRE_TEST = ${SSH} reboot_to_special_kernel
+# To kill the entire test if PRE_TEST is defined but fails set this
+# to 1.
+# (default 0)
+#PRE_TEST_DIE = 1
+
# If there is a command you want to run after the individual test case
# completes, then you can set this option.
#
diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c
index 7e5d979e73cb..fb342a8c98d3 100644
--- a/tools/testing/nvdimm/dax-dev.c
+++ b/tools/testing/nvdimm/dax-dev.c
@@ -9,12 +9,19 @@
phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
unsigned long size)
{
- struct resource *res = &dev_dax->region->res;
- phys_addr_t addr;
+ int i;
- addr = pgoff * PAGE_SIZE + res->start;
- if (addr >= res->start && addr <= res->end) {
- if (addr + size - 1 <= res->end) {
+ for (i = 0; i < dev_dax->nr_range; i++) {
+ struct dev_dax_range *dax_range = &dev_dax->ranges[i];
+ struct range *range = &dax_range->range;
+ unsigned long long pgoff_end;
+ phys_addr_t addr;
+
+ pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1;
+ if (pgoff < dax_range->pgoff || pgoff > pgoff_end)
+ continue;
+ addr = PFN_PHYS(pgoff - dax_range->pgoff) + range->start;
+ if (addr + size - 1 <= range->end) {
if (get_nfit_res(addr)) {
struct page *page;
@@ -23,9 +30,10 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
page = vmalloc_to_page((void *)addr);
return PFN_PHYS(page_to_pfn(page));
- } else
- return addr;
+ }
+ return addr;
}
+ break;
}
return -1;
}
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 03e40b3b0106..c62d372d426f 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -126,7 +126,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
int error;
- resource_size_t offset = pgmap->res.start;
+ resource_size_t offset = pgmap->range.start;
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (!nfit_res)
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a8ee5c4d41eb..2ac0fff6dad8 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,8 @@
#include "nfit_test.h"
#include "../watermark.h"
-#include <asm/mcsafe_test.h>
+#include <asm/copy_mc_test.h>
+#include <asm/mce.h>
/*
* Generate an NFIT table to describe the following topology:
@@ -173,6 +174,9 @@ struct nfit_test_fw {
u64 version;
u32 size_received;
u64 end_time;
+ bool armed;
+ bool missed_activate;
+ unsigned long last_activate;
};
struct nfit_test {
@@ -345,7 +349,7 @@ static int nd_intel_test_finish_fw(struct nfit_test *t,
__func__, t, nd_cmd, buf_len, idx);
if (fw->state == FW_STATE_UPDATED) {
- /* update already done, need cold boot */
+ /* update already done, need activation */
nd_cmd->status = 0x20007;
return 0;
}
@@ -430,6 +434,7 @@ static int nd_intel_test_finish_query(struct nfit_test *t,
}
dev_dbg(dev, "%s: transition out verify\n", __func__);
fw->state = FW_STATE_UPDATED;
+ fw->missed_activate = false;
/* fall through */
case FW_STATE_UPDATED:
nd_cmd->status = 0;
@@ -1178,6 +1183,134 @@ static int nd_intel_test_cmd_master_secure_erase(struct nfit_test *t,
return 0;
}
+static unsigned long last_activate;
+
+static int nvdimm_bus_intel_fw_activate_businfo(struct nfit_test *t,
+ struct nd_intel_bus_fw_activate_businfo *nd_cmd,
+ unsigned int buf_len)
+{
+ int i, armed = 0;
+ int state;
+ u64 tmo;
+
+ for (i = 0; i < NUM_DCR; i++) {
+ struct nfit_test_fw *fw = &t->fw[i];
+
+ if (fw->armed)
+ armed++;
+ }
+
+ /*
+ * Emulate 3 second activation max, and 1 second incremental
+ * quiesce time per dimm requiring multiple activates to get all
+ * DIMMs updated.
+ */
+ if (armed)
+ state = ND_INTEL_FWA_ARMED;
+ else if (!last_activate || time_after(jiffies, last_activate + 3 * HZ))
+ state = ND_INTEL_FWA_IDLE;
+ else
+ state = ND_INTEL_FWA_BUSY;
+
+ tmo = armed * USEC_PER_SEC;
+ *nd_cmd = (struct nd_intel_bus_fw_activate_businfo) {
+ .capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE
+ | ND_INTEL_BUS_FWA_CAP_OSQUIESCE
+ | ND_INTEL_BUS_FWA_CAP_RESET,
+ .state = state,
+ .activate_tmo = tmo,
+ .cpu_quiesce_tmo = tmo,
+ .io_quiesce_tmo = tmo,
+ .max_quiesce_tmo = 3 * USEC_PER_SEC,
+ };
+
+ return 0;
+}
+
+static int nvdimm_bus_intel_fw_activate(struct nfit_test *t,
+ struct nd_intel_bus_fw_activate *nd_cmd,
+ unsigned int buf_len)
+{
+ struct nd_intel_bus_fw_activate_businfo info;
+ u32 status = 0;
+ int i;
+
+ nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info));
+ if (info.state == ND_INTEL_FWA_BUSY)
+ status = ND_INTEL_BUS_FWA_STATUS_BUSY;
+ else if (info.activate_tmo > info.max_quiesce_tmo)
+ status = ND_INTEL_BUS_FWA_STATUS_TMO;
+ else if (info.state == ND_INTEL_FWA_IDLE)
+ status = ND_INTEL_BUS_FWA_STATUS_NOARM;
+
+ dev_dbg(&t->pdev.dev, "status: %d\n", status);
+ nd_cmd->status = status;
+ if (status && status != ND_INTEL_BUS_FWA_STATUS_TMO)
+ return 0;
+
+ last_activate = jiffies;
+ for (i = 0; i < NUM_DCR; i++) {
+ struct nfit_test_fw *fw = &t->fw[i];
+
+ if (!fw->armed)
+ continue;
+ if (fw->state != FW_STATE_UPDATED)
+ fw->missed_activate = true;
+ else
+ fw->state = FW_STATE_NEW;
+ fw->armed = false;
+ fw->last_activate = last_activate;
+ }
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_fw_activate_dimminfo(struct nfit_test *t,
+ struct nd_intel_fw_activate_dimminfo *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct nd_intel_bus_fw_activate_businfo info;
+ struct nfit_test_fw *fw = &t->fw[dimm];
+ u32 result, state;
+
+ nvdimm_bus_intel_fw_activate_businfo(t, &info, sizeof(info));
+
+ if (info.state == ND_INTEL_FWA_BUSY)
+ state = ND_INTEL_FWA_BUSY;
+ else if (info.state == ND_INTEL_FWA_IDLE)
+ state = ND_INTEL_FWA_IDLE;
+ else if (fw->armed)
+ state = ND_INTEL_FWA_ARMED;
+ else
+ state = ND_INTEL_FWA_IDLE;
+
+ result = ND_INTEL_DIMM_FWA_NONE;
+ if (last_activate && fw->last_activate == last_activate &&
+ state == ND_INTEL_FWA_IDLE) {
+ if (fw->missed_activate)
+ result = ND_INTEL_DIMM_FWA_NOTSTAGED;
+ else
+ result = ND_INTEL_DIMM_FWA_SUCCESS;
+ }
+
+ *nd_cmd = (struct nd_intel_fw_activate_dimminfo) {
+ .result = result,
+ .state = state,
+ };
+
+ return 0;
+}
+
+static int nd_intel_test_cmd_fw_activate_arm(struct nfit_test *t,
+ struct nd_intel_fw_activate_arm *nd_cmd,
+ unsigned int buf_len, int dimm)
+{
+ struct nfit_test_fw *fw = &t->fw[dimm];
+
+ fw->armed = nd_cmd->activate_arm == ND_INTEL_DIMM_FWA_ARM;
+ nd_cmd->status = 0;
+ return 0;
+}
static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
{
@@ -1192,6 +1325,29 @@ static int get_dimm(struct nfit_mem *nfit_mem, unsigned int func)
return i;
}
+static void nfit_ctl_dbg(struct acpi_nfit_desc *acpi_desc,
+ struct nvdimm *nvdimm, unsigned int cmd, void *buf,
+ unsigned int len)
+{
+ struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
+ unsigned int func = cmd;
+ unsigned int family = 0;
+
+ if (cmd == ND_CMD_CALL) {
+ struct nd_cmd_pkg *pkg = buf;
+
+ len = pkg->nd_size_in;
+ family = pkg->nd_family;
+ buf = pkg->nd_payload;
+ func = pkg->nd_command;
+ }
+ dev_dbg(&t->pdev.dev, "%s family: %d cmd: %d: func: %d input length: %d\n",
+ nvdimm ? nvdimm_name(nvdimm) : "bus", family, cmd, func,
+ len);
+ print_hex_dump_debug("nvdimm in ", DUMP_PREFIX_OFFSET, 16, 4,
+ buf, min(len, 256u), true);
+}
+
static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
unsigned int buf_len, int *cmd_rc)
@@ -1205,6 +1361,8 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
cmd_rc = &__cmd_rc;
*cmd_rc = 0;
+ nfit_ctl_dbg(acpi_desc, nvdimm, cmd, buf, buf_len);
+
if (nvdimm) {
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
@@ -1224,6 +1382,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
i = get_dimm(nfit_mem, func);
if (i < 0)
return i;
+ if (i >= NUM_DCR) {
+ dev_WARN_ONCE(&t->pdev.dev, 1,
+ "ND_CMD_CALL only valid for nfit_test0\n");
+ return -EINVAL;
+ }
switch (func) {
case NVDIMM_INTEL_GET_SECURITY_STATE:
@@ -1252,11 +1415,11 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
break;
case NVDIMM_INTEL_OVERWRITE:
rc = nd_intel_test_cmd_overwrite(t,
- buf, buf_len, i - t->dcr_idx);
+ buf, buf_len, i);
break;
case NVDIMM_INTEL_QUERY_OVERWRITE:
rc = nd_intel_test_cmd_query_overwrite(t,
- buf, buf_len, i - t->dcr_idx);
+ buf, buf_len, i);
break;
case NVDIMM_INTEL_SET_MASTER_PASSPHRASE:
rc = nd_intel_test_cmd_master_set_pass(t,
@@ -1266,54 +1429,59 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
rc = nd_intel_test_cmd_master_secure_erase(t,
buf, buf_len, i);
break;
+ case NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO:
+ rc = nd_intel_test_cmd_fw_activate_dimminfo(
+ t, buf, buf_len, i);
+ break;
+ case NVDIMM_INTEL_FW_ACTIVATE_ARM:
+ rc = nd_intel_test_cmd_fw_activate_arm(
+ t, buf, buf_len, i);
+ break;
case ND_INTEL_ENABLE_LSS_STATUS:
rc = nd_intel_test_cmd_set_lss_status(t,
buf, buf_len);
break;
case ND_INTEL_FW_GET_INFO:
rc = nd_intel_test_get_fw_info(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_START_UPDATE:
rc = nd_intel_test_start_update(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_SEND_DATA:
rc = nd_intel_test_send_data(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_FINISH_UPDATE:
rc = nd_intel_test_finish_fw(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_FW_FINISH_QUERY:
rc = nd_intel_test_finish_query(t, buf,
- buf_len, i - t->dcr_idx);
+ buf_len, i);
break;
case ND_INTEL_SMART:
rc = nfit_test_cmd_smart(buf, buf_len,
- &t->smart[i - t->dcr_idx]);
+ &t->smart[i]);
break;
case ND_INTEL_SMART_THRESHOLD:
rc = nfit_test_cmd_smart_threshold(buf,
buf_len,
- &t->smart_threshold[i -
- t->dcr_idx]);
+ &t->smart_threshold[i]);
break;
case ND_INTEL_SMART_SET_THRESHOLD:
rc = nfit_test_cmd_smart_set_threshold(buf,
buf_len,
- &t->smart_threshold[i -
- t->dcr_idx],
- &t->smart[i - t->dcr_idx],
+ &t->smart_threshold[i],
+ &t->smart[i],
&t->pdev.dev, t->dimm_dev[i]);
break;
case ND_INTEL_SMART_INJECT:
rc = nfit_test_cmd_smart_inject(buf,
buf_len,
- &t->smart_threshold[i -
- t->dcr_idx],
- &t->smart[i - t->dcr_idx],
+ &t->smart_threshold[i],
+ &t->smart[i],
&t->pdev.dev, t->dimm_dev[i]);
break;
default:
@@ -1353,9 +1521,9 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
if (!nd_desc)
return -ENOTTY;
- if (cmd == ND_CMD_CALL) {
+ if (cmd == ND_CMD_CALL && call_pkg->nd_family
+ == NVDIMM_BUS_FAMILY_NFIT) {
func = call_pkg->nd_command;
-
buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
buf = (void *) call_pkg->nd_payload;
@@ -1379,7 +1547,26 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
default:
return -ENOTTY;
}
- }
+ } else if (cmd == ND_CMD_CALL && call_pkg->nd_family
+ == NVDIMM_BUS_FAMILY_INTEL) {
+ func = call_pkg->nd_command;
+ buf_len = call_pkg->nd_size_in + call_pkg->nd_size_out;
+ buf = (void *) call_pkg->nd_payload;
+
+ switch (func) {
+ case NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO:
+ rc = nvdimm_bus_intel_fw_activate_businfo(t,
+ buf, buf_len);
+ return rc;
+ case NVDIMM_BUS_INTEL_FW_ACTIVATE:
+ rc = nvdimm_bus_intel_fw_activate(t, buf,
+ buf_len);
+ return rc;
+ default:
+ return -ENOTTY;
+ }
+ } else if (cmd == ND_CMD_CALL)
+ return -ENOTTY;
if (!nd_desc || !test_bit(cmd, &nd_desc->cmd_mask))
return -ENOTTY;
@@ -1805,6 +1992,7 @@ static void nfit_test0_setup(struct nfit_test *t)
struct acpi_nfit_flush_address *flush;
struct acpi_nfit_capabilities *pcap;
unsigned int offset = 0, i;
+ unsigned long *acpi_mask;
/*
* spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -2507,10 +2695,10 @@ static void nfit_test0_setup(struct nfit_test *t)
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_cmd_force_en);
set_bit(ND_CMD_CALL, &acpi_desc->bus_cmd_force_en);
- set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_nfit_cmd_force_en);
- set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_nfit_cmd_force_en);
- set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_nfit_cmd_force_en);
- set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_nfit_cmd_force_en);
+ set_bit(NFIT_CMD_TRANSLATE_SPA, &acpi_desc->bus_dsm_mask);
+ set_bit(NFIT_CMD_ARS_INJECT_SET, &acpi_desc->bus_dsm_mask);
+ set_bit(NFIT_CMD_ARS_INJECT_CLEAR, &acpi_desc->bus_dsm_mask);
+ set_bit(NFIT_CMD_ARS_INJECT_GET, &acpi_desc->bus_dsm_mask);
set_bit(ND_INTEL_FW_GET_INFO, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_FW_START_UPDATE, &acpi_desc->dimm_cmd_force_en);
set_bit(ND_INTEL_FW_SEND_DATA, &acpi_desc->dimm_cmd_force_en);
@@ -2531,6 +2719,12 @@ static void nfit_test0_setup(struct nfit_test *t)
&acpi_desc->dimm_cmd_force_en);
set_bit(NVDIMM_INTEL_MASTER_SECURE_ERASE,
&acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_FW_ACTIVATE_DIMMINFO, &acpi_desc->dimm_cmd_force_en);
+ set_bit(NVDIMM_INTEL_FW_ACTIVATE_ARM, &acpi_desc->dimm_cmd_force_en);
+
+ acpi_mask = &acpi_desc->family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL];
+ set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO, acpi_mask);
+ set_bit(NVDIMM_BUS_INTEL_FW_ACTIVATE, acpi_mask);
}
static void nfit_test1_setup(struct nfit_test *t)
@@ -2699,14 +2893,18 @@ static int nfit_ctl_test(struct device *dev)
struct acpi_nfit_desc *acpi_desc;
const u64 test_val = 0x0123456789abcdefULL;
unsigned long mask, cmd_size, offset;
- union {
- struct nd_cmd_get_config_size cfg_size;
- struct nd_cmd_clear_error clear_err;
- struct nd_cmd_ars_status ars_stat;
- struct nd_cmd_ars_cap ars_cap;
- char buf[sizeof(struct nd_cmd_ars_status)
- + sizeof(struct nd_ars_record)];
- } cmds;
+ struct nfit_ctl_test_cmd {
+ struct nd_cmd_pkg pkg;
+ union {
+ struct nd_cmd_get_config_size cfg_size;
+ struct nd_cmd_clear_error clear_err;
+ struct nd_cmd_ars_status ars_stat;
+ struct nd_cmd_ars_cap ars_cap;
+ struct nd_intel_bus_fw_activate_businfo fwa_info;
+ char buf[sizeof(struct nd_cmd_ars_status)
+ + sizeof(struct nd_ars_record)];
+ };
+ } cmd;
adev = devm_kzalloc(dev, sizeof(*adev), GFP_KERNEL);
if (!adev)
@@ -2731,11 +2929,15 @@ static int nfit_ctl_test(struct device *dev)
.module = THIS_MODULE,
.provider_name = "ACPI.NFIT",
.ndctl = acpi_nfit_ctl,
- .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
- | 1UL << NFIT_CMD_ARS_INJECT_SET
- | 1UL << NFIT_CMD_ARS_INJECT_CLEAR
- | 1UL << NFIT_CMD_ARS_INJECT_GET,
+ .bus_family_mask = 1UL << NVDIMM_BUS_FAMILY_NFIT
+ | 1UL << NVDIMM_BUS_FAMILY_INTEL,
},
+ .bus_dsm_mask = 1UL << NFIT_CMD_TRANSLATE_SPA
+ | 1UL << NFIT_CMD_ARS_INJECT_SET
+ | 1UL << NFIT_CMD_ARS_INJECT_CLEAR
+ | 1UL << NFIT_CMD_ARS_INJECT_GET,
+ .family_dsm_mask[NVDIMM_BUS_FAMILY_INTEL] =
+ NVDIMM_BUS_INTEL_FW_ACTIVATE_CMDMASK,
.dev = &adev->dev,
};
@@ -2766,21 +2968,21 @@ static int nfit_ctl_test(struct device *dev)
/* basic checkout of a typical 'get config size' command */
- cmd_size = sizeof(cmds.cfg_size);
- cmds.cfg_size = (struct nd_cmd_get_config_size) {
+ cmd_size = sizeof(cmd.cfg_size);
+ cmd.cfg_size = (struct nd_cmd_get_config_size) {
.status = 0,
.config_size = SZ_128K,
.max_xfer = SZ_4K,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
- if (rc < 0 || cmd_rc || cmds.cfg_size.status != 0
- || cmds.cfg_size.config_size != SZ_128K
- || cmds.cfg_size.max_xfer != SZ_4K) {
+ if (rc < 0 || cmd_rc || cmd.cfg_size.status != 0
+ || cmd.cfg_size.config_size != SZ_128K
+ || cmd.cfg_size.max_xfer != SZ_4K) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
__func__, __LINE__, rc, cmd_rc);
return -EIO;
@@ -2789,14 +2991,14 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_status with zero output */
cmd_size = offsetof(struct nd_cmd_ars_status, address);
- cmds.ars_stat = (struct nd_cmd_ars_status) {
+ cmd.ars_stat = (struct nd_cmd_ars_status) {
.out_length = 0,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2806,16 +3008,16 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_cap with benign extended status */
- cmd_size = sizeof(cmds.ars_cap);
- cmds.ars_cap = (struct nd_cmd_ars_cap) {
+ cmd_size = sizeof(cmd.ars_cap);
+ cmd.ars_cap = (struct nd_cmd_ars_cap) {
.status = ND_ARS_PERSISTENT << 16,
};
offset = offsetof(struct nd_cmd_ars_cap, status);
- rc = setup_result(cmds.buf + offset, cmd_size - offset);
+ rc = setup_result(cmd.buf + offset, cmd_size - offset);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_CAP,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2825,19 +3027,19 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_status with 'status' trimmed from 'out_length' */
- cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
- cmds.ars_stat = (struct nd_cmd_ars_status) {
+ cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record);
+ cmd.ars_stat = (struct nd_cmd_ars_status) {
.out_length = cmd_size - 4,
};
- record = &cmds.ars_stat.records[0];
+ record = &cmd.ars_stat.records[0];
*record = (struct nd_ars_record) {
.length = test_val,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc || record->length != test_val) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2847,19 +3049,19 @@ static int nfit_ctl_test(struct device *dev)
/* test ars_status with 'Output (Size)' including 'status' */
- cmd_size = sizeof(cmds.ars_stat) + sizeof(struct nd_ars_record);
- cmds.ars_stat = (struct nd_cmd_ars_status) {
+ cmd_size = sizeof(cmd.ars_stat) + sizeof(struct nd_ars_record);
+ cmd.ars_stat = (struct nd_cmd_ars_status) {
.out_length = cmd_size,
};
- record = &cmds.ars_stat.records[0];
+ record = &cmd.ars_stat.records[0];
*record = (struct nd_ars_record) {
.length = test_val,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_ARS_STATUS,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc || record->length != test_val) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2869,15 +3071,15 @@ static int nfit_ctl_test(struct device *dev)
/* test extended status for get_config_size results in failure */
- cmd_size = sizeof(cmds.cfg_size);
- cmds.cfg_size = (struct nd_cmd_get_config_size) {
+ cmd_size = sizeof(cmd.cfg_size);
+ cmd.cfg_size = (struct nd_cmd_get_config_size) {
.status = 1 << 16,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, nvdimm, ND_CMD_GET_CONFIG_SIZE,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc >= 0) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
@@ -2886,16 +3088,46 @@ static int nfit_ctl_test(struct device *dev)
}
/* test clear error */
- cmd_size = sizeof(cmds.clear_err);
- cmds.clear_err = (struct nd_cmd_clear_error) {
+ cmd_size = sizeof(cmd.clear_err);
+ cmd.clear_err = (struct nd_cmd_clear_error) {
.length = 512,
.cleared = 512,
};
- rc = setup_result(cmds.buf, cmd_size);
+ rc = setup_result(cmd.buf, cmd_size);
if (rc)
return rc;
rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CLEAR_ERROR,
- cmds.buf, cmd_size, &cmd_rc);
+ cmd.buf, cmd_size, &cmd_rc);
+ if (rc < 0 || cmd_rc) {
+ dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
+ __func__, __LINE__, rc, cmd_rc);
+ return -EIO;
+ }
+
+ /* test firmware activate bus info */
+ cmd_size = sizeof(cmd.fwa_info);
+ cmd = (struct nfit_ctl_test_cmd) {
+ .pkg = {
+ .nd_command = NVDIMM_BUS_INTEL_FW_ACTIVATE_BUSINFO,
+ .nd_family = NVDIMM_BUS_FAMILY_INTEL,
+ .nd_size_out = cmd_size,
+ .nd_fw_size = cmd_size,
+ },
+ .fwa_info = {
+ .state = ND_INTEL_FWA_IDLE,
+ .capability = ND_INTEL_BUS_FWA_CAP_FWQUIESCE
+ | ND_INTEL_BUS_FWA_CAP_OSQUIESCE,
+ .activate_tmo = 1,
+ .cpu_quiesce_tmo = 1,
+ .io_quiesce_tmo = 1,
+ .max_quiesce_tmo = 1,
+ },
+ };
+ rc = setup_result(cmd.buf, cmd_size);
+ if (rc)
+ return rc;
+ rc = acpi_nfit_ctl(&acpi_desc->nd_desc, NULL, ND_CMD_CALL,
+ &cmd, sizeof(cmd.pkg) + cmd_size, &cmd_rc);
if (rc < 0 || cmd_rc) {
dev_dbg(dev, "%s: failed at: %d rc: %d cmd_rc: %d\n",
__func__, __LINE__, rc, cmd_rc);
@@ -3052,7 +3284,7 @@ static struct platform_driver nfit_test_driver = {
.id_table = nfit_test_id,
};
-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
enum INJECT {
INJECT_NONE,
@@ -3060,7 +3292,7 @@ enum INJECT {
INJECT_DST,
};
-static void mcsafe_test_init(char *dst, char *src, size_t size)
+static void copy_mc_test_init(char *dst, char *src, size_t size)
{
size_t i;
@@ -3069,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
src[i] = (char) i;
}
-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
size_t size, unsigned long rem)
{
size_t i;
@@ -3090,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
return true;
}
-void mcsafe_test(void)
+void copy_mc_test(void)
{
char *inject_desc[] = { "none", "source", "destination" };
enum INJECT inj;
- if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+ if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
pr_info("%s: run...\n", __func__);
} else {
pr_info("%s: disabled, skip.\n", __func__);
@@ -3113,31 +3345,31 @@ void mcsafe_test(void)
switch (inj) {
case INJECT_NONE:
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(NULL);
- dst = &mcsafe_buf[2048];
- src = &mcsafe_buf[1024 - i];
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(NULL);
+ dst = &copy_mc_buf[2048];
+ src = &copy_mc_buf[1024 - i];
expect = 0;
break;
case INJECT_SRC:
- mcsafe_inject_src(&mcsafe_buf[1024]);
- mcsafe_inject_dst(NULL);
- dst = &mcsafe_buf[2048];
- src = &mcsafe_buf[1024 - i];
+ copy_mc_inject_src(&copy_mc_buf[1024]);
+ copy_mc_inject_dst(NULL);
+ dst = &copy_mc_buf[2048];
+ src = &copy_mc_buf[1024 - i];
expect = 512 - i;
break;
case INJECT_DST:
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(&mcsafe_buf[2048]);
- dst = &mcsafe_buf[2048 - i];
- src = &mcsafe_buf[1024];
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(&copy_mc_buf[2048]);
+ dst = &copy_mc_buf[2048 - i];
+ src = &copy_mc_buf[1024];
expect = 512 - i;
break;
}
- mcsafe_test_init(dst, src, 512);
- rem = __memcpy_mcsafe(dst, src, 512);
- valid = mcsafe_test_validate(dst, src, 512, expect);
+ copy_mc_test_init(dst, src, 512);
+ rem = copy_mc_fragile(dst, src, 512);
+ valid = copy_mc_test_validate(dst, src, 512, expect);
if (rem == expect && valid)
continue;
pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
@@ -3149,8 +3381,8 @@ void mcsafe_test(void)
}
}
- mcsafe_inject_src(NULL);
- mcsafe_inject_dst(NULL);
+ copy_mc_inject_src(NULL);
+ copy_mc_inject_dst(NULL);
}
static __init int nfit_test_init(void)
@@ -3161,7 +3393,7 @@ static __init int nfit_test_init(void)
libnvdimm_test();
acpi_nfit_test();
device_dax_test();
- mcsafe_test();
+ copy_mc_test();
dax_pmem_test();
dax_pmem_core_test();
#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index e03bc15ce731..9018f45d631d 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -32,6 +32,7 @@ TARGETS += lkdtm
TARGETS += membarrier
TARGETS += memfd
TARGETS += memory-hotplug
+TARGETS += mincore
TARGETS += mount
TARGETS += mqueue
TARGETS += net
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 93b567d23c8b..2c9d012797a7 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal
+ARM64_SUBTARGETS ?= tags signal pauth fp mte
else
ARM64_SUBTARGETS :=
endif
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
new file mode 100644
index 000000000000..d66f76d2a650
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -0,0 +1,5 @@
+fpsimd-test
+sve-probe-vls
+sve-ptrace
+sve-test
+vlset
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
new file mode 100644
index 000000000000..a57009d3a0dc
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls
+TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset
+
+all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
+
+fpsimd-test: fpsimd-test.o
+ $(CC) -nostdlib $^ -o $@
+sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
+sve-probe-vls: sve-probe-vls.o
+sve-test: sve-test.o
+ $(CC) -nostdlib $^ -o $@
+vlset: vlset.o
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README
new file mode 100644
index 000000000000..03e3dad865d8
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/README
@@ -0,0 +1,100 @@
+This directory contains a mix of tests integrated with kselftest and
+standalone stress tests.
+
+kselftest tests
+===============
+
+sve-probe-vls - Checks the SVE vector length enumeration interface
+sve-ptrace - Checks the SVE ptrace interface
+
+Running the non-kselftest tests
+===============================
+
+sve-stress performs an SVE context switch stress test, as described
+below.
+
+(The fpsimd-stress test works the same way; just substitute "fpsimd" for
+"sve" in the following commands.)
+
+
+The test runs until killed by the user.
+
+If no context switch error was detected, you will see output such as
+the following:
+
+$ ./sve-stress
+(wait for some time)
+^C
+Vector length: 512 bits
+PID: 1573
+Terminated by signal 15, no error, iterations=9467, signals=1014
+Vector length: 512 bits
+PID: 1575
+Terminated by signal 15, no error, iterations=9448, signals=1028
+Vector length: 512 bits
+PID: 1577
+Terminated by signal 15, no error, iterations=9436, signals=1039
+Vector length: 512 bits
+PID: 1579
+Terminated by signal 15, no error, iterations=9421, signals=1039
+Vector length: 512 bits
+PID: 1581
+Terminated by signal 15, no error, iterations=9403, signals=1039
+Vector length: 512 bits
+PID: 1583
+Terminated by signal 15, no error, iterations=9385, signals=1036
+Vector length: 512 bits
+PID: 1585
+Terminated by signal 15, no error, iterations=9376, signals=1039
+Vector length: 512 bits
+PID: 1587
+Terminated by signal 15, no error, iterations=9361, signals=1039
+Vector length: 512 bits
+PID: 1589
+Terminated by signal 15, no error, iterations=9350, signals=1039
+
+
+If an error was detected, details of the mismatch will be printed
+instead of "no error".
+
+Ideally, the test should be allowed to run for many minutes or hours
+to maximise test coverage.
+
+
+KVM stress testing
+==================
+
+To try to reproduce the bugs that we have been observing, sve-stress
+should be run in parallel in two KVM guests, while simultaneously
+running on the host.
+
+1) Start 2 guests, using the following command for each:
+
+$ lkvm run --console=virtio -pconsole=hvc0 --sve Image
+
+(Depending on the hardware GIC implementation, you may also need
+--irqchip=gicv3. New kvmtool defaults to that if appropriate, but I
+can't remember whether my branch is new enough for that. Try without
+the option first.)
+
+Kvmtool occupies the terminal until you kill it (Ctrl+A x),
+or until the guest terminates. It is therefore recommended to run
+each instance in separate terminal (use screen or ssh etc.) This
+allows multiple guests to be run in parallel while running other
+commands on the host.
+
+Within the guest, the host filesystem is accessible, mounted on /host.
+
+2) Run the sve-stress on *each* guest with the Vector-Length set to 32:
+guest$ ./vlset --inherit 32 ./sve-stress
+
+3) Run the sve-stress on the host with the maximum Vector-Length:
+host$ ./vlset --inherit --max ./sve-stress
+
+
+Again, the test should be allowed to run for many minutes or hours to
+maximise test coverage.
+
+If no error is detected, you will see output from each sve-stress
+instance similar to that illustrated above; otherwise details of the
+observed mismatches will be printed.
diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h
new file mode 100644
index 000000000000..a180851496ec
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/asm-offsets.h
@@ -0,0 +1,11 @@
+#define sa_sz 32
+#define sa_flags 8
+#define sa_handler 0
+#define sa_mask_sz 8
+#define SIGUSR1 10
+#define SIGTERM 15
+#define SIGINT 2
+#define SIGABRT 6
+#define SA_NODEFER 1073741824
+#define SA_SIGINFO 4
+#define ucontext_regs 184
diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h
new file mode 100644
index 000000000000..8944f2189206
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/assembler.h
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+.macro __for from:req, to:req
+ .if (\from) == (\to)
+ _for__body %\from
+ .else
+ __for \from, %(\from) + ((\to) - (\from)) / 2
+ __for %(\from) + ((\to) - (\from)) / 2 + 1, \to
+ .endif
+.endm
+
+.macro _for var:req, from:req, to:req, insn:vararg
+ .macro _for__body \var:req
+ .noaltmacro
+ \insn
+ .altmacro
+ .endm
+
+ .altmacro
+ __for \from, \to
+ .noaltmacro
+
+ .purgem _for__body
+.endm
+
+.macro function name
+ .macro endfunction
+ .type \name, @function
+ .purgem endfunction
+ .endm
+\name:
+.endm
+
+.macro define_accessor name, num, insn
+ .macro \name\()_entry n
+ \insn \n, 1
+ ret
+ .endm
+
+function \name
+ adr x2, .L__accessor_tbl\@
+ add x2, x2, x0, lsl #3
+ br x2
+
+.L__accessor_tbl\@:
+ _for x, 0, (\num) - 1, \name\()_entry \x
+endfunction
+
+ .purgem \name\()_entry
+.endm
+
+#endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress
new file mode 100755
index 000000000000..781b5b022eaf
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-stress
@@ -0,0 +1,60 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+trap child_died CHLD
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./fpsimd-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
new file mode 100644
index 000000000000..1c5556bdd11d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple FPSIMD context switch test
+// Repeatedly writes unique test patterns into each FPSIMD register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NVR 32
+#define MAXVL_B (128 / 8)
+
+.macro _vldr Vn:req, Xt:req
+ ld1 {v\Vn\().2d}, [x\Xt]
+.endm
+
+.macro _vstr Vn:req, Xt:req
+ st1 {v\Vn\().2d}, [x\Xt]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// FPSIMD registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (getv,setp) or store to (setv,setp)
+// All clobber x0-x2
+define_accessor setv, NVR, _vldr
+define_accessor getv, NVR, _vstr
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+ str x0, [sp, #-16]!
+
+ mov x0, #1 // STDOUT_FILENO
+ mov x1, sp
+ mov x2, #1
+ mov x8, #__NR_write
+ svc #0
+
+ add sp, sp, #16
+ ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+ mov x1, x0
+
+ mov x2, #0
+0: ldrb w3, [x0], #1
+ cbz w3, 1f
+ add x2, x2, #1
+ b 0b
+
+1: mov w0, #1 // STDOUT_FILENO
+ mov x8, #__NR_write
+ svc #0
+
+ ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+ .pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+ .popsection
+
+ ldr x0, =.L__puts_literal\@
+ bl puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+ mov x1, sp
+ str x30, [sp, #-32]! // Result can't be > 20 digits
+
+ mov x2, #0
+ strb w2, [x1, #-1]! // Write the NUL terminator
+
+ mov x2, #10
+0: udiv x3, x0, x2 // div-mod loop to generate the digits
+ msub x0, x3, x2, x0
+ add w0, w0, #'0'
+ strb w0, [x1, #-1]!
+ mov x0, x3
+ cbnz x3, 0b
+
+ ldrb w0, [x1]
+ cbnz w0, 1f
+ mov w0, #'0' // Print "0" for 0, not ""
+ strb w0, [x1, #-1]!
+
+1: mov x0, x1
+ bl puts
+
+ ldr x30, [sp], #32
+ ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+ mov x5, x30
+
+ bl putdec
+ mov x0, #'\n'
+ bl putc
+
+ ret x5
+endfunction
+
+
+// Clobbers x0-x3,x8
+function puthexb
+ str x30, [sp, #-0x10]!
+
+ mov w3, w0
+ lsr w0, w0, #4
+ bl puthexnibble
+ mov w0, w3
+
+ ldr x30, [sp], #0x10
+ // fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+ and w0, w0, #0xf
+ cmp w0, #10
+ blo 1f
+ add w0, w0, #'a' - ('9' + 1)
+1: add w0, w0, #'0'
+ b putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+ str x30, [sp, #-0x10]!
+
+ mov x4, x0
+ mov x5, x1
+
+0: subs x5, x5, #1
+ b.lo 1f
+ ldrb w0, [x4], #1
+ bl puthexb
+ b 0b
+
+1: ldr x30, [sp], #0x10
+ ret
+endfunction
+
+// Declare some storate space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+vref:
+ .space MAXVL_B * NVR
+scratch:
+ .space MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+ cmp x2, #0
+ b.eq 1f
+0: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ b.ne 0b
+1: ret
+endfunction
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid (16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+function pattern
+ orr w1, w0, w1, lsl #16
+ orr w2, w1, w2, lsl #28
+
+ ldr x0, =scratch
+ mov w1, #MAXVL_B / 4
+
+0: str w2, [x0], #4
+ add w2, w2, #(1 << 22)
+ subs w1, w1, #1
+ bne 0b
+
+ ret
+endfunction
+
+// Get the address of shadow data for FPSIMD V-register V<xn>
+.macro _adrv xd, xn, nrtmp
+ ldr \xd, =vref
+ mov x\nrtmp, #16
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a FPSIMD V-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_vreg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrv x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setv
+
+ ret x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+ mov w2, #0xae
+ b memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+ mov w2, #0
+endfunction
+ // fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+ cmp x1, #0
+ b.eq 1f
+
+0: strb w2, [x0], #1
+ subs x1, x1, #1
+ b.ne 0b
+
+1: ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 1f
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne barf
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ret
+endfunction
+
+// Verify that a FPSIMD V-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x5.
+function check_vreg
+ mov x3, x30
+
+ _adrv x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getv
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // Corrupt some random V-regs
+ adr x0, .text + (irritator_handler - .text) / 16 * 16
+ movi v0.8b, #7
+ movi v9.16b, #9
+ movi v31.8b, #31
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+ // Sanity-check and report the vector length
+
+ mov x19, #128
+ cmp x19, #128
+ b.lo 1f
+ cmp x19, #2048
+ b.hi 1f
+ tst x19, #(8 - 1)
+ b.eq 2f
+
+1: puts "Bad vector length: "
+ mov x0, x19
+ bl putdecn
+ b .Labort
+
+2: puts "Vector length:\t"
+ mov x0, x19
+ bl putdec
+ puts " bits\n"
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x23, #0 // Irritation signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+
+ mov x21, #0 // Set up V-regs & shadow with test pattern
+0: mov x0, x20
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_vreg
+ add x21, x21, #1
+ cmp x21, #NVR
+ b.lo 0b
+
+// Can't do this when SVE state is volatile across SVC:
+ mov x8, #__NR_sched_yield // Encourage preemption
+ svc #0
+
+ mov x21, #0
+0: mov x0, x21
+ bl check_vreg
+ add x21, x21, #1
+ cmp x21, #NVR
+ b.lo 0b
+
+ add x22, x22, #1
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+ puts "Mistatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts ", reg="
+ mov x0, x21
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
new file mode 100644
index 000000000000..b29cbc642c57
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/sigcontext.h>
+
+#include "../../kselftest.h"
+
+int main(int argc, char **argv)
+{
+ unsigned int vq;
+ int vl;
+ static unsigned int vqs[SVE_VQ_MAX];
+ unsigned int nvqs = 0;
+
+ ksft_print_header();
+ ksft_set_plan(2);
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ ksft_exit_skip("SVE not available");
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ if (!sve_vl_valid(vl))
+ ksft_exit_fail_msg("VL %d invalid\n", vl);
+ vq = sve_vq_from_vl(vl);
+
+ if (!(nvqs < SVE_VQ_MAX))
+ ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n",
+ nvqs);
+ vqs[nvqs++] = vq;
+ }
+ ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs);
+ ksft_test_result_pass("All vector lengths valid\n");
+
+ /* Print out the vector lengths in ascending order: */
+ while (nvqs--)
+ ksft_print_msg("%u\n", 16 * vqs[nvqs]);
+
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
new file mode 100644
index 000000000000..3e81f9fab574
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+#include <asm/unistd.h>
+
+.arch_extension sve
+
+.globl sve_store_patterns
+
+sve_store_patterns:
+ mov x1, x0
+
+ index z0.b, #0, #1
+ str q0, [x1]
+
+ mov w8, #__NR_getpid
+ svc #0
+ str q0, [x1, #0x10]
+
+ mov z1.d, z0.d
+ str q0, [x1, #0x20]
+
+ mov w8, #__NR_getpid
+ svc #0
+ str q0, [x1, #0x30]
+
+ mov z1.d, z0.d
+ str q0, [x1, #0x40]
+
+ ret
+
+.size sve_store_patterns, . - sve_store_patterns
+.type sve_store_patterns, @function
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
new file mode 100644
index 000000000000..b2282be6f938
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_SVE
+#define NT_ARM_SVE 0x405
+#endif
+
+/* Number of registers filled in by sve_store_patterns */
+#define NR_VREGS 5
+
+void sve_store_patterns(__uint128_t v[NR_VREGS]);
+
+static void dump(const void *buf, size_t size)
+{
+ size_t i;
+ const unsigned char *p = buf;
+
+ for (i = 0; i < size; ++i)
+ printf(" %.2x", *p++);
+}
+
+static int check_vregs(const __uint128_t vregs[NR_VREGS])
+{
+ int i;
+ int ok = 1;
+
+ for (i = 0; i < NR_VREGS; ++i) {
+ printf("# v[%d]:", i);
+ dump(&vregs[i], sizeof vregs[i]);
+ putchar('\n');
+
+ if (vregs[i] != vregs[0])
+ ok = 0;
+ }
+
+ return ok;
+}
+
+static int do_child(void)
+{
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+ ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+ if (raise(SIGSTOP))
+ ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+ return EXIT_SUCCESS;
+}
+
+static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
+{
+ struct user_sve_header *sve;
+ void *p;
+ size_t sz = sizeof *sve;
+ struct iovec iov;
+
+ while (1) {
+ if (*size < sz) {
+ p = realloc(*buf, sz);
+ if (!p) {
+ errno = ENOMEM;
+ goto error;
+ }
+
+ *buf = p;
+ *size = sz;
+ }
+
+ iov.iov_base = *buf;
+ iov.iov_len = sz;
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov))
+ goto error;
+
+ sve = *buf;
+ if (sve->size <= sz)
+ break;
+
+ sz = sve->size;
+ }
+
+ return sve;
+
+error:
+ return NULL;
+}
+
+static int set_sve(pid_t pid, const struct user_sve_header *sve)
+{
+ struct iovec iov;
+
+ iov.iov_base = (void *)sve;
+ iov.iov_len = sve->size;
+ return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov);
+}
+
+static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num,
+ unsigned int vlmax)
+{
+ unsigned int vq;
+ unsigned int i;
+
+ if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+ ksft_exit_fail_msg("Dumping non-SVE register\n");
+
+ if (vlmax > sve->vl)
+ vlmax = sve->vl;
+
+ vq = sve_vq_from_vl(sve->vl);
+ for (i = 0; i < num; ++i) {
+ printf("# z%u:", i);
+ dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ vlmax);
+ printf("%s\n", vlmax == sve->vl ? "" : " ...");
+ }
+}
+
+static int do_parent(pid_t child)
+{
+ int ret = EXIT_FAILURE;
+ pid_t pid;
+ int status;
+ siginfo_t si;
+ void *svebuf = NULL, *newsvebuf;
+ size_t svebufsz = 0, newsvebufsz;
+ struct user_sve_header *sve, *new_sve;
+ struct user_fpsimd_state *fpsimd;
+ unsigned int i, j;
+ unsigned char *p;
+ unsigned int vq;
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ perror("wait");
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n",
+ status);
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ ksft_test_result_fail("PTRACE_CONT: %s\n",
+ strerror(errno));
+ goto error;
+ }
+ }
+
+ sve = get_sve(pid, &svebuf, &svebufsz);
+ if (!sve) {
+ int e = errno;
+
+ ksft_test_result_fail("get_sve: %s\n", strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ } else {
+ ksft_test_result_pass("get_sve\n");
+ }
+
+ ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
+ "FPSIMD registers\n");
+ if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
+ goto error;
+
+ fpsimd = (struct user_fpsimd_state *)((char *)sve +
+ SVE_PT_FPSIMD_OFFSET);
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&fpsimd->vregs[i];
+
+ for (j = 0; j < sizeof fpsimd->vregs[i]; ++j)
+ p[j] = j;
+ }
+
+ if (set_sve(pid, sve)) {
+ int e = errno;
+
+ ksft_test_result_fail("set_sve(FPSIMD): %s\n",
+ strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ }
+
+ vq = sve_vq_from_vl(sve->vl);
+
+ newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+ new_sve = newsvebuf = malloc(newsvebufsz);
+ if (!new_sve) {
+ errno = ENOMEM;
+ perror(NULL);
+ goto error;
+ }
+
+ *new_sve = *sve;
+ new_sve->flags &= ~SVE_PT_REGS_MASK;
+ new_sve->flags |= SVE_PT_REGS_SVE;
+ memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+ 0, SVE_PT_SVE_ZREG_SIZE(vq));
+ new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+ if (set_sve(pid, new_sve)) {
+ int e = errno;
+
+ ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ }
+
+ new_sve = get_sve(pid, &newsvebuf, &newsvebufsz);
+ if (!new_sve) {
+ int e = errno;
+
+ ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno));
+ if (e == ESRCH)
+ goto disappeared;
+
+ goto error;
+ }
+
+ ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE,
+ "SVE registers\n");
+ if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+ goto error;
+
+ dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]);
+
+ p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+ for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) {
+ unsigned char expected = i;
+
+ if (__BYTE_ORDER == __BIG_ENDIAN)
+ expected = sizeof fpsimd->vregs[0] - 1 - expected;
+
+ ksft_test_result(p[i] == expected, "p[%d] == expected\n", i);
+ if (p[i] != expected)
+ goto error;
+ }
+
+ ret = EXIT_SUCCESS;
+
+error:
+ kill(child, SIGKILL);
+
+disappeared:
+ return ret;
+}
+
+int main(void)
+{
+ int ret = EXIT_SUCCESS;
+ __uint128_t v[NR_VREGS];
+ pid_t child;
+
+ ksft_print_header();
+ ksft_set_plan(20);
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ ksft_exit_skip("SVE not available\n");
+
+ sve_store_patterns(v);
+
+ if (!check_vregs(v))
+ ksft_exit_fail_msg("Initial check_vregs() failed\n");
+
+ child = fork();
+ if (!child)
+ return do_child();
+
+ if (do_parent(child))
+ ret = EXIT_FAILURE;
+
+ ksft_print_cnts();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress
new file mode 100755
index 000000000000..24dd0922cc02
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./sve-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
new file mode 100644
index 000000000000..f95074c9b48b
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple Scalable Vector Extension context switch test
+// Repeatedly writes unique test patterns into each SVE register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NZR 32
+#define NPR 16
+#define MAXVL_B (2048 / 8)
+
+.arch_extension sve
+
+.macro _sve_ldr_v zt, xn
+ ldr z\zt, [x\xn]
+.endm
+
+.macro _sve_str_v zt, xn
+ str z\zt, [x\xn]
+.endm
+
+.macro _sve_ldr_p pt, xn
+ ldr p\pt, [x\xn]
+.endm
+
+.macro _sve_str_p pt, xn
+ str p\pt, [x\xn]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// SVE registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (getz,setp) or store to (setz,setp)
+// All clobber x0-x2
+define_accessor setz, NZR, _sve_ldr_v
+define_accessor getz, NZR, _sve_str_v
+define_accessor setp, NPR, _sve_ldr_p
+define_accessor getp, NPR, _sve_str_p
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+ str x0, [sp, #-16]!
+
+ mov x0, #1 // STDOUT_FILENO
+ mov x1, sp
+ mov x2, #1
+ mov x8, #__NR_write
+ svc #0
+
+ add sp, sp, #16
+ ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+ mov x1, x0
+
+ mov x2, #0
+0: ldrb w3, [x0], #1
+ cbz w3, 1f
+ add x2, x2, #1
+ b 0b
+
+1: mov w0, #1 // STDOUT_FILENO
+ mov x8, #__NR_write
+ svc #0
+
+ ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+ .pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+ .popsection
+
+ ldr x0, =.L__puts_literal\@
+ bl puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+ mov x1, sp
+ str x30, [sp, #-32]! // Result can't be > 20 digits
+
+ mov x2, #0
+ strb w2, [x1, #-1]! // Write the NUL terminator
+
+ mov x2, #10
+0: udiv x3, x0, x2 // div-mod loop to generate the digits
+ msub x0, x3, x2, x0
+ add w0, w0, #'0'
+ strb w0, [x1, #-1]!
+ mov x0, x3
+ cbnz x3, 0b
+
+ ldrb w0, [x1]
+ cbnz w0, 1f
+ mov w0, #'0' // Print "0" for 0, not ""
+ strb w0, [x1, #-1]!
+
+1: mov x0, x1
+ bl puts
+
+ ldr x30, [sp], #32
+ ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+ mov x5, x30
+
+ bl putdec
+ mov x0, #'\n'
+ bl putc
+
+ ret x5
+endfunction
+
+// Clobbers x0-x3,x8
+function puthexb
+ str x30, [sp, #-0x10]!
+
+ mov w3, w0
+ lsr w0, w0, #4
+ bl puthexnibble
+ mov w0, w3
+
+ ldr x30, [sp], #0x10
+ // fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+ and w0, w0, #0xf
+ cmp w0, #10
+ blo 1f
+ add w0, w0, #'a' - ('9' + 1)
+1: add w0, w0, #'0'
+ b putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+ str x30, [sp, #-0x10]!
+
+ mov x4, x0
+ mov x5, x1
+
+0: subs x5, x5, #1
+ b.lo 1f
+ ldrb w0, [x4], #1
+ bl puthexb
+ b 0b
+
+1: ldr x30, [sp], #0x10
+ ret
+endfunction
+
+// Declare some storate space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+zref:
+ .space MAXVL_B * NZR
+pref:
+ .space MAXVL_B / 8 * NPR
+ffrref:
+ .space MAXVL_B / 8
+scratch:
+ .space MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+ cmp x2, #0
+ b.eq 1f
+0: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ b.ne 0b
+1: ret
+endfunction
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid (16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+
+// These values are used to constuct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28 generation number (increments once per test_loop)
+// bits 27:22 32-bit lane index
+// bits 21:16 register number
+// bits 15: 0 pid
+
+function pattern
+ orr w1, w0, w1, lsl #16
+ orr w2, w1, w2, lsl #28
+
+ ldr x0, =scratch
+ mov w1, #MAXVL_B / 4
+
+0: str w2, [x0], #4
+ add w2, w2, #(1 << 22)
+ subs w1, w1, #1
+ bne 0b
+
+ ret
+endfunction
+
+// Get the address of shadow data for SVE Z-register Z<xn>
+.macro _adrz xd, xn, nrtmp
+ ldr \xd, =zref
+ rdvl x\nrtmp, #1
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Get the address of shadow data for SVE P-register P<xn - NZR>
+.macro _adrp xd, xn, nrtmp
+ ldr \xd, =pref
+ rdvl x\nrtmp, #1
+ lsr x\nrtmp, x\nrtmp, #3
+ sub \xn, \xn, #NZR
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a SVE Z-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_zreg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrz x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setz
+
+ ret x4
+endfunction
+
+// Set up test pattern in a SVE P-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_preg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrp x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setp
+
+ ret x4
+endfunction
+
+// Set up test pattern in the FFR
+// x0: pid
+// x2: generation
+// Beware: corrupts P0.
+function setup_ffr
+ mov x4, x30
+
+ bl pattern
+ ldr x0, =ffrref
+ ldr x1, =scratch
+ rdvl x2, #1
+ lsr x2, x2, #3
+ bl memcpy
+
+ mov x0, #0
+ ldr x1, =ffrref
+ bl setp
+
+ wrffr p0.b
+
+ ret x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+ mov w2, #0xae
+ b memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+ mov w2, #0
+endfunction
+ // fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+ cmp x1, #0
+ b.eq 1f
+
+0: strb w2, [x0], #1
+ subs x1, x1, #1
+ b.ne 0b
+
+1: ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 2f
+
+ stp x0, x1, [sp, #-0x20]!
+ str x2, [sp, #0x10]
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne 1f
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ldr x2, [sp, #0x10]
+ ldp x0, x1, [sp], #0x20
+ b.ne barf
+
+2: ret
+endfunction
+
+// Verify that a SVE Z-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_zreg
+ mov x3, x30
+
+ _adrz x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getz
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Verify that a SVE P-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_preg
+ mov x3, x30
+
+ _adrp x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getp
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Verify that the FFR matches its shadow in memory, else abort
+// Beware -- corrupts P0.
+// Clobbers x0-x5.
+function check_ffr
+ mov x3, x30
+
+ ldr x4, =scratch
+ rdvl x5, #1
+ lsr x5, x5, #3
+
+ mov x0, x4
+ mov x1, x5
+ bl memfill_ae
+
+ rdffr p0.b
+ mov x0, #0
+ mov x1, x4
+ bl getp
+
+ ldr x0, =ffrref
+ mov x1, x4
+ mov x2, x5
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // Corrupt some random Z-regs
+ adr x0, .text + (irritator_handler - .text) / 16 * 16
+ movi v0.8b, #1
+ movi v9.16b, #2
+ movi v31.8b, #3
+ // And P0
+ rdffr p0.b
+ // And FFR
+ wrffr p15.b
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+ // Sanity-check and report the vector length
+
+ rdvl x19, #8
+ cmp x19, #128
+ b.lo 1f
+ cmp x19, #2048
+ b.hi 1f
+ tst x19, #(8 - 1)
+ b.eq 2f
+
+1: puts "Bad vector length: "
+ mov x0, x19
+ bl putdecn
+ b .Labort
+
+2: puts "Vector length:\t"
+ mov x0, x19
+ bl putdec
+ puts " bits\n"
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x23, #0 // Irritation signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+ rdvl x0, #8
+ cmp x0, x19
+ b.ne vl_barf
+
+ mov x21, #0 // Set up Z-regs & shadow with test pattern
+0: mov x0, x20
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_zreg
+ add x21, x21, #1
+ cmp x21, #NZR
+ b.lo 0b
+
+ mov x0, x20 // Set up FFR & shadow with test pattern
+ mov x1, #NZR + NPR
+ and x2, x22, #0xf
+ bl setup_ffr
+
+0: mov x0, x20 // Set up P-regs & shadow with test pattern
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_preg
+ add x21, x21, #1
+ cmp x21, #NZR + NPR
+ b.lo 0b
+
+// Can't do this when SVE state is volatile across SVC:
+// mov x8, #__NR_sched_yield // Encourage preemption
+// svc #0
+
+ mov x21, #0
+0: mov x0, x21
+ bl check_zreg
+ add x21, x21, #1
+ cmp x21, #NZR
+ b.lo 0b
+
+0: mov x0, x21
+ bl check_preg
+ add x21, x21, #1
+ cmp x21, #NZR + NPR
+ b.lo 0b
+
+ bl check_ffr
+
+ add x22, x22, #1
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// end hack
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+ puts "Mistatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts ", reg="
+ mov x0, x21
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+
+ mov x8, #__NR_getpid
+ svc #0
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// ^ end of hack
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+// mov x8, #__NR_exit
+// mov x1, #1
+// svc #0
+endfunction
+
+function vl_barf
+ mov x10, x0
+
+ puts "Bad active VL: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c
new file mode 100644
index 000000000000..308d27a68226
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vlset.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2019 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+
+static int inherit = 0;
+static int no_inherit = 0;
+static int force = 0;
+static unsigned long vl;
+
+static const struct option options[] = {
+ { "force", no_argument, NULL, 'f' },
+ { "inherit", no_argument, NULL, 'i' },
+ { "max", no_argument, NULL, 'M' },
+ { "no-inherit", no_argument, &no_inherit, 1 },
+ { "help", no_argument, NULL, '?' },
+ {}
+};
+
+static char const *program_name;
+
+static int parse_options(int argc, char **argv)
+{
+ int c;
+ char *rest;
+
+ program_name = strrchr(argv[0], '/');
+ if (program_name)
+ ++program_name;
+ else
+ program_name = argv[0];
+
+ while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1)
+ switch (c) {
+ case 'M': vl = SVE_VL_MAX; break;
+ case 'f': force = 1; break;
+ case 'i': inherit = 1; break;
+ case 0: break;
+ default: goto error;
+ }
+
+ if (inherit && no_inherit)
+ goto error;
+
+ if (!vl) {
+ /* vector length */
+ if (optind >= argc)
+ goto error;
+
+ errno = 0;
+ vl = strtoul(argv[optind], &rest, 0);
+ if (*rest) {
+ vl = ULONG_MAX;
+ errno = EINVAL;
+ }
+ if (vl == ULONG_MAX && errno) {
+ fprintf(stderr, "%s: %s: %s\n",
+ program_name, argv[optind], strerror(errno));
+ goto error;
+ }
+
+ ++optind;
+ }
+
+ /* command */
+ if (optind >= argc)
+ goto error;
+
+ return 0;
+
+error:
+ fprintf(stderr,
+ "Usage: %s [-f | --force] "
+ "[-i | --inherit | --no-inherit] "
+ "{-M | --max | <vector length>} "
+ "<command> [<arguments> ...]\n",
+ program_name);
+ return -1;
+}
+
+int main(int argc, char **argv)
+{
+ int ret = 126; /* same as sh(1) command-not-executable error */
+ long flags;
+ char *path;
+ int t, e;
+
+ if (parse_options(argc, argv))
+ return 2; /* same as sh(1) builtin incorrect-usage */
+
+ if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) {
+ fprintf(stderr, "%s: Invalid vector length %lu\n",
+ program_name, vl);
+ return 2; /* same as sh(1) builtin incorrect-usage */
+ }
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+ fprintf(stderr, "%s: Scalable Vector Extension not present\n",
+ program_name);
+
+ if (!force)
+ goto error;
+
+ fputs("Going ahead anyway (--force): "
+ "This is a debug option. Don't rely on it.\n",
+ stderr);
+ }
+
+ flags = PR_SVE_SET_VL_ONEXEC;
+ if (inherit)
+ flags |= PR_SVE_VL_INHERIT;
+
+ t = prctl(PR_SVE_SET_VL, vl | flags);
+ if (t < 0) {
+ fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n",
+ program_name, strerror(errno));
+ goto error;
+ }
+
+ t = prctl(PR_SVE_GET_VL);
+ if (t == -1) {
+ fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n",
+ program_name, strerror(errno));
+ goto error;
+ }
+ flags = PR_SVE_VL_LEN_MASK;
+ flags = t & ~flags;
+
+ assert(optind < argc);
+ path = argv[optind];
+
+ execvp(path, &argv[optind]);
+ e = errno;
+ if (errno == ENOENT)
+ ret = 127; /* same as sh(1) not-found error */
+ fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e));
+
+error:
+ return ret; /* same as sh(1) not-executable error */
+}
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
new file mode 100644
index 000000000000..bc3ac63f3314
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -0,0 +1,6 @@
+check_buffer_fill
+check_tags_inclusion
+check_child_memory
+check_mmap_options
+check_ksm_options
+check_user_mem
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
new file mode 100644
index 000000000000..2480226dfe57
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+CFLAGS += -std=gnu99 -I.
+SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
+PROGS := $(patsubst %.c,%,$(SRCS))
+
+#Add mte compiler option
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),)
+CFLAGS += -march=armv8.5-a+memtag
+endif
+
+#check if the compiler works well
+mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(mte_cc_support),1)
+# Generated binaries to be installed by top KSFT script
+TEST_GEN_PROGS := $(PROGS)
+
+# Get Kernel headers installed and use them.
+KSFT_KHDR_INSTALL := 1
+endif
+
+# Include KSFT lib.mk.
+include ../../lib.mk
+
+ifeq ($(mte_cc_support),1)
+$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S
+endif
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
new file mode 100644
index 000000000000..242635d79035
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define OVERFLOW_RANGE MT_GRANULE_SIZE
+
+static int sizes[] = {
+ 1, 555, 1033, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+ /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+enum mte_block_test_alloc {
+ UNTAGGED_TAGGED,
+ TAGGED_UNTAGGED,
+ TAGGED_TAGGED,
+ BLOCK_ALLOC_MAX,
+};
+
+static int check_buffer_by_byte(int mem_type, int mode)
+{
+ char *ptr;
+ int i, j, item;
+ bool err;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true);
+ if (check_allocated_memory(ptr, sizes[i], mem_type, true) != KSFT_PASS)
+ return KSFT_FAIL;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i]);
+ /* Set some value in tagged memory */
+ for (j = 0; j < sizes[i]; j++)
+ ptr[j] = '1';
+ mte_wait_after_trig();
+ err = cur_mte_cxt.fault_valid;
+ /* Check the buffer whether it is filled. */
+ for (j = 0; j < sizes[i] && !err; j++) {
+ if (ptr[j] != '1')
+ err = true;
+ }
+ mte_free_memory((void *)ptr, sizes[i], mem_type, true);
+
+ if (err)
+ break;
+ }
+ if (!err)
+ return KSFT_PASS;
+ else
+ return KSFT_FAIL;
+}
+
+static int check_buffer_underflow_by_byte(int mem_type, int mode,
+ int underflow_range)
+{
+ char *ptr;
+ int i, j, item, last_index;
+ bool err;
+ char *und_ptr = NULL;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+ underflow_range, 0);
+ if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+ underflow_range, 0) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -underflow_range);
+ last_index = 0;
+ /* Set some value in tagged memory and make the buffer underflow */
+ for (j = sizes[i] - 1; (j >= -underflow_range) &&
+ (cur_mte_cxt.fault_valid == false); j--) {
+ ptr[j] = '1';
+ last_index = j;
+ }
+ mte_wait_after_trig();
+ err = false;
+ /* Check whether the buffer is filled */
+ for (j = 0; j < sizes[i]; j++) {
+ if (ptr[j] != '1') {
+ err = true;
+ ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+ j, ptr);
+ break;
+ }
+ }
+ if (err)
+ goto check_buffer_underflow_by_byte_err;
+
+ switch (mode) {
+ case MTE_NONE_ERR:
+ if (cur_mte_cxt.fault_valid == true || last_index != -underflow_range) {
+ err = true;
+ break;
+ }
+ /* There were no fault so the underflow area should be filled */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr - underflow_range);
+ for (j = 0 ; j < underflow_range; j++) {
+ if (und_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_ASYNC_ERR:
+ /* Imprecise fault should occur otherwise return error */
+ if (cur_mte_cxt.fault_valid == false) {
+ err = true;
+ break;
+ }
+ /*
+ * The imprecise fault is checked after the write to the buffer,
+ * so the underflow area before the fault should be filled.
+ */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ for (j = last_index ; j < 0 ; j++) {
+ if (und_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_SYNC_ERR:
+ /* Precise fault should occur otherwise return error */
+ if (!cur_mte_cxt.fault_valid || (last_index != (-1))) {
+ err = true;
+ break;
+ }
+ /* Underflow area should not be filled */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ if (und_ptr[-1] == '1')
+ err = true;
+ break;
+ default:
+ err = true;
+ break;
+ }
+check_buffer_underflow_by_byte_err:
+ mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, underflow_range, 0);
+ if (err)
+ break;
+ }
+ return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_overflow_by_byte(int mem_type, int mode,
+ int overflow_range)
+{
+ char *ptr;
+ int i, j, item, last_index;
+ bool err;
+ size_t tagged_size, overflow_size;
+ char *over_ptr = NULL;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+ 0, overflow_range);
+ if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+ 0, overflow_range) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ tagged_size = MT_ALIGN_UP(sizes[i]);
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i] + overflow_range);
+
+ /* Set some value in tagged memory and make the buffer underflow */
+ for (j = 0, last_index = 0 ; (j < (sizes[i] + overflow_range)) &&
+ (cur_mte_cxt.fault_valid == false); j++) {
+ ptr[j] = '1';
+ last_index = j;
+ }
+ mte_wait_after_trig();
+ err = false;
+ /* Check whether the buffer is filled */
+ for (j = 0; j < sizes[i]; j++) {
+ if (ptr[j] != '1') {
+ err = true;
+ ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+ j, ptr);
+ break;
+ }
+ }
+ if (err)
+ goto check_buffer_overflow_by_byte_err;
+
+ overflow_size = overflow_range - (tagged_size - sizes[i]);
+
+ switch (mode) {
+ case MTE_NONE_ERR:
+ if ((cur_mte_cxt.fault_valid == true) ||
+ (last_index != (sizes[i] + overflow_range - 1))) {
+ err = true;
+ break;
+ }
+ /* There were no fault so the overflow area should be filled */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+ for (j = 0 ; j < overflow_size; j++) {
+ if (over_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_ASYNC_ERR:
+ /* Imprecise fault should occur otherwise return error */
+ if (cur_mte_cxt.fault_valid == false) {
+ err = true;
+ break;
+ }
+ /*
+ * The imprecise fault is checked after the write to the buffer,
+ * so the overflow area should be filled before the fault.
+ */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ for (j = tagged_size ; j < last_index; j++) {
+ if (over_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_SYNC_ERR:
+ /* Precise fault should occur otherwise return error */
+ if (!cur_mte_cxt.fault_valid || (last_index != tagged_size)) {
+ err = true;
+ break;
+ }
+ /* Underflow area should not be filled */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+ for (j = 0 ; j < overflow_size; j++) {
+ if (over_ptr[j] == '1')
+ err = true;
+ }
+ break;
+ default:
+ err = true;
+ break;
+ }
+check_buffer_overflow_by_byte_err:
+ mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, 0, overflow_range);
+ if (err)
+ break;
+ }
+ return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_by_block_iterate(int mem_type, int mode, size_t size)
+{
+ char *src, *dst;
+ int j, result = KSFT_PASS;
+ enum mte_block_test_alloc alloc_type = UNTAGGED_TAGGED;
+
+ for (alloc_type = UNTAGGED_TAGGED; alloc_type < (int) BLOCK_ALLOC_MAX; alloc_type++) {
+ switch (alloc_type) {
+ case UNTAGGED_TAGGED:
+ src = (char *)mte_allocate_memory(size, mem_type, 0, false);
+ if (check_allocated_memory(src, size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)src, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+
+ break;
+ case TAGGED_UNTAGGED:
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, false);
+ if (check_allocated_memory(dst, size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)dst, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+ break;
+ case TAGGED_TAGGED:
+ src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)src, size, mem_type, true);
+ return KSFT_FAIL;
+ }
+ break;
+ default:
+ return KSFT_FAIL;
+ }
+
+ cur_mte_cxt.fault_valid = false;
+ result = KSFT_PASS;
+ mte_initialize_current_context(mode, (uintptr_t)dst, size);
+ /* Set some value in memory and copy*/
+ memset((void *)src, (int)'1', size);
+ memcpy((void *)dst, (void *)src, size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid) {
+ result = KSFT_FAIL;
+ goto check_buffer_by_block_err;
+ }
+ /* Check the buffer whether it is filled. */
+ for (j = 0; j < size; j++) {
+ if (src[j] != dst[j] || src[j] != '1') {
+ result = KSFT_FAIL;
+ break;
+ }
+ }
+check_buffer_by_block_err:
+ mte_free_memory((void *)src, size, mem_type,
+ MT_FETCH_TAG((uintptr_t)src) ? true : false);
+ mte_free_memory((void *)dst, size, mem_type,
+ MT_FETCH_TAG((uintptr_t)dst) ? true : false);
+ if (result != KSFT_PASS)
+ return result;
+ }
+ return result;
+}
+
+static int check_buffer_by_block(int mem_type, int mode)
+{
+ int i, item, result = KSFT_PASS;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = sizeof(sizes)/sizeof(int);
+ cur_mte_cxt.fault_valid = false;
+ for (i = 0; i < item; i++) {
+ result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]);
+ if (result != KSFT_PASS)
+ break;
+ }
+ return result;
+}
+
+static int compare_memory_tags(char *ptr, size_t size, int tag)
+{
+ int i, new_tag;
+
+ for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+ new_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+ if (tag != new_tag) {
+ ksft_print_msg("FAIL: child mte tag mismatch\n");
+ return KSFT_FAIL;
+ }
+ }
+ return KSFT_PASS;
+}
+
+static int check_memory_initial_tags(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int run, fd;
+ int total = sizeof(sizes)/sizeof(int);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ /* check initial tags for anonymous mmap */
+ ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false);
+ if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+ if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ return KSFT_FAIL;
+ }
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+
+ /* check initial tags for file mmap */
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ ptr = (char *)mte_allocate_file_memory(sizes[run], mem_type, mapping, false, fd);
+ if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ close(fd);
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ size_t page_size = getpagesize();
+ int item = sizeof(sizes)/sizeof(int);
+
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ /* Buffer by byte tests */
+ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR),
+ "Check buffer correctness by byte with sync err mode and mmap memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_ASYNC_ERR),
+ "Check buffer correctness by byte with async err mode and mmap memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_SYNC_ERR),
+ "Check buffer correctness by byte with sync err mode and mmap/mprotect memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_ASYNC_ERR),
+ "Check buffer correctness by byte with async err mode and mmap/mprotect memory\n");
+
+ /* Check buffer underflow with underflow size as 16 */
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+ /* Check buffer underflow with underflow size as page size */
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, page_size),
+ "Check buffer write underflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, page_size),
+ "Check buffer write underflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, page_size),
+ "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+ /* Check buffer overflow with overflow size as 16 */
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with tag fault ignore mode and mmap memory\n");
+
+ /* Buffer by block tests */
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_SYNC_ERR),
+ "Check buffer write correctness by block with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_ASYNC_ERR),
+ "Check buffer write correctness by block with async mode and mmap memory\n");
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_NONE_ERR),
+ "Check buffer write correctness by block with tag fault ignore and mmap memory\n");
+
+ /* Initial tags are supposed to be 0 */
+ evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check initial tags with private mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check initial tags with private mapping, sync error mode and mmap/mprotect memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check initial tags with shared mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+ "Check initial tags with shared mapping, sync error mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
new file mode 100644
index 000000000000..97bebdecd29e
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE (5 * MT_GRANULE_SIZE)
+#define RUNS (MT_TAG_COUNT)
+#define UNDERFLOW MT_GRANULE_SIZE
+#define OVERFLOW MT_GRANULE_SIZE
+
+static size_t page_size;
+static int sizes[] = {
+ 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+ /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+static int check_child_tag_inheritance(char *ptr, int size, int mode)
+{
+ int i, parent_tag, child_tag, fault, child_status;
+ pid_t child;
+
+ parent_tag = MT_FETCH_TAG((uintptr_t)ptr);
+ fault = 0;
+
+ child = fork();
+ if (child == -1) {
+ ksft_print_msg("FAIL: child process creation\n");
+ return KSFT_FAIL;
+ } else if (child == 0) {
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+ /* Do copy on write */
+ memset(ptr, '1', size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+ child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+ if (parent_tag != child_tag) {
+ ksft_print_msg("FAIL: child mte tag mismatch\n");
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memset(ptr + size, '3', OVERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+check_child_tag_inheritance_err:
+ _exit(fault);
+ }
+ /* Wait for child process to terminate */
+ wait(&child_status);
+ if (WIFEXITED(child_status))
+ fault = WEXITSTATUS(child_status);
+ else
+ fault = 1;
+ return (fault) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int check_child_memory_mapping(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int run, result;
+ int item = sizeof(sizes)/sizeof(int);
+
+ item = sizeof(sizes)/sizeof(int);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < item; run++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS)
+ return KSFT_FAIL;
+ result = check_child_tag_inheritance(ptr, sizes[run], mode);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ if (result == KSFT_FAIL)
+ return result;
+ }
+ return KSFT_PASS;
+}
+
+static int check_child_file_mapping(int mem_type, int mode, int mapping)
+{
+ char *ptr, *map_ptr;
+ int run, fd, map_size, result = KSFT_PASS;
+ int total = sizeof(sizes)/sizeof(int);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+ /* Only mte enabled memory will allow tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on file based memory\n");
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_child_tag_inheritance(ptr, sizes[run], mode);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ int item = sizeof(sizes)/sizeof(int);
+
+ page_size = getpagesize();
+ if (!page_size) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGBUS, mte_default_handler);
+
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, precise mode and mmap/mprotect memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+ evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child file memory with private mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child file memory with shared mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check child file memory with private mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check child file memory with shared mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child file memory with private mapping, precise mode and mmap/mprotect memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child file memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
new file mode 100644
index 000000000000..bc41ae630c86
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define TEST_UNIT 10
+#define PATH_KSM "/sys/kernel/mm/ksm/"
+#define MAX_LOOP 4
+
+static size_t page_sz;
+static unsigned long ksm_sysfs[5];
+
+static unsigned long read_sysfs(char *str)
+{
+ FILE *f;
+ unsigned long val = 0;
+
+ f = fopen(str, "r");
+ if (!f) {
+ ksft_print_msg("ERR: missing %s\n", str);
+ return 0;
+ }
+ fscanf(f, "%lu", &val);
+ fclose(f);
+ return val;
+}
+
+static void write_sysfs(char *str, unsigned long val)
+{
+ FILE *f;
+
+ f = fopen(str, "w");
+ if (!f) {
+ ksft_print_msg("ERR: missing %s\n", str);
+ return;
+ }
+ fprintf(f, "%lu", val);
+ fclose(f);
+}
+
+static void mte_ksm_setup(void)
+{
+ ksm_sysfs[0] = read_sysfs(PATH_KSM "merge_across_nodes");
+ write_sysfs(PATH_KSM "merge_across_nodes", 1);
+ ksm_sysfs[1] = read_sysfs(PATH_KSM "sleep_millisecs");
+ write_sysfs(PATH_KSM "sleep_millisecs", 0);
+ ksm_sysfs[2] = read_sysfs(PATH_KSM "run");
+ write_sysfs(PATH_KSM "run", 1);
+ ksm_sysfs[3] = read_sysfs(PATH_KSM "max_page_sharing");
+ write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3] + TEST_UNIT);
+ ksm_sysfs[4] = read_sysfs(PATH_KSM "pages_to_scan");
+ write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4] + TEST_UNIT);
+}
+
+static void mte_ksm_restore(void)
+{
+ write_sysfs(PATH_KSM "merge_across_nodes", ksm_sysfs[0]);
+ write_sysfs(PATH_KSM "sleep_millisecs", ksm_sysfs[1]);
+ write_sysfs(PATH_KSM "run", ksm_sysfs[2]);
+ write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3]);
+ write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4]);
+}
+
+static void mte_ksm_scan(void)
+{
+ int cur_count = read_sysfs(PATH_KSM "full_scans");
+ int scan_count = cur_count + 1;
+ int max_loop_count = MAX_LOOP;
+
+ while ((cur_count < scan_count) && max_loop_count) {
+ sleep(1);
+ cur_count = read_sysfs(PATH_KSM "full_scans");
+ max_loop_count--;
+ }
+#ifdef DEBUG
+ ksft_print_msg("INFO: pages_shared=%lu pages_sharing=%lu\n",
+ read_sysfs(PATH_KSM "pages_shared"),
+ read_sysfs(PATH_KSM "pages_sharing"));
+#endif
+}
+
+static int check_madvise_options(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int err, ret;
+
+ err = KSFT_FAIL;
+ if (access(PATH_KSM, F_OK) == -1) {
+ ksft_print_msg("ERR: Kernel KSM config not enabled\n");
+ return err;
+ }
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true);
+ if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ /* Insert same data in all the pages */
+ memset(ptr, 'A', TEST_UNIT * page_sz);
+ ret = madvise(ptr, TEST_UNIT * page_sz, MADV_MERGEABLE);
+ if (ret) {
+ ksft_print_msg("ERR: madvise failed to set MADV_UNMERGEABLE\n");
+ goto madvise_err;
+ }
+ mte_ksm_scan();
+ /* Tagged pages should not merge */
+ if ((read_sysfs(PATH_KSM "pages_shared") < 1) ||
+ (read_sysfs(PATH_KSM "pages_sharing") < (TEST_UNIT - 1)))
+ err = KSFT_PASS;
+madvise_err:
+ mte_free_memory(ptr, TEST_UNIT * page_sz, mem_type, true);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+ page_sz = getpagesize();
+ if (!page_sz) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler);
+ /* Enable KSM */
+ mte_ksm_setup();
+
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check KSM mte page merge for private mapping, sync mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check KSM mte page merge for private mapping, async mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check KSM mte page merge for shared mapping, sync mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check KSM mte page merge for shared mapping, async mode and mmap memory\n");
+
+ mte_ksm_restore();
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
new file mode 100644
index 000000000000..33b13b86199b
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define RUNS (MT_TAG_COUNT)
+#define UNDERFLOW MT_GRANULE_SIZE
+#define OVERFLOW MT_GRANULE_SIZE
+#define TAG_CHECK_ON 0
+#define TAG_CHECK_OFF 1
+
+static size_t page_size;
+static int sizes[] = {
+ 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+ /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+{
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+ memset(ptr, '1', size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+ return KSFT_FAIL;
+ if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memset(ptr + size, '3', OVERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+ return KSFT_FAIL;
+ if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+ char *ptr, *map_ptr;
+ int run, result, map_size;
+ int item = sizeof(sizes)/sizeof(int);
+
+ item = sizeof(sizes)/sizeof(int);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < item; run++) {
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+ /* Only mte enabled memory will allow tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n");
+ munmap((void *)map_ptr, map_size);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ mte_free_memory((void *)map_ptr, map_size, mem_type, false);
+ if (result == KSFT_FAIL)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+ char *ptr, *map_ptr;
+ int run, fd, map_size;
+ int total = sizeof(sizes)/sizeof(int);
+ int result = KSFT_PASS;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+
+ map_size = sizes[run] + UNDERFLOW + OVERFLOW;
+ map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+ /* Only mte enabled memory will allow tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on file based memory\n");
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ if (result == KSFT_FAIL)
+ break;
+ }
+ return result;
+}
+
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+{
+ char *ptr, *map_ptr;
+ int run, prot_flag, result, fd, map_size;
+ int total = sizeof(sizes)/sizeof(int);
+
+ prot_flag = PROT_READ | PROT_WRITE;
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS)
+ return KSFT_FAIL;
+ map_ptr = ptr - UNDERFLOW;
+ /* Try to clear PROT_MTE property and verify it by tag checking */
+ if (mprotect(map_ptr, map_size, prot_flag)) {
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW);
+ ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ ptr = (char *)mte_allocate_file_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW, fd);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ map_ptr = ptr - UNDERFLOW;
+ /* Try to clear PROT_MTE property and verify it by tag checking */
+ if (mprotect(map_ptr, map_size, prot_flag)) {
+ ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ close(fd);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ int item = sizeof(sizes)/sizeof(int);
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+ page_size = getpagesize();
+ if (!page_size) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ mte_enable_pstate_tco();
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n");
+
+ mte_disable_pstate_tco();
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n");
+
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+ evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
new file mode 100644
index 000000000000..94d245a0ed56
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE (5 * MT_GRANULE_SIZE)
+#define RUNS (MT_TAG_COUNT * 2)
+#define MTE_LAST_TAG_MASK (0x7FFF)
+
+static int verify_mte_pointer_validity(char *ptr, int mode)
+{
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+ /* Check the validity of the tagged pointer */
+ memset((void *)ptr, '1', BUFFER_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid)
+ return KSFT_FAIL;
+ /* Proceed further for nonzero tags */
+ if (!MT_FETCH_TAG((uintptr_t)ptr))
+ return KSFT_PASS;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE + 1);
+ /* Check the validity outside the range */
+ ptr[BUFFER_SIZE] = '2';
+ mte_wait_after_trig();
+ if (!cur_mte_cxt.fault_valid)
+ return KSFT_FAIL;
+ else
+ return KSFT_PASS;
+}
+
+static int check_single_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int tag, run, result = KSFT_PASS;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
+ mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+ /* Try to catch a excluded tag by a number of tries. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /* Check tag value */
+ if (MT_FETCH_TAG((uintptr_t)ptr) == tag) {
+ ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+ MT_FETCH_TAG((uintptr_t)ptr),
+ MT_INCLUDE_VALID_TAG(tag));
+ result = KSFT_FAIL;
+ break;
+ }
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ }
+ mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_multiple_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int tag, run, result = KSFT_PASS;
+ unsigned long excl_mask = 0;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) {
+ excl_mask |= 1 << tag;
+ mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
+ /* Try to catch a excluded tag by a number of tries. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /* Check tag value */
+ if (MT_FETCH_TAG((uintptr_t)ptr) < tag) {
+ ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+ MT_FETCH_TAG((uintptr_t)ptr),
+ MT_INCLUDE_VALID_TAGS(excl_mask));
+ result = KSFT_FAIL;
+ break;
+ }
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ }
+ mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_all_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int run, result = KSFT_PASS;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+ /* Try to catch a excluded tag by a number of tries. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /*
+ * Here tag byte can be between 0x0 to 0xF (full allowed range)
+ * so no need to match so just verify if it is writable.
+ */
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_none_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int run;
+
+ ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+ /* Try to catch a excluded tag by a number of tries. */
+ for (run = 0; run < RUNS; run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /* Here all tags exluded so tag value generated should be 0 */
+ if (MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: included tag value found\n");
+ mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, true);
+ return KSFT_FAIL;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+ /* Check the write validity of the untagged pointer */
+ memset((void *)ptr, '1', BUFFER_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid)
+ break;
+ }
+ mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false);
+ if (cur_mte_cxt.fault_valid)
+ return KSFT_FAIL;
+ else
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check an included tag value with sync mode\n");
+ evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check different included tags value with sync mode\n");
+ evaluate_test(check_none_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check none included tags value with sync mode\n");
+ evaluate_test(check_all_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check all included tags value with sync mode\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
new file mode 100644
index 000000000000..594e98e76880
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+static size_t page_sz;
+
+static int check_usermem_access_fault(int mem_type, int mode, int mapping)
+{
+ int fd, i, err;
+ char val = 'A';
+ size_t len, read_len;
+ void *ptr, *ptr_next;
+
+ err = KSFT_FAIL;
+ len = 2 * page_sz;
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ for (i = 0; i < len; i++)
+ write(fd, &val, sizeof(val));
+ lseek(fd, 0, 0);
+ ptr = mte_allocate_memory(len, mem_type, mapping, true);
+ if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, len);
+ /* Copy from file into buffer with valid tag */
+ read_len = read(fd, ptr, len);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid || read_len < len)
+ goto usermem_acc_err;
+ /* Verify same pattern is read */
+ for (i = 0; i < len; i++)
+ if (*(char *)(ptr + i) != val)
+ break;
+ if (i < len)
+ goto usermem_acc_err;
+
+ /* Tag the next half of memory with different value */
+ ptr_next = (void *)((unsigned long)ptr + page_sz);
+ ptr_next = mte_insert_new_tag(ptr_next);
+ mte_set_tag_address_range(ptr_next, page_sz);
+
+ lseek(fd, 0, 0);
+ /* Copy from file into buffer with invalid tag */
+ read_len = read(fd, ptr, len);
+ mte_wait_after_trig();
+ /*
+ * Accessing user memory in kernel with invalid tag should fail in sync
+ * mode without fault but may not fail in async mode as per the
+ * implemented MTE userspace support in Arm64 kernel.
+ */
+ if (mode == MTE_SYNC_ERR &&
+ !cur_mte_cxt.fault_valid && read_len < len) {
+ err = KSFT_PASS;
+ } else if (mode == MTE_ASYNC_ERR &&
+ !cur_mte_cxt.fault_valid && read_len == len) {
+ err = KSFT_PASS;
+ }
+usermem_acc_err:
+ mte_free_memory((void *)ptr, len, mem_type, true);
+ close(fd);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ page_sz = getpagesize();
+ if (!page_sz) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ err = mte_default_setup();
+ if (err)
+ return err;
+ /* Register signal handlers */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check memory access from kernel in sync mode, private mapping and mmap memory\n");
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
+
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check memory access from kernel in async mode, private mapping and mmap memory\n");
+ evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check memory access from kernel in async mode, shared mapping and mmap memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
new file mode 100644
index 000000000000..39f8908988ea
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/auxvec.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define INIT_BUFFER_SIZE 256
+
+struct mte_fault_cxt cur_mte_cxt;
+static unsigned int mte_cur_mode;
+static unsigned int mte_cur_pstate_tco;
+
+void mte_default_handler(int signum, siginfo_t *si, void *uc)
+{
+ unsigned long addr = (unsigned long)si->si_addr;
+
+ if (signum == SIGSEGV) {
+#ifdef DEBUG
+ ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+#endif
+ if (si->si_code == SEGV_MTEAERR) {
+ if (cur_mte_cxt.trig_si_code == si->si_code)
+ cur_mte_cxt.fault_valid = true;
+ return;
+ }
+ /* Compare the context for precise error */
+ else if (si->si_code == SEGV_MTESERR) {
+ if (cur_mte_cxt.trig_si_code == si->si_code &&
+ ((cur_mte_cxt.trig_range >= 0 &&
+ addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ (cur_mte_cxt.trig_range < 0 &&
+ addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
+ cur_mte_cxt.fault_valid = true;
+ /* Adjust the pc by 4 */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+ } else {
+ ksft_print_msg("Invalid MTE synchronous exception caught!\n");
+ exit(1);
+ }
+ } else {
+ ksft_print_msg("Unknown SIGSEGV exception caught!\n");
+ exit(1);
+ }
+ } else if (signum == SIGBUS) {
+ ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+ if ((cur_mte_cxt.trig_range >= 0 &&
+ addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ (cur_mte_cxt.trig_range < 0 &&
+ addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
+ cur_mte_cxt.fault_valid = true;
+ /* Adjust the pc by 4 */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+ }
+ }
+}
+
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *))
+{
+ struct sigaction sa;
+
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sigaction(signal, &sa, NULL);
+}
+
+void mte_wait_after_trig(void)
+{
+ sched_yield();
+}
+
+void *mte_insert_tags(void *ptr, size_t size)
+{
+ void *tag_ptr;
+ int align_size;
+
+ if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+ ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+ return NULL;
+ }
+ align_size = MT_ALIGN_UP(size);
+ tag_ptr = mte_insert_random_tag(ptr);
+ mte_set_tag_address_range(tag_ptr, align_size);
+ return tag_ptr;
+}
+
+void mte_clear_tags(void *ptr, size_t size)
+{
+ if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+ ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+ return;
+ }
+ size = MT_ALIGN_UP(size);
+ ptr = (void *)MT_CLEAR_TAG((unsigned long)ptr);
+ mte_clear_tag_address_range(ptr, size);
+}
+
+static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after,
+ bool tags, int fd)
+{
+ void *ptr;
+ int prot_flag, map_flag;
+ size_t entire_size = size + range_before + range_after;
+
+ if (mem_type != USE_MALLOC && mem_type != USE_MMAP &&
+ mem_type != USE_MPROTECT) {
+ ksft_print_msg("FAIL: Invalid allocate request\n");
+ return NULL;
+ }
+ if (mem_type == USE_MALLOC)
+ return malloc(entire_size) + range_before;
+
+ prot_flag = PROT_READ | PROT_WRITE;
+ if (mem_type == USE_MMAP)
+ prot_flag |= PROT_MTE;
+
+ map_flag = mapping;
+ if (fd == -1)
+ map_flag = MAP_ANONYMOUS | map_flag;
+ if (!(mapping & MAP_SHARED))
+ map_flag |= MAP_PRIVATE;
+ ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0);
+ if (ptr == MAP_FAILED) {
+ ksft_print_msg("FAIL: mmap allocation\n");
+ return NULL;
+ }
+ if (mem_type == USE_MPROTECT) {
+ if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) {
+ munmap(ptr, size);
+ ksft_print_msg("FAIL: mprotect PROT_MTE property\n");
+ return NULL;
+ }
+ }
+ if (tags)
+ ptr = mte_insert_tags(ptr + range_before, size);
+ return ptr;
+}
+
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after)
+{
+ return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+ range_after, true, -1);
+}
+
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags)
+{
+ return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, -1);
+}
+
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags, int fd)
+{
+ int index;
+ char buffer[INIT_BUFFER_SIZE];
+
+ if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+ ksft_print_msg("FAIL: Invalid mmap file request\n");
+ return NULL;
+ }
+ /* Initialize the file for mappable size */
+ lseek(fd, 0, SEEK_SET);
+ for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE)
+ write(fd, buffer, INIT_BUFFER_SIZE);
+ index -= INIT_BUFFER_SIZE;
+ write(fd, buffer, size - index);
+ return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd);
+}
+
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after, int fd)
+{
+ int index;
+ char buffer[INIT_BUFFER_SIZE];
+ int map_size = size + range_before + range_after;
+
+ if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+ ksft_print_msg("FAIL: Invalid mmap file request\n");
+ return NULL;
+ }
+ /* Initialize the file for mappable size */
+ lseek(fd, 0, SEEK_SET);
+ for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE)
+ write(fd, buffer, INIT_BUFFER_SIZE);
+ index -= INIT_BUFFER_SIZE;
+ write(fd, buffer, map_size - index);
+ return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+ range_after, true, fd);
+}
+
+static void __mte_free_memory_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after, bool tags)
+{
+ switch (mem_type) {
+ case USE_MALLOC:
+ free(ptr - range_before);
+ break;
+ case USE_MMAP:
+ case USE_MPROTECT:
+ if (tags)
+ mte_clear_tags(ptr, size);
+ munmap(ptr - range_before, size + range_before + range_after);
+ break;
+ default:
+ ksft_print_msg("FAIL: Invalid free request\n");
+ break;
+ }
+}
+
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after)
+{
+ __mte_free_memory_range(ptr, size, mem_type, range_before, range_after, true);
+}
+
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags)
+{
+ __mte_free_memory_range(ptr, size, mem_type, 0, 0, tags);
+}
+
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range)
+{
+ cur_mte_cxt.fault_valid = false;
+ cur_mte_cxt.trig_addr = ptr;
+ cur_mte_cxt.trig_range = range;
+ if (mode == MTE_SYNC_ERR)
+ cur_mte_cxt.trig_si_code = SEGV_MTESERR;
+ else if (mode == MTE_ASYNC_ERR)
+ cur_mte_cxt.trig_si_code = SEGV_MTEAERR;
+ else
+ cur_mte_cxt.trig_si_code = 0;
+}
+
+int mte_switch_mode(int mte_option, unsigned long incl_mask)
+{
+ unsigned long en = 0;
+
+ if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR ||
+ mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
+ ksft_print_msg("FAIL: Invalid mte config option\n");
+ return -EINVAL;
+ }
+ en = PR_TAGGED_ADDR_ENABLE;
+ if (mte_option == MTE_SYNC_ERR)
+ en |= PR_MTE_TCF_SYNC;
+ else if (mte_option == MTE_ASYNC_ERR)
+ en |= PR_MTE_TCF_ASYNC;
+ else if (mte_option == MTE_NONE_ERR)
+ en |= PR_MTE_TCF_NONE;
+
+ en |= (incl_mask << PR_MTE_TAG_SHIFT);
+ /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
+ if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) {
+ ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+#define ID_AA64PFR1_MTE_SHIFT 8
+#define ID_AA64PFR1_MTE 2
+
+int mte_default_setup(void)
+{
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+ unsigned long en = 0;
+ int ret;
+
+ if (!(hwcaps & HWCAP_CPUID)) {
+ ksft_print_msg("FAIL: CPUID registers unavailable\n");
+ return KSFT_FAIL;
+ }
+ /* Read ID_AA64PFR1_EL1 register */
+ asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory");
+ if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) {
+ ksft_print_msg("FAIL: MTE features unavailable\n");
+ return KSFT_SKIP;
+ }
+ /* Get current mte mode */
+ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0);
+ if (ret < 0) {
+ ksft_print_msg("FAIL:prctl PR_GET_TAGGED_ADDR_CTRL with error =%d\n", ret);
+ return KSFT_FAIL;
+ }
+ if (ret & PR_MTE_TCF_SYNC)
+ mte_cur_mode = MTE_SYNC_ERR;
+ else if (ret & PR_MTE_TCF_ASYNC)
+ mte_cur_mode = MTE_ASYNC_ERR;
+ else if (ret & PR_MTE_TCF_NONE)
+ mte_cur_mode = MTE_NONE_ERR;
+
+ mte_cur_pstate_tco = mte_get_pstate_tco();
+ /* Disable PSTATE.TCO */
+ mte_disable_pstate_tco();
+ return 0;
+}
+
+void mte_restore_setup(void)
+{
+ mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN)
+ mte_enable_pstate_tco();
+ else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS)
+ mte_disable_pstate_tco();
+}
+
+int create_temp_file(void)
+{
+ int fd;
+ char filename[] = "/dev/shm/tmp_XXXXXX";
+
+ /* Create a file in the tmpfs filesystem */
+ fd = mkstemp(&filename[0]);
+ if (fd == -1) {
+ ksft_print_msg("FAIL: Unable to open temporary file\n");
+ return 0;
+ }
+ unlink(&filename[0]);
+ return fd;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
new file mode 100644
index 000000000000..195a7d1879e6
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _MTE_COMMON_UTIL_H
+#define _MTE_COMMON_UTIL_H
+
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include "mte_def.h"
+#include "kselftest.h"
+
+enum mte_mem_type {
+ USE_MALLOC,
+ USE_MMAP,
+ USE_MPROTECT,
+};
+
+enum mte_mode {
+ MTE_NONE_ERR,
+ MTE_SYNC_ERR,
+ MTE_ASYNC_ERR,
+};
+
+struct mte_fault_cxt {
+ /* Address start which triggers mte tag fault */
+ unsigned long trig_addr;
+ /* Address range for mte tag fault and negative value means underflow */
+ ssize_t trig_range;
+ /* siginfo si code */
+ unsigned long trig_si_code;
+ /* Flag to denote if correct fault caught */
+ bool fault_valid;
+};
+
+extern struct mte_fault_cxt cur_mte_cxt;
+
+/* MTE utility functions */
+void mte_default_handler(int signum, siginfo_t *si, void *uc);
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *));
+void mte_wait_after_trig(void);
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags);
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after);
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping,
+ bool tags, int fd);
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after, int fd);
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags);
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after);
+void *mte_insert_tags(void *ptr, size_t size);
+void mte_clear_tags(void *ptr, size_t size);
+int mte_default_setup(void);
+void mte_restore_setup(void);
+int mte_switch_mode(int mte_option, unsigned long incl_mask);
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range);
+
+/* Common utility functions */
+int create_temp_file(void);
+
+/* Assembly MTE utility functions */
+void *mte_insert_random_tag(void *ptr);
+void *mte_insert_new_tag(void *ptr);
+void *mte_get_tag_address(void *ptr);
+void mte_set_tag_address_range(void *ptr, int range);
+void mte_clear_tag_address_range(void *ptr, int range);
+void mte_disable_pstate_tco(void);
+void mte_enable_pstate_tco(void);
+unsigned int mte_get_pstate_tco(void);
+
+/* Test framework static inline functions/macros */
+static inline void evaluate_test(int err, const char *msg)
+{
+ if (err == KSFT_PASS)
+ ksft_test_result_pass(msg);
+ else if (err == KSFT_FAIL)
+ ksft_test_result_fail(msg);
+}
+
+static inline int check_allocated_memory(void *ptr, size_t size,
+ int mem_type, bool tags)
+{
+ if (ptr == NULL) {
+ ksft_print_msg("FAIL: memory allocation\n");
+ return KSFT_FAIL;
+ }
+
+ if (tags && !MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+ mte_free_memory((void *)ptr, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+
+ return KSFT_PASS;
+}
+
+static inline int check_allocated_memory_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after)
+{
+ if (ptr == NULL) {
+ ksft_print_msg("FAIL: memory allocation\n");
+ return KSFT_FAIL;
+ }
+
+ if (!MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+ mte_free_memory_tag_range((void *)ptr, size, mem_type, range_before,
+ range_after);
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+#endif /* _MTE_COMMON_UTIL_H */
diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h
new file mode 100644
index 000000000000..9b188254b61a
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_def.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+/*
+ * Below definitions may be found in kernel headers, However, they are
+ * redefined here to decouple the MTE selftests compilations from them.
+ */
+#ifndef SEGV_MTEAERR
+#define SEGV_MTEAERR 8
+#endif
+#ifndef SEGV_MTESERR
+#define SEGV_MTESERR 9
+#endif
+#ifndef PROT_MTE
+#define PROT_MTE 0x20
+#endif
+#ifndef HWCAP2_MTE
+#define HWCAP2_MTE (1 << 18)
+#endif
+
+#ifndef PR_MTE_TCF_SHIFT
+#define PR_MTE_TCF_SHIFT 1
+#endif
+#ifndef PR_MTE_TCF_NONE
+#define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_SYNC
+#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_ASYNC
+#define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TAG_SHIFT
+#define PR_MTE_TAG_SHIFT 3
+#endif
+
+/* MTE Hardware feature definitions below. */
+#define MT_TAG_SHIFT 56
+#define MT_TAG_MASK 0xFUL
+#define MT_FREE_TAG 0x0UL
+#define MT_GRANULE_SIZE 16
+#define MT_TAG_COUNT 16
+#define MT_INCLUDE_TAG_MASK 0xFFFF
+#define MT_EXCLUDE_TAG_MASK 0x0
+
+#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1)
+#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT))
+#define MT_SET_TAG(x, y) ((x) | (y << MT_TAG_SHIFT))
+#define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK))
+#define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE))
+
+#define MT_PSTATE_TCO_SHIFT 25
+#define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT)
+#define MT_PSTATE_TCO_EN 1
+#define MT_PSTATE_TCO_DIS 0
+
+#define MT_EXCLUDE_TAG(x) (1 << (x))
+#define MT_INCLUDE_VALID_TAG(x) (MT_INCLUDE_TAG_MASK ^ MT_EXCLUDE_TAG(x))
+#define MT_INCLUDE_VALID_TAGS(x) (MT_INCLUDE_TAG_MASK ^ (x))
+#define MTE_ALLOW_NON_ZERO_TAG MT_INCLUDE_VALID_TAG(0)
diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S
new file mode 100644
index 000000000000..a02c04cd0aac
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_helper.S
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#include "mte_def.h"
+
+#define ENTRY(name) \
+ .globl name ;\
+ .p2align 2;\
+ .type name, @function ;\
+name:
+
+#define ENDPROC(name) \
+ .size name, .-name ;
+
+ .text
+/*
+ * mte_insert_random_tag: Insert random tag and might be same as the source tag if
+ * the source pointer has it.
+ * Input:
+ * x0 - source pointer with a tag/no-tag
+ * Return:
+ * x0 - pointer with random tag
+ */
+ENTRY(mte_insert_random_tag)
+ irg x0, x0, xzr
+ ret
+ENDPROC(mte_insert_random_tag)
+
+/*
+ * mte_insert_new_tag: Insert new tag and different from the source tag if
+ * source pointer has it.
+ * Input:
+ * x0 - source pointer with a tag/no-tag
+ * Return:
+ * x0 - pointer with random tag
+ */
+ENTRY(mte_insert_new_tag)
+ gmi x1, x0, xzr
+ irg x0, x0, x1
+ ret
+ENDPROC(mte_insert_new_tag)
+
+/*
+ * mte_get_tag_address: Get the tag from given address.
+ * Input:
+ * x0 - source pointer
+ * Return:
+ * x0 - pointer with appended tag
+ */
+ENTRY(mte_get_tag_address)
+ ldg x0, [x0]
+ ret
+ENDPROC(mte_get_tag_address)
+
+/*
+ * mte_set_tag_address_range: Set the tag range from the given address
+ * Input:
+ * x0 - source pointer with tag data
+ * x1 - range
+ * Return:
+ * none
+ */
+ENTRY(mte_set_tag_address_range)
+ cbz x1, 2f
+1:
+ stg x0, [x0, #0x0]
+ add x0, x0, #MT_GRANULE_SIZE
+ sub x1, x1, #MT_GRANULE_SIZE
+ cbnz x1, 1b
+2:
+ ret
+ENDPROC(mte_set_tag_address_range)
+
+/*
+ * mt_clear_tag_address_range: Clear the tag range from the given address
+ * Input:
+ * x0 - source pointer with tag data
+ * x1 - range
+ * Return:
+ * none
+ */
+ENTRY(mte_clear_tag_address_range)
+ cbz x1, 2f
+1:
+ stzg x0, [x0, #0x0]
+ add x0, x0, #MT_GRANULE_SIZE
+ sub x1, x1, #MT_GRANULE_SIZE
+ cbnz x1, 1b
+2:
+ ret
+ENDPROC(mte_clear_tag_address_range)
+
+/*
+ * mte_enable_pstate_tco: Enable PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * none
+ */
+ENTRY(mte_enable_pstate_tco)
+ msr tco, #MT_PSTATE_TCO_EN
+ ret
+ENDPROC(mte_enable_pstate_tco)
+
+/*
+ * mte_disable_pstate_tco: Disable PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * none
+ */
+ENTRY(mte_disable_pstate_tco)
+ msr tco, #MT_PSTATE_TCO_DIS
+ ret
+ENDPROC(mte_disable_pstate_tco)
+
+/*
+ * mte_get_pstate_tco: Get PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * x0
+ */
+ENTRY(mte_get_pstate_tco)
+ mrs x0, tco
+ ubfx x0, x0, #MT_PSTATE_TCO_SHIFT, #1
+ ret
+ENDPROC(mte_get_pstate_tco)
diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore
new file mode 100644
index 000000000000..155137d92722
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/.gitignore
@@ -0,0 +1,2 @@
+exec_target
+pac
diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile
new file mode 100644
index 000000000000..72e290b0b10c
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/Makefile
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+CFLAGS += -mbranch-protection=pac-ret
+# check if the compiler supports ARMv8.3 and branch protection with PAuth
+pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(pauth_cc_support),1)
+TEST_GEN_PROGS := pac
+TEST_GEN_FILES := pac_corruptor.o helper.o
+TEST_GEN_PROGS_EXTENDED := exec_target
+endif
+
+include ../../lib.mk
+
+ifeq ($(pauth_cc_support),1)
+# pac* and aut* instructions are not available on architectures berfore
+# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly
+$(OUTPUT)/pac_corruptor.o: pac_corruptor.S
+ $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+$(OUTPUT)/helper.o: helper.c
+ $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or
+# greater, gcc emits pac* instructions which are not in HINT NOP space,
+# preventing the tests from occurring at all. Compile for ARMv8.2 so tests can
+# run on earlier targets and print a meaningful error messages
+$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o
+ $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+
+$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o
+ $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+endif
diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c
new file mode 100644
index 000000000000..4435600ca400
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/exec_target.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+
+#include "helper.h"
+
+int main(void)
+{
+ struct signatures signed_vals;
+ unsigned long hwcaps;
+ size_t val;
+
+ fread(&val, sizeof(size_t), 1, stdin);
+
+ /* don't try to execute illegal (unimplemented) instructions) caller
+ * should have checked this and keep worker simple
+ */
+ hwcaps = getauxval(AT_HWCAP);
+
+ if (hwcaps & HWCAP_PACA) {
+ signed_vals.keyia = keyia_sign(val);
+ signed_vals.keyib = keyib_sign(val);
+ signed_vals.keyda = keyda_sign(val);
+ signed_vals.keydb = keydb_sign(val);
+ }
+ signed_vals.keyg = (hwcaps & HWCAP_PACG) ? keyg_sign(val) : 0;
+
+ fwrite(&signed_vals, sizeof(struct signatures), 1, stdout);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c
new file mode 100644
index 000000000000..2c201e7d0d50
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include "helper.h"
+
+size_t keyia_sign(size_t ptr)
+{
+ asm volatile("paciza %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyib_sign(size_t ptr)
+{
+ asm volatile("pacizb %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyda_sign(size_t ptr)
+{
+ asm volatile("pacdza %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keydb_sign(size_t ptr)
+{
+ asm volatile("pacdzb %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyg_sign(size_t ptr)
+{
+ /* output is encoded in the upper 32 bits */
+ size_t dest = 0;
+ size_t modifier = 0;
+
+ asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier));
+
+ return dest;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h
new file mode 100644
index 000000000000..652496c7b411
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _HELPER_H_
+#define _HELPER_H_
+
+#include <stdlib.h>
+
+#define NKEYS 5
+
+struct signatures {
+ size_t keyia;
+ size_t keyib;
+ size_t keyda;
+ size_t keydb;
+ size_t keyg;
+};
+
+void pac_corruptor(void);
+
+/* PAuth sign a value with key ia and modifier value 0 */
+size_t keyia_sign(size_t val);
+size_t keyib_sign(size_t val);
+size_t keyda_sign(size_t val);
+size_t keydb_sign(size_t val);
+size_t keyg_sign(size_t val);
+
+#endif
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
new file mode 100644
index 000000000000..592fe538506e
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <sys/auxv.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <sched.h>
+
+#include "../../kselftest_harness.h"
+#include "helper.h"
+
+#define PAC_COLLISION_ATTEMPTS 10
+/*
+ * The kernel sets TBID by default. So bits 55 and above should remain
+ * untouched no matter what.
+ * The VA space size is 48 bits. Bigger is opt-in.
+ */
+#define PAC_MASK (~0xff80ffffffffffff)
+#define ARBITRARY_VALUE (0x1234)
+#define ASSERT_PAUTH_ENABLED() \
+do { \
+ unsigned long hwcaps = getauxval(AT_HWCAP); \
+ /* data key instructions are not in NOP space. This prevents a SIGILL */ \
+ ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \
+} while (0)
+#define ASSERT_GENERIC_PAUTH_ENABLED() \
+do { \
+ unsigned long hwcaps = getauxval(AT_HWCAP); \
+ /* generic key instructions are not in NOP space. This prevents a SIGILL */ \
+ ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \
+} while (0)
+
+void sign_specific(struct signatures *sign, size_t val)
+{
+ sign->keyia = keyia_sign(val);
+ sign->keyib = keyib_sign(val);
+ sign->keyda = keyda_sign(val);
+ sign->keydb = keydb_sign(val);
+}
+
+void sign_all(struct signatures *sign, size_t val)
+{
+ sign->keyia = keyia_sign(val);
+ sign->keyib = keyib_sign(val);
+ sign->keyda = keyda_sign(val);
+ sign->keydb = keydb_sign(val);
+ sign->keyg = keyg_sign(val);
+}
+
+int n_same(struct signatures *old, struct signatures *new, int nkeys)
+{
+ int res = 0;
+
+ res += old->keyia == new->keyia;
+ res += old->keyib == new->keyib;
+ res += old->keyda == new->keyda;
+ res += old->keydb == new->keydb;
+ if (nkeys == NKEYS)
+ res += old->keyg == new->keyg;
+
+ return res;
+}
+
+int n_same_single_set(struct signatures *sign, int nkeys)
+{
+ size_t vals[nkeys];
+ int same = 0;
+
+ vals[0] = sign->keyia & PAC_MASK;
+ vals[1] = sign->keyib & PAC_MASK;
+ vals[2] = sign->keyda & PAC_MASK;
+ vals[3] = sign->keydb & PAC_MASK;
+
+ if (nkeys >= 4)
+ vals[4] = sign->keyg & PAC_MASK;
+
+ for (int i = 0; i < nkeys - 1; i++) {
+ for (int j = i + 1; j < nkeys; j++) {
+ if (vals[i] == vals[j])
+ same += 1;
+ }
+ }
+ return same;
+}
+
+int exec_sign_all(struct signatures *signed_vals, size_t val)
+{
+ int new_stdin[2];
+ int new_stdout[2];
+ int status;
+ int i;
+ ssize_t ret;
+ pid_t pid;
+ cpu_set_t mask;
+
+ ret = pipe(new_stdin);
+ if (ret == -1) {
+ perror("pipe returned error");
+ return -1;
+ }
+
+ ret = pipe(new_stdout);
+ if (ret == -1) {
+ perror("pipe returned error");
+ return -1;
+ }
+
+ /*
+ * pin this process and all its children to a single CPU, so it can also
+ * guarantee a context switch with its child
+ */
+ sched_getaffinity(0, sizeof(mask), &mask);
+
+ for (i = 0; i < sizeof(cpu_set_t); i++)
+ if (CPU_ISSET(i, &mask))
+ break;
+
+ CPU_ZERO(&mask);
+ CPU_SET(i, &mask);
+ sched_setaffinity(0, sizeof(mask), &mask);
+
+ pid = fork();
+ // child
+ if (pid == 0) {
+ dup2(new_stdin[0], STDIN_FILENO);
+ if (ret == -1) {
+ perror("dup2 returned error");
+ exit(1);
+ }
+
+ dup2(new_stdout[1], STDOUT_FILENO);
+ if (ret == -1) {
+ perror("dup2 returned error");
+ exit(1);
+ }
+
+ close(new_stdin[0]);
+ close(new_stdin[1]);
+ close(new_stdout[0]);
+ close(new_stdout[1]);
+
+ ret = execl("exec_target", "exec_target", (char *)NULL);
+ if (ret == -1) {
+ perror("exec returned error");
+ exit(1);
+ }
+ }
+
+ close(new_stdin[0]);
+ close(new_stdout[1]);
+
+ ret = write(new_stdin[1], &val, sizeof(size_t));
+ if (ret == -1) {
+ perror("write returned error");
+ return -1;
+ }
+
+ /*
+ * wait for the worker to finish, so that read() reads all data
+ * will also context switch with worker so that this function can be used
+ * for context switch tests
+ */
+ waitpid(pid, &status, 0);
+ if (WIFEXITED(status) == 0) {
+ fprintf(stderr, "worker exited unexpectedly\n");
+ return -1;
+ }
+ if (WEXITSTATUS(status) != 0) {
+ fprintf(stderr, "worker exited with error\n");
+ return -1;
+ }
+
+ ret = read(new_stdout[0], signed_vals, sizeof(struct signatures));
+ if (ret == -1) {
+ perror("read returned error");
+ return -1;
+ }
+
+ return 0;
+}
+
+sigjmp_buf jmpbuf;
+void pac_signal_handler(int signum, siginfo_t *si, void *uc)
+{
+ if (signum == SIGSEGV || signum == SIGILL)
+ siglongjmp(jmpbuf, 1);
+}
+
+/* check that a corrupted PAC results in SIGSEGV or SIGILL */
+TEST(corrupt_pac)
+{
+ struct sigaction sa;
+
+ ASSERT_PAUTH_ENABLED();
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ sa.sa_sigaction = pac_signal_handler;
+ sa.sa_flags = SA_SIGINFO | SA_RESETHAND;
+ sigemptyset(&sa.sa_mask);
+
+ sigaction(SIGSEGV, &sa, NULL);
+ sigaction(SIGILL, &sa, NULL);
+
+ pac_corruptor();
+ ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur");
+ }
+}
+
+/*
+ * There are no separate pac* and aut* controls so checking only the pac*
+ * instructions is sufficient
+ */
+TEST(pac_instructions_not_nop)
+{
+ size_t keyia = 0;
+ size_t keyib = 0;
+ size_t keyda = 0;
+ size_t keydb = 0;
+
+ ASSERT_PAUTH_ENABLED();
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ keyia |= keyia_sign(i) & PAC_MASK;
+ keyib |= keyib_sign(i) & PAC_MASK;
+ keyda |= keyda_sign(i) & PAC_MASK;
+ keydb |= keydb_sign(i) & PAC_MASK;
+ }
+
+ ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing");
+ ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing");
+ ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing");
+ ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing");
+}
+
+TEST(pac_instructions_not_nop_generic)
+{
+ size_t keyg = 0;
+
+ ASSERT_GENERIC_PAUTH_ENABLED();
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++)
+ keyg |= keyg_sign(i) & PAC_MASK;
+
+ ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing");
+}
+
+TEST(single_thread_different_keys)
+{
+ int same = 10;
+ int nkeys = NKEYS;
+ int tmp;
+ struct signatures signed_vals;
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+
+ /* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+ ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ if (!(hwcaps & HWCAP_PACG)) {
+ TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+ nkeys = NKEYS - 1;
+ }
+
+ /*
+ * In Linux the PAC field can be up to 7 bits wide. Even if keys are
+ * different, there is about 5% chance for PACs to collide with
+ * different addresses. This chance rapidly increases with fewer bits
+ * allocated for the PAC (e.g. wider address). A comparison of the keys
+ * directly will be more reliable.
+ * All signed values need to be different at least once out of n
+ * attempts to be certain that the keys are different
+ */
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ if (nkeys == NKEYS)
+ sign_all(&signed_vals, i);
+ else
+ sign_specific(&signed_vals, i);
+
+ tmp = n_same_single_set(&signed_vals, nkeys);
+ if (tmp < same)
+ same = tmp;
+ }
+
+ ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same);
+}
+
+/*
+ * fork() does not change keys. Only exec() does so call a worker program.
+ * Its only job is to sign a value and report back the resutls
+ */
+TEST(exec_changed_keys)
+{
+ struct signatures new_keys;
+ struct signatures old_keys;
+ int ret;
+ int same = 10;
+ int nkeys = NKEYS;
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+
+ /* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+ ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+ if (!(hwcaps & HWCAP_PACG)) {
+ TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+ nkeys = NKEYS - 1;
+ }
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ ret = exec_sign_all(&new_keys, i);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ if (nkeys == NKEYS)
+ sign_all(&old_keys, i);
+ else
+ sign_specific(&old_keys, i);
+
+ ret = n_same(&old_keys, &new_keys, nkeys);
+ if (ret < same)
+ same = ret;
+ }
+
+ ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same);
+}
+
+TEST(context_switch_keep_keys)
+{
+ int ret;
+ struct signatures trash;
+ struct signatures before;
+ struct signatures after;
+
+ ASSERT_PAUTH_ENABLED();
+
+ sign_specific(&before, ARBITRARY_VALUE);
+
+ /* will context switch with a process with different keys at least once */
+ ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ sign_specific(&after, ARBITRARY_VALUE);
+
+ ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching");
+ ASSERT_EQ(before.keyib, after.keyib) TH_LOG("keyib changed after context switching");
+ ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching");
+ ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching");
+}
+
+TEST(context_switch_keep_keys_generic)
+{
+ int ret;
+ struct signatures trash;
+ size_t before;
+ size_t after;
+
+ ASSERT_GENERIC_PAUTH_ENABLED();
+
+ before = keyg_sign(ARBITRARY_VALUE);
+
+ /* will context switch with a process with different keys at least once */
+ ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ after = keyg_sign(ARBITRARY_VALUE);
+
+ ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
new file mode 100644
index 000000000000..aa6588050752
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+.global pac_corruptor
+
+.text
+/*
+ * Corrupting a single bit of the PAC ensures the authentication will fail. It
+ * also guarantees no possible collision. TCR_EL1.TBI0 is set by default so no
+ * top byte PAC is tested
+ */
+ pac_corruptor:
+ paciasp
+
+ /* corrupt the top bit of the PAC */
+ eor lr, lr, #1 << 53
+
+ autiasp
+ ret
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 1bb204cee853..9a0946ddb705 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -6,7 +6,6 @@ test_lpm_map
test_tag
FEATURE-DUMP.libbpf
fixdep
-test_align
test_dev_cgroup
/test_progs*
test_tcpbpf_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e7a8cf83ba48..fc946b7ac288 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -32,7 +32,7 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
- test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
+ test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
test_cgroup_storage \
test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
@@ -102,7 +102,7 @@ endif
OVERRIDE_TARGETS := 1
override define CLEAN
$(call msg,CLEAN)
- $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+ $(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
endef
include ../lib.mk
@@ -123,17 +123,21 @@ $(notdir $(TEST_GEN_PROGS) \
$(TEST_GEN_PROGS_EXTENDED) \
$(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
+$(OUTPUT)/%.o: %.c
+ $(call msg,CC,,$@)
+ $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+
$(OUTPUT)/%:%.c
$(call msg,BINARY,,$@)
- $(LINK.c) $^ $(LDLIBS) -o $@
+ $(Q)$(LINK.c) $^ $(LDLIBS) -o $@
$(OUTPUT)/urandom_read: urandom_read.c
$(call msg,BINARY,,$@)
- $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id
+ $(Q)$(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id
$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
$(call msg,CC,,$@)
- $(CC) -c $(CFLAGS) -o $@ $<
+ $(Q)$(CC) -c $(CFLAGS) -o $@ $<
VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
@@ -142,7 +146,9 @@ VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
/boot/vmlinux-$(shell uname -r)
VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
-$(OUTPUT)/runqslower: $(BPFOBJ)
+DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
+
+$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \
@@ -164,7 +170,6 @@ $(OUTPUT)/test_netcnt: cgroup_helpers.c
$(OUTPUT)/test_sock_fields: cgroup_helpers.c
$(OUTPUT)/test_sysctl: cgroup_helpers.c
-DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(BPFOBJ) | $(BUILD_DIR)/bpftool
@@ -180,15 +185,15 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(BUILD_DIR)/resolve_btfids $(INCLUDE_DIR):
$(call msg,MKDIR,,$@)
- mkdir -p $@
+ $(Q)mkdir -p $@
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
ifeq ($(VMLINUX_H),)
$(call msg,GEN,,$@)
- $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
else
$(call msg,CP,,$@)
- cp "$(VMLINUX_H)" $@
+ $(Q)cp "$(VMLINUX_H)" $@
endif
$(RESOLVE_BTFIDS): $(BPFOBJ) | $(BUILD_DIR)/resolve_btfids \
@@ -237,28 +242,28 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
# $4 - LDFLAGS
define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -target bpf -emit-llvm \
+ $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -target bpf -emit-llvm \
+ $(Q)($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
define CLANG_NATIVE_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -emit-llvm \
+ $(Q)($(CLANG) $3 -O2 -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
$(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
$(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
- $(BPF_GCC) $3 $4 -O2 -c $1 -o $2
+ $(Q)$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
@@ -300,7 +305,7 @@ ifeq ($($(TRUNNER_OUTPUT)-dir),)
$(TRUNNER_OUTPUT)-dir := y
$(TRUNNER_OUTPUT):
$$(call msg,MKDIR,,$$@)
- mkdir -p $$@
+ $(Q)mkdir -p $$@
endif
# ensure we set up BPF objects generation rule just once for a given
@@ -320,7 +325,7 @@ $(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \
$(TRUNNER_OUTPUT)/%.o \
| $(BPFTOOL) $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
- $$(BPFTOOL) gen skeleton $$< > $$@
+ $(Q)$$(BPFTOOL) gen skeleton $$< > $$@
endif
# ensure we set up tests.h header generation rule just once
@@ -344,7 +349,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_BPF_SKELS) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
- cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+ $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
@@ -352,13 +357,13 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
- $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
# only copy extra resources if in flavored build
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
ifneq ($2,)
$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
- cp -a $$^ $(TRUNNER_OUTPUT)/
+ $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/
endif
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
@@ -366,8 +371,8 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(RESOLVE_BTFIDS) \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
- $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
- $(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@
+ $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
+ $(Q)$(RESOLVE_BTFIDS) --no-fail --btf btf_data.o $$@
endef
@@ -420,17 +425,17 @@ verifier/tests.h: verifier/*.c
) > verifier/tests.h)
$(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
- $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+ $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
# Make sure we are able to include and link libbpf against c++.
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
- $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+ $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
# Benchmark runner
$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
$(call msg,CC,,$@)
- $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+ $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
$(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
$(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
@@ -443,7 +448,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
$(OUTPUT)/bench_trigger.o \
$(OUTPUT)/bench_ringbufs.o
$(call msg,BINARY,,$@)
- $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
+ $(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 4ffefdc1130f..7375d9a6d242 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -468,6 +468,7 @@ static void test_bpf_hash_map(void)
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_bpf_hash_map *skel;
int err, i, len, map_fd, iter_fd;
+ union bpf_iter_link_info linfo;
__u64 val, expected_val = 0;
struct bpf_link *link;
struct key_t {
@@ -490,13 +491,16 @@ static void test_bpf_hash_map(void)
goto out;
/* iterator with hashmap2 and hashmap3 should fail */
- opts.map_fd = bpf_map__fd(skel->maps.hashmap2);
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2);
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
if (CHECK(!IS_ERR(link), "attach_iter",
"attach_iter for hashmap2 unexpected succeeded\n"))
goto out;
- opts.map_fd = bpf_map__fd(skel->maps.hashmap3);
+ linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
if (CHECK(!IS_ERR(link), "attach_iter",
"attach_iter for hashmap3 unexpected succeeded\n"))
@@ -519,7 +523,7 @@ static void test_bpf_hash_map(void)
goto out;
}
- opts.map_fd = map_fd;
+ linfo.map.map_fd = map_fd;
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto out;
@@ -562,6 +566,7 @@ static void test_bpf_percpu_hash_map(void)
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_bpf_percpu_hash_map *skel;
int err, i, j, len, map_fd, iter_fd;
+ union bpf_iter_link_info linfo;
__u32 expected_val = 0;
struct bpf_link *link;
struct key_t {
@@ -606,7 +611,10 @@ static void test_bpf_percpu_hash_map(void)
goto out;
}
- opts.map_fd = map_fd;
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto out;
@@ -649,6 +657,7 @@ static void test_bpf_array_map(void)
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
__u32 expected_key = 0, res_first_key;
struct bpf_iter_bpf_array_map *skel;
+ union bpf_iter_link_info linfo;
int err, i, map_fd, iter_fd;
struct bpf_link *link;
char buf[64] = {};
@@ -673,7 +682,10 @@ static void test_bpf_array_map(void)
goto out;
}
- opts.map_fd = map_fd;
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto out;
@@ -730,6 +742,7 @@ static void test_bpf_percpu_array_map(void)
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_bpf_percpu_array_map *skel;
__u32 expected_key = 0, expected_val = 0;
+ union bpf_iter_link_info linfo;
int err, i, j, map_fd, iter_fd;
struct bpf_link *link;
char buf[64];
@@ -765,7 +778,10 @@ static void test_bpf_percpu_array_map(void)
goto out;
}
- opts.map_fd = map_fd;
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto out;
@@ -803,6 +819,7 @@ static void test_bpf_sk_storage_map(void)
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
int err, i, len, map_fd, iter_fd, num_sockets;
struct bpf_iter_bpf_sk_storage_map *skel;
+ union bpf_iter_link_info linfo;
int sock_fd[3] = {-1, -1, -1};
__u32 val, expected_val = 0;
struct bpf_link *link;
@@ -829,7 +846,10 @@ static void test_bpf_sk_storage_map(void)
goto out;
}
- opts.map_fd = map_fd;
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_sk_storage_map, &opts);
if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
goto out;
@@ -871,6 +891,7 @@ static void test_rdonly_buf_out_of_bound(void)
{
DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_test_kern5 *skel;
+ union bpf_iter_link_info linfo;
struct bpf_link *link;
skel = bpf_iter_test_kern5__open_and_load();
@@ -878,7 +899,10 @@ static void test_rdonly_buf_out_of_bound(void)
"skeleton open_and_load failed\n"))
return;
- opts.map_fd = bpf_map__fd(skel->maps.hashmap1);
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1);
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
if (CHECK(!IS_ERR(link), "attach_iter", "unexpected success\n"))
bpf_link__destroy(link);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index 7afa4160416f..284d5921c345 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -159,15 +159,15 @@ void test_bpf_obj_id(void)
/* Check getting link info */
info_len = sizeof(struct bpf_link_info) * 2;
bzero(&link_infos[i], info_len);
- link_infos[i].raw_tracepoint.tp_name = (__u64)&tp_name;
+ link_infos[i].raw_tracepoint.tp_name = ptr_to_u64(&tp_name);
link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]),
&link_infos[i], &info_len);
if (CHECK(err ||
link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
link_infos[i].prog_id != prog_infos[i].id ||
- link_infos[i].raw_tracepoint.tp_name != (__u64)&tp_name ||
- strcmp((char *)link_infos[i].raw_tracepoint.tp_name,
+ link_infos[i].raw_tracepoint.tp_name != ptr_to_u64(&tp_name) ||
+ strcmp(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
"sys_enter") ||
info_len != sizeof(struct bpf_link_info),
"get-link-info(fd)",
@@ -178,7 +178,7 @@ void test_bpf_obj_id(void)
link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT,
link_infos[i].id,
link_infos[i].prog_id, prog_infos[i].id,
- (char *)link_infos[i].raw_tracepoint.tp_name,
+ (const char *)u64_to_ptr(link_infos[i].raw_tracepoint.tp_name),
"sys_enter"))
goto done;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index cb33a7ee4e04..39fb81d9daeb 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -12,15 +12,16 @@ void btf_dump_printf(void *ctx, const char *fmt, va_list args)
static struct btf_dump_test_case {
const char *name;
const char *file;
+ bool known_ptr_sz;
struct btf_dump_opts opts;
} btf_dump_test_cases[] = {
- {"btf_dump: syntax", "btf_dump_test_case_syntax", {}},
- {"btf_dump: ordering", "btf_dump_test_case_ordering", {}},
- {"btf_dump: padding", "btf_dump_test_case_padding", {}},
- {"btf_dump: packing", "btf_dump_test_case_packing", {}},
- {"btf_dump: bitfields", "btf_dump_test_case_bitfields", {}},
- {"btf_dump: multidim", "btf_dump_test_case_multidim", {}},
- {"btf_dump: namespacing", "btf_dump_test_case_namespacing", {}},
+ {"btf_dump: syntax", "btf_dump_test_case_syntax", true, {}},
+ {"btf_dump: ordering", "btf_dump_test_case_ordering", false, {}},
+ {"btf_dump: padding", "btf_dump_test_case_padding", true, {}},
+ {"btf_dump: packing", "btf_dump_test_case_packing", true, {}},
+ {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true, {}},
+ {"btf_dump: multidim", "btf_dump_test_case_multidim", false, {}},
+ {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false, {}},
};
static int btf_dump_all_types(const struct btf *btf,
@@ -62,6 +63,18 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
goto done;
}
+ /* tests with t->known_ptr_sz have no "long" or "unsigned long" type,
+ * so it's impossible to determine correct pointer size; but if they
+ * do, it should be 8 regardless of host architecture, becaues BPF
+ * target is always 64-bit
+ */
+ if (!t->known_ptr_sz) {
+ btf__set_pointer_size(btf, 8);
+ } else {
+ CHECK(btf__pointer_size(btf) != 8, "ptr_sz", "exp %d, got %zu\n",
+ 8, btf__pointer_size(btf));
+ }
+
snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file);
fd = mkstemp(out_file);
if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) {
diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c
index b093787e9448..1931a158510e 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_extern.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c
@@ -159,8 +159,8 @@ void test_core_extern(void)
exp = (uint64_t *)&t->data;
for (j = 0; j < n; j++) {
CHECK(got[j] != exp[j], "check_res",
- "result #%d: expected %lx, but got %lx\n",
- j, exp[j], got[j]);
+ "result #%d: expected %llx, but got %llx\n",
+ j, (__u64)exp[j], (__u64)got[j]);
}
cleanup:
test_core_extern__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 084ed26a7d78..a54eafc5e4b3 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -237,8 +237,8 @@
.union_sz = sizeof(((type *)0)->union_field), \
.arr_sz = sizeof(((type *)0)->arr_field), \
.arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \
- .ptr_sz = sizeof(((type *)0)->ptr_field), \
- .enum_sz = sizeof(((type *)0)->enum_field), \
+ .ptr_sz = 8, /* always 8-byte pointer for BPF */ \
+ .enum_sz = sizeof(((type *)0)->enum_field), \
}
#define SIZE_CASE(name) { \
@@ -432,20 +432,20 @@ static struct core_reloc_test_case test_cases[] = {
.sb4 = -1,
.sb20 = -0x17654321,
.u32 = 0xBEEF,
- .s32 = -0x3FEDCBA987654321,
+ .s32 = -0x3FEDCBA987654321LL,
}),
BITFIELDS_CASE(bitfields___bitfield_vs_int, {
- .ub1 = 0xFEDCBA9876543210,
+ .ub1 = 0xFEDCBA9876543210LL,
.ub2 = 0xA6,
- .ub7 = -0x7EDCBA987654321,
- .sb4 = -0x6123456789ABCDE,
- .sb20 = 0xD00D,
+ .ub7 = -0x7EDCBA987654321LL,
+ .sb4 = -0x6123456789ABCDELL,
+ .sb20 = 0xD00DLL,
.u32 = -0x76543,
- .s32 = 0x0ADEADBEEFBADB0B,
+ .s32 = 0x0ADEADBEEFBADB0BLL,
}),
BITFIELDS_CASE(bitfields___just_big_enough, {
- .ub1 = 0xF,
- .ub2 = 0x0812345678FEDCBA,
+ .ub1 = 0xFLL,
+ .ub2 = 0x0812345678FEDCBALL,
}),
BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield),
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index a895bfed55db..197d0d217b56 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -16,7 +16,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
__u32 duration = 0, retval;
struct bpf_map *data_map;
const int zero = 0;
- u64 *result = NULL;
+ __u64 *result = NULL;
err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
&pkt_obj, &pkt_fd);
@@ -29,7 +29,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
link = calloc(sizeof(struct bpf_link *), prog_cnt);
prog = calloc(sizeof(struct bpf_program *), prog_cnt);
- result = malloc((prog_cnt + 32 /* spare */) * sizeof(u64));
+ result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64));
if (CHECK(!link || !prog || !result, "alloc_memory",
"failed to alloc memory"))
goto close_prog;
@@ -72,7 +72,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
goto close_prog;
for (i = 0; i < prog_cnt; i++)
- if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %ld\n",
+ if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %llu\n",
result[i]))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index f11f187990e9..cd6dc80edf18 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -591,7 +591,7 @@ void test_flow_dissector(void)
CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) ||
err || tattr.retval != 1,
tests[i].name,
- "err %d errno %d retval %d duration %d size %u/%lu\n",
+ "err %d errno %d retval %d duration %d size %u/%zu\n",
err, errno, tattr.retval, tattr.duration,
tattr.data_size_out, sizeof(flow_keys));
CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index e3cb62b0a110..9efa7e50eab2 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -5,7 +5,7 @@
static void test_global_data_number(struct bpf_object *obj, __u32 duration)
{
int i, err, map_fd;
- uint64_t num;
+ __u64 num;
map_fd = bpf_find_map(__func__, obj, "result_number");
if (CHECK_FAIL(map_fd < 0))
@@ -14,7 +14,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
struct {
char *name;
uint32_t key;
- uint64_t num;
+ __u64 num;
} tests[] = {
{ "relocate .bss reference", 0, 0 },
{ "relocate .data reference", 1, 42 },
@@ -32,7 +32,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
err = bpf_map_lookup_elem(map_fd, &tests[i].key, &num);
CHECK(err || num != tests[i].num, tests[i].name,
- "err %d result %lx expected %lx\n",
+ "err %d result %llx expected %llx\n",
err, num, tests[i].num);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 43d0b5578f46..9c3c5c0f068f 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -21,7 +21,7 @@ void test_mmap(void)
const long page_size = sysconf(_SC_PAGE_SIZE);
int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd, rdmap_fd;
struct bpf_map *data_map, *bss_map;
- void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
+ void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp0, *tmp1, *tmp2;
struct test_mmap__bss *bss_data;
struct bpf_map_info map_info;
__u32 map_info_sz = sizeof(map_info);
@@ -183,16 +183,23 @@ void test_mmap(void)
/* check some more advanced mmap() manipulations */
+ tmp0 = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS,
+ -1, 0);
+ if (CHECK(tmp0 == MAP_FAILED, "adv_mmap0", "errno %d\n", errno))
+ goto cleanup;
+
/* map all but last page: pages 1-3 mapped */
- tmp1 = mmap(NULL, 3 * page_size, PROT_READ, MAP_SHARED,
+ tmp1 = mmap(tmp0, 3 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
data_map_fd, 0);
- if (CHECK(tmp1 == MAP_FAILED, "adv_mmap1", "errno %d\n", errno))
+ if (CHECK(tmp0 != tmp1, "adv_mmap1", "tmp0: %p, tmp1: %p\n", tmp0, tmp1)) {
+ munmap(tmp0, 4 * page_size);
goto cleanup;
+ }
/* unmap second page: pages 1, 3 mapped */
err = munmap(tmp1 + page_size, page_size);
if (CHECK(err, "adv_mmap2", "errno %d\n", errno)) {
- munmap(tmp1, map_sz);
+ munmap(tmp1, 4 * page_size);
goto cleanup;
}
@@ -201,7 +208,7 @@ void test_mmap(void)
MAP_SHARED | MAP_FIXED, data_map_fd, 0);
if (CHECK(tmp2 == MAP_FAILED, "adv_mmap3", "errno %d\n", errno)) {
munmap(tmp1, page_size);
- munmap(tmp1 + 2*page_size, page_size);
+ munmap(tmp1 + 2*page_size, 2 * page_size);
goto cleanup;
}
CHECK(tmp1 + page_size != tmp2, "adv_mmap4",
@@ -211,7 +218,7 @@ void test_mmap(void)
tmp2 = mmap(tmp1, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
data_map_fd, 0);
if (CHECK(tmp2 == MAP_FAILED, "adv_mmap5", "errno %d\n", errno)) {
- munmap(tmp1, 3 * page_size); /* unmap page 1 */
+ munmap(tmp1, 4 * page_size); /* unmap page 1 */
goto cleanup;
}
CHECK(tmp1 != tmp2, "adv_mmap6", "tmp1: %p, tmp2: %p\n", tmp1, tmp2);
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index dde2b7ae7bc9..935a294f049a 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -28,7 +28,7 @@ void test_prog_run_xattr(void)
"err %d errno %d retval %d\n", err, errno, tattr.retval);
CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
- "incorrect output size, want %lu have %u\n",
+ "incorrect output size, want %zu have %u\n",
sizeof(pkt_v4), tattr.data_size_out);
CHECK_ATTR(buf[5] != 0, "overflow",
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 504abb7bfb95..7043e6ded0e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -48,21 +48,19 @@ static void test_send_signal_common(struct perf_event_attr *attr,
close(pipe_p2c[1]); /* close write */
/* notify parent signal handler is installed */
- write(pipe_c2p[1], buf, 1);
+ CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
/* make sure parent enabled bpf program to send_signal */
- read(pipe_p2c[0], buf, 1);
+ CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
/* wait a little for signal handler */
sleep(1);
- if (sigusr1_received)
- write(pipe_c2p[1], "2", 1);
- else
- write(pipe_c2p[1], "0", 1);
+ buf[0] = sigusr1_received ? '2' : '0';
+ CHECK(write(pipe_c2p[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
/* wait for parent notification and exit */
- read(pipe_p2c[0], buf, 1);
+ CHECK(read(pipe_p2c[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -99,7 +97,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
}
/* wait until child signal handler installed */
- read(pipe_c2p[0], buf, 1);
+ CHECK(read(pipe_c2p[0], buf, 1) != 1, "pipe_read", "err %d\n", -errno);
/* trigger the bpf send_signal */
skel->bss->pid = pid;
@@ -107,7 +105,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
skel->bss->signal_thread = signal_thread;
/* notify child that bpf program can send_signal now */
- write(pipe_p2c[1], buf, 1);
+ CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
/* wait for result */
err = read(pipe_c2p[0], buf, 1);
@@ -121,7 +119,7 @@ static void test_send_signal_common(struct perf_event_attr *attr,
CHECK(buf[0] != '2', test_name, "incorrect result\n");
/* notify child safe to exit */
- write(pipe_p2c[1], buf, 1);
+ CHECK(write(pipe_p2c[1], buf, 1) != 1, "pipe_write", "err %d\n", -errno);
disable_pmu:
close(pmu_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index c571584c00f5..9ff0412e1fd3 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -309,6 +309,7 @@ static void v4_to_v6(struct sockaddr_storage *ss)
v6->sin6_addr.s6_addr[10] = 0xff;
v6->sin6_addr.s6_addr[11] = 0xff;
memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4);
+ memset(&v6->sin6_addr.s6_addr[0], 0, 10);
}
static int udp_recv_send(int server_fd)
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index 25de86af2d03..fafeddaad6a9 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -81,7 +81,7 @@ void test_skb_ctx(void)
CHECK_ATTR(tattr.ctx_size_out != sizeof(skb),
"ctx_size_out",
- "incorrect output size, want %lu have %u\n",
+ "incorrect output size, want %zu have %u\n",
sizeof(skb), tattr.ctx_size_out);
for (i = 0; i < 5; i++)
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index f002e3090d92..11a769e18f5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -6,11 +6,13 @@ static __u64 read_perf_max_sample_freq(void)
{
__u64 sample_freq = 5000; /* fallback to 5000 on error */
FILE *f;
+ __u32 duration = 0;
f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
if (f == NULL)
return sample_freq;
- fscanf(f, "%llu", &sample_freq);
+ CHECK(fscanf(f, "%llu", &sample_freq) != 1, "Get max sample rate",
+ "return default value: 5000,err %d\n", -errno);
fclose(f);
return sample_freq;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 25b068591e9a..193002b14d7f 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -19,7 +19,7 @@ static int libbpf_debug_print(enum libbpf_print_level level,
log_buf = va_arg(args, char *);
if (!log_buf)
goto out;
- if (strstr(log_buf, err_str) == 0)
+ if (err_str && strstr(log_buf, err_str) == 0)
found = true;
out:
printf(format, log_buf);
diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
index c75525eab02c..dd324b4933db 100644
--- a/tools/testing/selftests/bpf/prog_tests/varlen.c
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -44,25 +44,25 @@ void test_varlen(void)
CHECK_VAL(bss->payload1_len2, size2);
CHECK_VAL(bss->total1, size1 + size2);
CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check",
- "doesn't match!");
+ "doesn't match!\n");
CHECK_VAL(data->payload2_len1, size1);
CHECK_VAL(data->payload2_len2, size2);
CHECK_VAL(data->total2, size1 + size2);
CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check",
- "doesn't match!");
+ "doesn't match!\n");
CHECK_VAL(data->payload3_len1, size1);
CHECK_VAL(data->payload3_len2, size2);
CHECK_VAL(data->total3, size1 + size2);
CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check",
- "doesn't match!");
+ "doesn't match!\n");
CHECK_VAL(data->payload4_len1, size1);
CHECK_VAL(data->payload4_len2, size2);
CHECK_VAL(data->total4, size1 + size2);
CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check",
- "doesn't match!");
+ "doesn't match!\n");
cleanup:
test_varlen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
index 07ddbfdbcab7..6dfce3fd68bc 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -47,7 +47,10 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
__u32 seq_num = ctx->meta->seq_num;
struct bpf_map *map = ctx->map;
struct key_t *key = ctx->key;
+ struct key_t tmp_key;
__u64 *val = ctx->value;
+ __u64 tmp_val = 0;
+ int ret;
if (in_test_mode) {
/* test mode is used by selftests to
@@ -61,6 +64,18 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
if (key == (void *)0 || val == (void *)0)
return 0;
+ /* update the value and then delete the <key, value> pair.
+ * it should not impact the existing 'val' which is still
+ * accessible under rcu.
+ */
+ __builtin_memcpy(&tmp_key, key, sizeof(struct key_t));
+ ret = bpf_map_update_elem(&hashmap1, &tmp_key, &tmp_val, 0);
+ if (ret)
+ return 0;
+ ret = bpf_map_delete_elem(&hashmap1, &tmp_key);
+ if (ret)
+ return 0;
+
key_sum_a += key->a;
key_sum_b += key->b;
key_sum_c += key->c;
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 34d84717c946..69139ed66216 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -1,5 +1,10 @@
#include <stdint.h>
#include <stdbool.h>
+
+void preserce_ptr_sz_fn(long x) {}
+
+#define __bpf_aligned __attribute__((aligned(8)))
+
/*
* KERNEL
*/
@@ -444,51 +449,51 @@ struct core_reloc_primitives {
char a;
int b;
enum core_reloc_primitives_enum c;
- void *d;
- int (*f)(const char *);
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___diff_enum_def {
char a;
int b;
- void *d;
- int (*f)(const char *);
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
enum {
X = 100,
Y = 200,
- } c; /* inline enum def with differing set of values */
+ } c __bpf_aligned; /* inline enum def with differing set of values */
};
struct core_reloc_primitives___diff_func_proto {
- void (*f)(int); /* incompatible function prototype */
- void *d;
- enum core_reloc_primitives_enum c;
+ void (*f)(int) __bpf_aligned; /* incompatible function prototype */
+ void *d __bpf_aligned;
+ enum core_reloc_primitives_enum c __bpf_aligned;
int b;
char a;
};
struct core_reloc_primitives___diff_ptr_type {
- const char * const d; /* different pointee type + modifiers */
- char a;
+ const char * const d __bpf_aligned; /* different pointee type + modifiers */
+ char a __bpf_aligned;
int b;
enum core_reloc_primitives_enum c;
- int (*f)(const char *);
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___err_non_enum {
char a[1];
int b;
int c; /* int instead of enum */
- void *d;
- int (*f)(const char *);
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___err_non_int {
char a[1];
- int *b; /* ptr instead of int */
- enum core_reloc_primitives_enum c;
- void *d;
- int (*f)(const char *);
+ int *b __bpf_aligned; /* ptr instead of int */
+ enum core_reloc_primitives_enum c __bpf_aligned;
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___err_non_ptr {
@@ -496,7 +501,7 @@ struct core_reloc_primitives___err_non_ptr {
int b;
enum core_reloc_primitives_enum c;
int d; /* int instead of ptr */
- int (*f)(const char *);
+ int (*f)(const char *) __bpf_aligned;
};
/*
@@ -507,7 +512,7 @@ struct core_reloc_mods_output {
};
typedef const int int_t;
-typedef const char *char_ptr_t;
+typedef const char *char_ptr_t __bpf_aligned;
typedef const int arr_t[7];
struct core_reloc_mods_substruct {
@@ -523,9 +528,9 @@ typedef struct {
struct core_reloc_mods {
int a;
int_t b;
- char *c;
+ char *c __bpf_aligned;
char_ptr_t d;
- int e[3];
+ int e[3] __bpf_aligned;
arr_t f;
struct core_reloc_mods_substruct g;
core_reloc_mods_substruct_t h;
@@ -535,9 +540,9 @@ struct core_reloc_mods {
struct core_reloc_mods___mod_swap {
int b;
int_t a;
- char *d;
+ char *d __bpf_aligned;
char_ptr_t c;
- int f[3];
+ int f[3] __bpf_aligned;
arr_t e;
struct {
int y;
@@ -555,7 +560,7 @@ typedef arr1_t arr2_t;
typedef arr2_t arr3_t;
typedef arr3_t arr4_t;
-typedef const char * const volatile fancy_char_ptr_t;
+typedef const char * const volatile fancy_char_ptr_t __bpf_aligned;
typedef core_reloc_mods_substruct_t core_reloc_mods_substruct_tt;
@@ -567,7 +572,7 @@ struct core_reloc_mods___typedefs {
arr4_t e;
fancy_char_ptr_t d;
fancy_char_ptr_t c;
- int3_t b;
+ int3_t b __bpf_aligned;
int3_t a;
};
@@ -739,19 +744,19 @@ struct core_reloc_bitfields___bit_sz_change {
int8_t sb4: 1; /* 4 -> 1 */
int32_t sb20: 30; /* 20 -> 30 */
/* non-bitfields */
- uint16_t u32; /* 32 -> 16 */
- int64_t s32; /* 32 -> 64 */
+ uint16_t u32; /* 32 -> 16 */
+ int64_t s32 __bpf_aligned; /* 32 -> 64 */
};
/* turn bitfield into non-bitfield and vice versa */
struct core_reloc_bitfields___bitfield_vs_int {
uint64_t ub1; /* 3 -> 64 non-bitfield */
uint8_t ub2; /* 20 -> 8 non-bitfield */
- int64_t ub7; /* 7 -> 64 non-bitfield signed */
- int64_t sb4; /* 4 -> 64 non-bitfield signed */
- uint64_t sb20; /* 20 -> 16 non-bitfield unsigned */
- int32_t u32: 20; /* 32 non-bitfield -> 20 bitfield */
- uint64_t s32: 60; /* 32 non-bitfield -> 60 bitfield */
+ int64_t ub7 __bpf_aligned; /* 7 -> 64 non-bitfield signed */
+ int64_t sb4 __bpf_aligned; /* 4 -> 64 non-bitfield signed */
+ uint64_t sb20 __bpf_aligned; /* 20 -> 16 non-bitfield unsigned */
+ int32_t u32: 20; /* 32 non-bitfield -> 20 bitfield */
+ uint64_t s32: 60 __bpf_aligned; /* 32 non-bitfield -> 60 bitfield */
};
struct core_reloc_bitfields___just_big_enough {
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 1f1966e86e9f..3e6912e4df3d 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -54,6 +54,7 @@ SEC("sockops")
int bpf_testcb(struct bpf_sock_ops *skops)
{
char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
+ struct bpf_sock_ops *reuse = skops;
struct tcphdr *thdr;
int good_call_rv = 0;
int bad_call_rv = 0;
@@ -62,6 +63,46 @@ int bpf_testcb(struct bpf_sock_ops *skops)
int v = 0;
int op;
+ /* Test reading fields in bpf_sock_ops using single register */
+ asm volatile (
+ "%[reuse] = *(u32 *)(%[reuse] +96)"
+ : [reuse] "+r"(reuse)
+ :);
+
+ asm volatile (
+ "%[op] = *(u32 *)(%[skops] +96)"
+ : [op] "+r"(op)
+ : [skops] "r"(skops)
+ :);
+
+ asm volatile (
+ "r9 = %[skops];\n"
+ "r8 = *(u32 *)(r9 +164);\n"
+ "*(u32 *)(r9 +164) = r8;\n"
+ :: [skops] "r"(skops)
+ : "r9", "r8");
+
+ asm volatile (
+ "r1 = %[skops];\n"
+ "r1 = *(u64 *)(r1 +184);\n"
+ "if r1 == 0 goto +1;\n"
+ "r1 = *(u32 *)(r1 +4);\n"
+ :: [skops] "r"(skops):"r1");
+
+ asm volatile (
+ "r9 = %[skops];\n"
+ "r9 = *(u64 *)(r9 +184);\n"
+ "if r9 == 0 goto +1;\n"
+ "r9 = *(u32 *)(r9 +4);\n"
+ :: [skops] "r"(skops):"r9");
+
+ asm volatile (
+ "r1 = %[skops];\n"
+ "r2 = *(u64 *)(r1 +184);\n"
+ "if r2 == 0 goto +1;\n"
+ "r2 = *(u32 *)(r2 +4);\n"
+ :: [skops] "r"(skops):"r1", "r2");
+
op = (int) skops->op;
update_event_map(op);
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
index cd4b72c55dfe..913acdffd90f 100644
--- a/tools/testing/selftests/bpf/progs/test_varlen.c
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -15,9 +15,9 @@ int test_pid = 0;
bool capture = false;
/* .bss */
-long payload1_len1 = 0;
-long payload1_len2 = 0;
-long total1 = 0;
+__u64 payload1_len1 = 0;
+__u64 payload1_len2 = 0;
+__u64 total1 = 0;
char payload1[MAX_LEN + MAX_LEN] = {};
/* .data */
diff --git a/tools/testing/selftests/bpf/settings b/tools/testing/selftests/bpf/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 305fae8f80a9..c75fc6447186 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -3883,7 +3883,7 @@ static int test_big_btf_info(unsigned int test_num)
info_garbage.garbage = 0;
err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
if (CHECK(err || info_len != sizeof(*info),
- "err:%d errno:%d info_len:%u sizeof(*info):%lu",
+ "err:%d errno:%d info_len:%u sizeof(*info):%zu",
err, errno, info_len, sizeof(*info))) {
err = -1;
goto done;
@@ -4094,7 +4094,7 @@ static int do_test_get_info(unsigned int test_num)
if (CHECK(err || !info.id || info_len != sizeof(info) ||
info.btf_size != raw_btf_size ||
(ret = memcmp(raw_btf, user_btf, expected_nbytes)),
- "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
+ "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%zu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
err, errno, info.id, info_len, sizeof(info),
raw_btf_size, info.btf_size, expected_nbytes, ret)) {
err = -1;
@@ -4730,7 +4730,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
nexpected_line = snprintf(expected_line, line_size,
"%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
- "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s,"
+ "{%llu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s,"
"%u,0x%x,[[%d,%d],[%d,%d]]}\n",
percpu_map ? "\tcpu" : "",
percpu_map ? cpu : next_key,
@@ -4738,7 +4738,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
v->unused_bits2a,
v->bits28,
v->unused_bits2b,
- v->ui64,
+ (__u64)v->ui64,
v->ui8a[0], v->ui8a[1],
v->ui8a[2], v->ui8a[3],
v->ui8a[4], v->ui8a[5],
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 754cf611723e..0d92ebcb335d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1274,6 +1274,8 @@ static void __run_parallel(unsigned int tasks,
pid_t pid[tasks];
int i;
+ fflush(stdout);
+
for (i = 0; i < tasks; i++) {
pid[i] = fork();
if (pid[i] == 0) {
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index b1e4dadacd9b..22943b58d752 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -618,7 +618,9 @@ int cd_flavor_subdir(const char *exec_name)
if (!flavor)
return 0;
flavor++;
- fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
+ if (env.verbosity > VERBOSE_NONE)
+ fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
+
return chdir(flavor);
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 6e09bf738473..dbb820dde138 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -135,6 +135,11 @@ static inline __u64 ptr_to_u64(const void *ptr)
return (__u64) (unsigned long) ptr;
}
+static inline void *u64_to_ptr(__u64 ptr)
+{
+ return (void *) (unsigned long) ptr;
+}
+
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 8549b31716ab..73da7fe8c152 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -124,17 +124,24 @@ int main(int argc, char **argv)
sprintf(test_script,
"iptables -A INPUT -p tcp --dport %d -j DROP",
TESTPORT);
- system(test_script);
+ if (system(test_script)) {
+ printf("FAILED: execute command: %s, err %d\n", test_script, -errno);
+ goto err;
+ }
sprintf(test_script,
"nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ",
TESTPORT);
- system(test_script);
+ if (system(test_script))
+ printf("execute command: %s, err %d\n", test_script, -errno);
sprintf(test_script,
"iptables -D INPUT -p tcp --dport %d -j DROP",
TESTPORT);
- system(test_script);
+ if (system(test_script)) {
+ printf("FAILED: execute command: %s, err %d\n", test_script, -errno);
+ goto err;
+ }
rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g);
if (rv != 0) {
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index aa6de65b0838..84cfcabea838 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -2,3 +2,4 @@
test_memcontrol
test_core
test_freezer
+test_kmem \ No newline at end of file
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 967f268fde74..f027d933595b 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -6,11 +6,13 @@ all:
TEST_FILES := with_stress.sh
TEST_PROGS := test_stress.sh
TEST_GEN_PROGS = test_memcontrol
+TEST_GEN_PROGS += test_kmem
TEST_GEN_PROGS += test_core
TEST_GEN_PROGS += test_freezer
include ../lib.mk
$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_kmem: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 8a637ca7d73a..05853b0b8831 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -106,7 +106,7 @@ int cg_read_strcmp(const char *cgroup, const char *control,
/* Handle the case of comparing against empty string */
if (!expected)
- size = 32;
+ return -1;
else
size = strlen(expected) + 1;
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
new file mode 100644
index 000000000000..0941aa16157e
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <sys/sysinfo.h>
+#include <pthread.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+
+/*
+ * Memory cgroup charging and vmstat data aggregation is performed using
+ * percpu batches 32 pages big (look at MEMCG_CHARGE_BATCH). So the maximum
+ * discrepancy between charge and vmstat entries is number of cpus multiplied
+ * by 32 pages multiplied by 2.
+ */
+#define MAX_VMSTAT_ERROR (4096 * 32 * 2 * get_nprocs())
+
+
+static int alloc_dcache(const char *cgroup, void *arg)
+{
+ unsigned long i;
+ struct stat st;
+ char buf[128];
+
+ for (i = 0; i < (unsigned long)arg; i++) {
+ snprintf(buf, sizeof(buf),
+ "/something-non-existent-with-a-long-name-%64lu-%d",
+ i, getpid());
+ stat(buf, &st);
+ }
+
+ return 0;
+}
+
+/*
+ * This test allocates 100000 of negative dentries with long names.
+ * Then it checks that "slab" in memory.stat is larger than 1M.
+ * Then it sets memory.high to 1M and checks that at least 1/2
+ * of slab memory has been reclaimed.
+ */
+static int test_kmem_basic(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg = NULL;
+ long slab0, slab1, current;
+
+ cg = cg_name(root, "kmem_basic_test");
+ if (!cg)
+ goto cleanup;
+
+ if (cg_create(cg))
+ goto cleanup;
+
+ if (cg_run(cg, alloc_dcache, (void *)100000))
+ goto cleanup;
+
+ slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
+ if (slab0 < (1 << 20))
+ goto cleanup;
+
+ cg_write(cg, "memory.high", "1M");
+ slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
+ if (slab1 <= 0)
+ goto cleanup;
+
+ current = cg_read_long(cg, "memory.current");
+ if (current <= 0)
+ goto cleanup;
+
+ if (slab1 < slab0 / 2 && current < slab0 / 2)
+ ret = KSFT_PASS;
+cleanup:
+ cg_destroy(cg);
+ free(cg);
+
+ return ret;
+}
+
+static void *alloc_kmem_fn(void *arg)
+{
+ alloc_dcache(NULL, (void *)100);
+ return NULL;
+}
+
+static int alloc_kmem_smp(const char *cgroup, void *arg)
+{
+ int nr_threads = 2 * get_nprocs();
+ pthread_t *tinfo;
+ unsigned long i;
+ int ret = -1;
+
+ tinfo = calloc(nr_threads, sizeof(pthread_t));
+ if (tinfo == NULL)
+ return -1;
+
+ for (i = 0; i < nr_threads; i++) {
+ if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
+ (void *)i)) {
+ free(tinfo);
+ return -1;
+ }
+ }
+
+ for (i = 0; i < nr_threads; i++) {
+ ret = pthread_join(tinfo[i], NULL);
+ if (ret)
+ break;
+ }
+
+ free(tinfo);
+ return ret;
+}
+
+static int cg_run_in_subcgroups(const char *parent,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg, int times)
+{
+ char *child;
+ int i;
+
+ for (i = 0; i < times; i++) {
+ child = cg_name_indexed(parent, "child", i);
+ if (!child)
+ return -1;
+
+ if (cg_create(child)) {
+ cg_destroy(child);
+ free(child);
+ return -1;
+ }
+
+ if (cg_run(child, fn, NULL)) {
+ cg_destroy(child);
+ free(child);
+ return -1;
+ }
+
+ cg_destroy(child);
+ free(child);
+ }
+
+ return 0;
+}
+
+/*
+ * The test creates and destroys a large number of cgroups. In each cgroup it
+ * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
+ * threads. Then it checks the sanity of numbers on the parent level:
+ * the total size of the cgroups should be roughly equal to
+ * anon + file + slab + kernel_stack.
+ */
+static int test_kmem_memcg_deletion(const char *root)
+{
+ long current, slab, anon, file, kernel_stack, sum;
+ int ret = KSFT_FAIL;
+ char *parent;
+
+ parent = cg_name(root, "kmem_memcg_deletion_test");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
+ goto cleanup;
+
+ current = cg_read_long(parent, "memory.current");
+ slab = cg_read_key_long(parent, "memory.stat", "slab ");
+ anon = cg_read_key_long(parent, "memory.stat", "anon ");
+ file = cg_read_key_long(parent, "memory.stat", "file ");
+ kernel_stack = cg_read_key_long(parent, "memory.stat", "kernel_stack ");
+ if (current < 0 || slab < 0 || anon < 0 || file < 0 ||
+ kernel_stack < 0)
+ goto cleanup;
+
+ sum = slab + anon + file + kernel_stack;
+ if (abs(sum - current) < MAX_VMSTAT_ERROR) {
+ ret = KSFT_PASS;
+ } else {
+ printf("memory.current = %ld\n", current);
+ printf("slab + anon + file + kernel_stack = %ld\n", sum);
+ printf("slab = %ld\n", slab);
+ printf("anon = %ld\n", anon);
+ printf("file = %ld\n", file);
+ printf("kernel_stack = %ld\n", kernel_stack);
+ }
+
+cleanup:
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * The test reads the entire /proc/kpagecgroup. If the operation went
+ * successfully (and the kernel didn't panic), the test is treated as passed.
+ */
+static int test_kmem_proc_kpagecgroup(const char *root)
+{
+ unsigned long buf[128];
+ int ret = KSFT_FAIL;
+ ssize_t len;
+ int fd;
+
+ fd = open("/proc/kpagecgroup", O_RDONLY);
+ if (fd < 0)
+ return ret;
+
+ do {
+ len = read(fd, buf, sizeof(buf));
+ } while (len > 0);
+
+ if (len == 0)
+ ret = KSFT_PASS;
+
+ close(fd);
+ return ret;
+}
+
+static void *pthread_wait_fn(void *arg)
+{
+ sleep(100);
+ return NULL;
+}
+
+static int spawn_1000_threads(const char *cgroup, void *arg)
+{
+ int nr_threads = 1000;
+ pthread_t *tinfo;
+ unsigned long i;
+ long stack;
+ int ret = -1;
+
+ tinfo = calloc(nr_threads, sizeof(pthread_t));
+ if (tinfo == NULL)
+ return -1;
+
+ for (i = 0; i < nr_threads; i++) {
+ if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
+ (void *)i)) {
+ free(tinfo);
+ return(-1);
+ }
+ }
+
+ stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
+ if (stack >= 4096 * 1000)
+ ret = 0;
+
+ free(tinfo);
+ return ret;
+}
+
+/*
+ * The test spawns a process, which spawns 1000 threads. Then it checks
+ * that memory.stat's kernel_stack is at least 1000 pages large.
+ */
+static int test_kmem_kernel_stacks(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg = NULL;
+
+ cg = cg_name(root, "kmem_kernel_stacks_test");
+ if (!cg)
+ goto cleanup;
+
+ if (cg_create(cg))
+ goto cleanup;
+
+ if (cg_run(cg, spawn_1000_threads, NULL))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+cleanup:
+ cg_destroy(cg);
+ free(cg);
+
+ return ret;
+}
+
+/*
+ * This test sequentionally creates 30 child cgroups, allocates some
+ * kernel memory in each of them, and deletes them. Then it checks
+ * that the number of dying cgroups on the parent level is 0.
+ */
+static int test_kmem_dead_cgroups(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent;
+ long dead;
+ int i;
+
+ parent = cg_name(root, "kmem_dead_cgroups_test");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
+ goto cleanup;
+
+ for (i = 0; i < 5; i++) {
+ dead = cg_read_key_long(parent, "cgroup.stat",
+ "nr_dying_descendants ");
+ if (dead == 0) {
+ ret = KSFT_PASS;
+ break;
+ }
+ /*
+ * Reclaiming cgroups might take some time,
+ * let's wait a bit and repeat.
+ */
+ sleep(1);
+ }
+
+cleanup:
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * This test creates a sub-tree with 1000 memory cgroups.
+ * Then it checks that the memory.current on the parent level
+ * is greater than 0 and approximates matches the percpu value
+ * from memory.stat.
+ */
+static int test_percpu_basic(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent, *child;
+ long current, percpu;
+ int i;
+
+ parent = cg_name(root, "percpu_basic_test");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ for (i = 0; i < 1000; i++) {
+ child = cg_name_indexed(parent, "child", i);
+ if (!child)
+ return -1;
+
+ if (cg_create(child))
+ goto cleanup_children;
+
+ free(child);
+ }
+
+ current = cg_read_long(parent, "memory.current");
+ percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
+
+ if (current > 0 && percpu > 0 && abs(current - percpu) <
+ MAX_VMSTAT_ERROR)
+ ret = KSFT_PASS;
+ else
+ printf("memory.current %ld\npercpu %ld\n",
+ current, percpu);
+
+cleanup_children:
+ for (i = 0; i < 1000; i++) {
+ child = cg_name_indexed(parent, "child", i);
+ cg_destroy(child);
+ free(child);
+ }
+
+cleanup:
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct kmem_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_kmem_basic),
+ T(test_kmem_memcg_deletion),
+ T(test_kmem_proc_kpagecgroup),
+ T(test_kmem_kernel_stacks),
+ T(test_kmem_dead_cgroups),
+ T(test_percpu_basic),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ /*
+ * Check that memory controller is available:
+ * memory is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+ ksft_exit_skip("memory controller isn't available\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index b7e6dec36173..42be3b925830 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -20,13 +20,6 @@
#include "../kselftest.h"
#include "clone3_selftests.h"
-/*
- * Different sizes of struct clone_args
- */
-#ifndef CLONE3_ARGS_SIZE_V0
-#define CLONE3_ARGS_SIZE_V0 64
-#endif
-
enum test_mode {
CLONE3_ARGS_NO_TEST,
CLONE3_ARGS_ALL_0,
@@ -38,13 +31,13 @@ enum test_mode {
static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
{
- struct clone_args args = {
+ struct __clone_args args = {
.flags = flags,
.exit_signal = SIGCHLD,
};
struct clone_args_extended {
- struct clone_args args;
+ struct __clone_args args;
__aligned_u64 excess_space[2];
} args_ext;
@@ -52,11 +45,11 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
int status;
memset(&args_ext, 0, sizeof(args_ext));
- if (size > sizeof(struct clone_args))
+ if (size > sizeof(struct __clone_args))
args_ext.excess_space[1] = 1;
if (size == 0)
- size = sizeof(struct clone_args);
+ size = sizeof(struct __clone_args);
switch (test_mode) {
case CLONE3_ARGS_ALL_0:
@@ -77,9 +70,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
break;
}
- memcpy(&args_ext.args, &args, sizeof(struct clone_args));
+ memcpy(&args_ext.args, &args, sizeof(struct __clone_args));
- pid = sys_clone3((struct clone_args *)&args_ext, size);
+ pid = sys_clone3((struct __clone_args *)&args_ext, size);
if (pid < 0) {
ksft_print_msg("%s - Failed to create new process\n",
strerror(errno));
@@ -144,14 +137,14 @@ int main(int argc, char *argv[])
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0. */
- test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST);
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0. */
+ test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST);
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */
- test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */
+ test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with sizeof(struct clone_args) + 8 */
- test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
+ test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
/* Do a clone3() with exit_signal having highest 32 bits non-zero */
test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
@@ -165,31 +158,31 @@ int main(int argc, char *argv[])
/* Do a clone3() with NSIG < exit_signal < CSIG */
test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
- test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
+ test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
- test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG,
+ test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG,
CLONE3_ARGS_ALL_0);
- test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG,
+ test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG,
CLONE3_ARGS_ALL_0);
/* Do a clone3() with > page size */
test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */
if (uid == 0)
- test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0,
+ test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0,
CLONE3_ARGS_NO_TEST);
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */
- test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL,
+ /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */
+ test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL,
CLONE3_ARGS_NO_TEST);
/* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
if (uid == 0)
- test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0,
+ test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0,
CLONE3_ARGS_NO_TEST);
else
ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 9562425aa0a9..55bd387ce7ec 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -44,13 +44,13 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata,
int status;
pid_t pid = -1;
- struct clone_args args = {
+ struct __clone_args args = {
.exit_signal = SIGCHLD,
.set_tid = ptr_to_u64(set_tid),
.set_tid_size = set_tid_size,
};
- pid = sys_clone3(&args, sizeof(struct clone_args));
+ pid = sys_clone3(&args, sizeof(args));
if (pid < 0) {
TH_LOG("%s - Failed to create new process", strerror(errno));
return -errno;
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index db5fc9c5edcf..47a8c0fc3676 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -47,7 +47,7 @@ static void test_clone3_clear_sighand(void)
{
int ret;
pid_t pid;
- struct clone_args args = {};
+ struct __clone_args args = {};
struct sigaction act;
/*
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index 91c1a78ddb39..e81ffaaee02b 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -19,13 +19,11 @@
#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
#endif
-#ifndef CLONE_ARGS_SIZE_VER0
-#define CLONE_ARGS_SIZE_VER0 64
-#endif
-
#ifndef __NR_clone3
#define __NR_clone3 -1
-struct clone_args {
+#endif
+
+struct __clone_args {
__aligned_u64 flags;
__aligned_u64 pidfd;
__aligned_u64 child_tid;
@@ -34,15 +32,21 @@ struct clone_args {
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
-#define CLONE_ARGS_SIZE_VER1 80
+#ifndef CLONE_ARGS_SIZE_VER0
+#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
+#endif
__aligned_u64 set_tid;
__aligned_u64 set_tid_size;
-#define CLONE_ARGS_SIZE_VER2 88
+#ifndef CLONE_ARGS_SIZE_VER1
+#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
+#endif
__aligned_u64 cgroup;
+#ifndef CLONE_ARGS_SIZE_VER2
+#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
+#endif
};
-#endif /* __NR_clone3 */
-static pid_t sys_clone3(struct clone_args *args, size_t size)
+static pid_t sys_clone3(struct __clone_args *args, size_t size)
{
fflush(stdout);
fflush(stderr);
@@ -52,7 +56,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size)
static inline void test_clone3_supported(void)
{
pid_t pid;
- struct clone_args args = {};
+ struct __clone_args args = {};
if (__NR_clone3 < 0)
ksft_exit_skip("clone3() syscall is not supported\n");
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index 5831c1082d6d..0229e9ebb995 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -46,14 +46,14 @@ static int call_clone3_set_tid(pid_t *set_tid,
int status;
pid_t pid = -1;
- struct clone_args args = {
+ struct __clone_args args = {
.flags = flags,
.exit_signal = SIGCHLD,
.set_tid = ptr_to_u64(set_tid),
.set_tid_size = set_tid_size,
};
- pid = sys_clone3(&args, sizeof(struct clone_args));
+ pid = sys_clone3(&args, sizeof(args));
if (pid < 0) {
ksft_print_msg("%s - Failed to create new process\n",
strerror(errno));
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index 94b02a18f230..344a99c6da1b 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -10,3 +10,4 @@ execveat.denatured
/recursion-depth
xxxxxxxx*
pipe
+S_I*.test
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 4453b8f8def3..0a13b110c1e6 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -3,7 +3,7 @@ CFLAGS = -Wall
CFLAGS += -Wno-nonnull
CFLAGS += -D_GNU_SOURCE
-TEST_PROGS := binfmt_script
+TEST_PROGS := binfmt_script non-regular
TEST_GEN_PROGS := execveat
TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
# Makefile is a run-time dependency, since it's accessed by the execveat test
@@ -11,7 +11,8 @@ TEST_FILES := Makefile
TEST_GEN_PROGS += recursion-depth
-EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*
+EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \
+ $(OUTPUT)/S_I*.test
include ../lib.mk
diff --git a/tools/testing/selftests/exec/non-regular.c b/tools/testing/selftests/exec/non-regular.c
new file mode 100644
index 000000000000..cd3a34aca93e
--- /dev/null
+++ b/tools/testing/selftests/exec/non-regular.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#include "../kselftest_harness.h"
+
+/* Remove a file, ignoring the result if it didn't exist. */
+void rm(struct __test_metadata *_metadata, const char *pathname,
+ int is_dir)
+{
+ int rc;
+
+ if (is_dir)
+ rc = rmdir(pathname);
+ else
+ rc = unlink(pathname);
+
+ if (rc < 0) {
+ ASSERT_EQ(errno, ENOENT) {
+ TH_LOG("Not ENOENT: %s", pathname);
+ }
+ } else {
+ ASSERT_EQ(rc, 0) {
+ TH_LOG("Failed to remove: %s", pathname);
+ }
+ }
+}
+
+FIXTURE(file) {
+ char *pathname;
+ int is_dir;
+};
+
+FIXTURE_VARIANT(file)
+{
+ const char *name;
+ int expected;
+ int is_dir;
+ void (*setup)(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant);
+ int major, minor, mode; /* for mknod() */
+};
+
+void setup_link(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ const char * const paths[] = {
+ "/bin/true",
+ "/usr/bin/true",
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(paths); i++) {
+ if (access(paths[i], X_OK) == 0) {
+ ASSERT_EQ(symlink(paths[i], self->pathname), 0);
+ return;
+ }
+ }
+ ASSERT_EQ(1, 0) {
+ TH_LOG("Could not find viable 'true' binary");
+ }
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFLNK)
+{
+ .name = "S_IFLNK",
+ .expected = ELOOP,
+ .setup = setup_link,
+};
+
+void setup_dir(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ ASSERT_EQ(mkdir(self->pathname, 0755), 0);
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFDIR)
+{
+ .name = "S_IFDIR",
+ .is_dir = 1,
+ .expected = EACCES,
+ .setup = setup_dir,
+};
+
+void setup_node(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ dev_t dev;
+ int rc;
+
+ dev = makedev(variant->major, variant->minor);
+ rc = mknod(self->pathname, 0755 | variant->mode, dev);
+ ASSERT_EQ(rc, 0) {
+ if (errno == EPERM)
+ SKIP(return, "Please run as root; cannot mknod(%s)",
+ variant->name);
+ }
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFBLK)
+{
+ .name = "S_IFBLK",
+ .expected = EACCES,
+ .setup = setup_node,
+ /* /dev/loop0 */
+ .major = 7,
+ .minor = 0,
+ .mode = S_IFBLK,
+};
+
+FIXTURE_VARIANT_ADD(file, S_IFCHR)
+{
+ .name = "S_IFCHR",
+ .expected = EACCES,
+ .setup = setup_node,
+ /* /dev/zero */
+ .major = 1,
+ .minor = 5,
+ .mode = S_IFCHR,
+};
+
+void setup_fifo(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ ASSERT_EQ(mkfifo(self->pathname, 0755), 0);
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFIFO)
+{
+ .name = "S_IFIFO",
+ .expected = EACCES,
+ .setup = setup_fifo,
+};
+
+FIXTURE_SETUP(file)
+{
+ ASSERT_GT(asprintf(&self->pathname, "%s.test", variant->name), 6);
+ self->is_dir = variant->is_dir;
+
+ rm(_metadata, self->pathname, variant->is_dir);
+ variant->setup(_metadata, self, variant);
+}
+
+FIXTURE_TEARDOWN(file)
+{
+ rm(_metadata, self->pathname, self->is_dir);
+}
+
+TEST_F(file, exec_errno)
+{
+ char * const argv[2] = { (char * const)self->pathname, NULL };
+
+ EXPECT_LT(execv(argv[0], argv), 0);
+ EXPECT_EQ(errno, variant->expected);
+}
+
+/* S_IFSOCK */
+FIXTURE(sock)
+{
+ int fd;
+};
+
+FIXTURE_SETUP(sock)
+{
+ self->fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(self->fd, 0);
+}
+
+FIXTURE_TEARDOWN(sock)
+{
+ if (self->fd >= 0)
+ ASSERT_EQ(close(self->fd), 0);
+}
+
+TEST_F(sock, exec_errno)
+{
+ char * const argv[2] = { " magic socket ", NULL };
+ char * const envp[1] = { NULL };
+
+ EXPECT_LT(fexecve(self->fd, argv, envp), 0);
+ EXPECT_EQ(errno, EACCES);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index fcc281373b4d..c2a2a100114b 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -149,6 +149,26 @@ config_unset_into_buf()
echo 0 > $DIR/config_into_buf
}
+config_set_buf_size()
+{
+ echo $1 > $DIR/config_buf_size
+}
+
+config_set_file_offset()
+{
+ echo $1 > $DIR/config_file_offset
+}
+
+config_set_partial()
+{
+ echo 1 > $DIR/config_partial
+}
+
+config_unset_partial()
+{
+ echo 0 > $DIR/config_partial
+}
+
config_set_sync_direct()
{
echo 1 > $DIR/config_sync_direct
@@ -207,6 +227,35 @@ read_firmwares()
done
}
+read_partial_firmwares()
+{
+ if [ "$(cat $DIR/config_into_buf)" == "1" ]; then
+ fwfile="${FW_INTO_BUF}"
+ else
+ fwfile="${FW}"
+ fi
+
+ if [ "$1" = "xzonly" ]; then
+ fwfile="${fwfile}-orig"
+ fi
+
+ # Strip fwfile down to match partial offset and length
+ partial_data="$(cat $fwfile)"
+ partial_data="${partial_data:$2:$3}"
+
+ for i in $(seq 0 3); do
+ config_set_read_fw_idx $i
+
+ read_firmware="$(cat $DIR/read_firmware)"
+
+ # Verify the contents are what we expect.
+ if [ $read_firmware != $partial_data ]; then
+ echo "request #$i: partial firmware was not loaded" >&2
+ exit 1
+ fi
+ done
+}
+
read_firmwares_expect_nofile()
{
for i in $(seq 0 3); do
@@ -242,6 +291,21 @@ test_batched_request_firmware_into_buf_nofile()
echo "OK"
}
+test_request_partial_firmware_into_buf_nofile()
+{
+ echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2 nofile: "
+ config_reset
+ config_set_name nope-test-firmware.bin
+ config_set_into_buf
+ config_set_partial
+ config_set_buf_size $2
+ config_set_file_offset $1
+ config_trigger_sync
+ read_firmwares_expect_nofile
+ release_all_firmware
+ echo "OK"
+}
+
test_batched_request_firmware_direct_nofile()
{
echo -n "Batched request_firmware_direct() nofile try #$1: "
@@ -356,6 +420,21 @@ test_request_firmware_nowait_custom()
echo "OK"
}
+test_request_partial_firmware_into_buf()
+{
+ echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2: "
+ config_reset
+ config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME
+ config_set_into_buf
+ config_set_partial
+ config_set_buf_size $2
+ config_set_file_offset $1
+ config_trigger_sync
+ read_partial_firmwares normal $1 $2
+ release_all_firmware
+ echo "OK"
+}
+
# Only continue if batched request triggers are present on the
# test-firmware driver
test_config_present
@@ -383,6 +462,12 @@ for i in $(seq 1 5); do
test_request_firmware_nowait_custom $i normal
done
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf 0 10
+test_request_partial_firmware_into_buf 0 5
+test_request_partial_firmware_into_buf 1 6
+test_request_partial_firmware_into_buf 2 10
+
# Test for file not found, errors are expected, the failure would be
# a hung task, which would require a hard reset.
echo
@@ -407,6 +492,12 @@ for i in $(seq 1 5); do
test_request_firmware_nowait_custom_nofile $i
done
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf_nofile 0 10
+test_request_partial_firmware_into_buf_nofile 0 5
+test_request_partial_firmware_into_buf_nofile 1 6
+test_request_partial_firmware_into_buf_nofile 2 10
+
test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0
# test with both files present
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
index 68550f97d3c3..3bcd4c3624ee 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=kernel_clone
echo "p:myevent1 $PLACE" >> dynamic_events
echo "r:myevent2 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
index c969be9eb7de..438961971b7e 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=kernel_clone
setup_events() {
echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
index 16d543eaac88..a8603bd23e0d 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=kernel_clone
setup_events() {
echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
index 0f41e441c203..98305d76bd04 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -4,9 +4,9 @@
# requires: set_ftrace_filter
# flags: instance
-echo _do_fork:stacktrace >> set_ftrace_filter
+echo kernel_clone:stacktrace >> set_ftrace_filter
-grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter
+grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter
(echo "forked"; sleep 1)
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
index eba858c21815..9737cd0578a7 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -3,7 +3,7 @@
# description: Kprobe dynamic event - adding and removing
# requires: kprobe_events
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
grep myevent kprobe_events
test -d events/kprobes/myevent
echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
index d10bf4f05bc8..f9a40af76888 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -3,7 +3,7 @@
# description: Kprobe dynamic event - busy event check
# requires: kprobe_events
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
test -d events/kprobes/myevent
echo 1 > events/kprobes/myevent/enable
echo > kprobe_events && exit_fail # this must fail
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
index 61f2ac441aec..eb543d3cfe5f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -3,13 +3,13 @@
# description: Kprobe dynamic event with arguments
# requires: kprobe_events
-echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
+echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events
grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)'
test -d events/kprobes/testprobe
echo 1 > events/kprobes/testprobe/enable
( echo "forked")
-grep testprobe trace | grep '_do_fork' | \
+grep testprobe trace | grep 'kernel_clone' | \
grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$'
echo 0 > events/kprobes/testprobe/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
index 05aaeed6987f..4e5b63be51c9 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
@@ -5,7 +5,7 @@
grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old
-echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events
+echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events
grep testprobe kprobe_events | grep -q 'comm=$comm'
test -d events/kprobes/testprobe
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index b5fa05443b39..a1d70588ab21 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -30,13 +30,13 @@ esac
: "Test get argument (1)"
echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test kernel_clone" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\"" trace
echo 0 > events/kprobes/testprobe/enable
: "Test get argument (2)"
echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test kernel_clone" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
index b8c75a3d003c..bd25dd0ba0d0 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
@@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then
fi
: "Test get basic types symbol argument"
-echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
-echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
+echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
+echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
if grep -q "x8/16/32/64" README; then
- echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
+ echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
fi
-echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
+echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
echo 1 > events/kprobes/enable
(echo "forked")
echo 0 > events/kprobes/enable
@@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace
grep "testprobe_bf:.* arg1=.*" trace
: "Test get string symbol argument"
-echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events
+echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events
echo 1 > events/kprobes/enable
(echo "forked")
echo 0 > events/kprobes/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
index 0610e0b5587c..91fcce1c241c 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -4,7 +4,7 @@
# requires: kprobe_events "x8/16/32/64":README
gen_event() { # Bitsize
- echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
+ echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
}
check_types() { # s-type u-type x-type bf-type width
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index 81d8b58c03bc..0d179094191f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -5,29 +5,29 @@
# prepare
echo nop > current_tracer
-echo _do_fork > set_ftrace_filter
-echo 'p:testprobe _do_fork' > kprobe_events
+echo kernel_clone > set_ftrace_filter
+echo 'p:testprobe kernel_clone' > kprobe_events
# kprobe on / ftrace off
echo 1 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
grep testprobe trace
-! grep '_do_fork <-' trace
+! grep 'kernel_clone <-' trace
# kprobe on / ftrace on
echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
# kprobe off / ftrace on
echo 0 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
! grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
# kprobe on / ftrace on
echo 1 > events/kprobes/testprobe/enable
@@ -35,11 +35,11 @@ echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
# kprobe on / ftrace off
echo nop > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-! grep '_do_fork <-' trace
+! grep 'kernel_clone <-' trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
index 366b7e1b6718..45d90b6c763d 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
@@ -4,7 +4,7 @@
# requires: kprobe_events "Create/append/":README
# Choose 2 symbols for target
-SYM1=_do_fork
+SYM1=kernel_clone
SYM2=do_exit
EVENT_NAME=kprobes/testevent
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index b4d834675e59..c02ea50d63ea 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -86,15 +86,15 @@ esac
# multiprobe errors
if grep -q "Create/append/" README && grep -q "imm-value" README; then
-echo 'p:kprobes/testevent _do_fork' > kprobe_events
+echo 'p:kprobes/testevent kernel_clone' > kprobe_events
check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE
# Explicitly use printf "%s" to not interpret \1
-printf "%s" 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
-check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE
-check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE
+printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events
+check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE
+check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE
fi
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
index 523fde6d1aa5..7ae492c204a4 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -4,14 +4,14 @@
# requires: kprobe_events
# Add new kretprobe event
-echo 'r:testprobe2 _do_fork $retval' > kprobe_events
+echo 'r:testprobe2 kernel_clone $retval' > kprobe_events
grep testprobe2 kprobe_events | grep -q 'arg1=\$retval'
test -d events/kprobes/testprobe2
echo 1 > events/kprobes/testprobe2/enable
( echo "forked")
-cat trace | grep testprobe2 | grep -q '<- _do_fork'
+cat trace | grep testprobe2 | grep -q '<- kernel_clone'
echo 0 > events/kprobes/testprobe2/enable
echo '-:testprobe2' >> kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
index ff6c44adc8a0..c4093fc1a773 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -4,7 +4,7 @@
# requires: kprobe_events
! grep -q 'myevent' kprobe_profile
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile
echo 1 > events/kprobes/myevent/enable
( echo "forked" )
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index ea2147248ebe..afd42387e8b2 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -343,7 +343,7 @@ kmod_test_0001_driver()
kmod_defaults_driver
config_num_threads 1
- printf '\000' >"$DIR"/config_test_driver
+ printf $NAME >"$DIR"/config_test_driver
config_trigger ${FUNCNAME[0]}
config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
}
@@ -354,7 +354,7 @@ kmod_test_0001_fs()
kmod_defaults_fs
config_num_threads 1
- printf '\000' >"$DIR"/config_test_fs
+ printf $NAME >"$DIR"/config_test_fs
config_trigger ${FUNCNAME[0]}
config_expect_result ${FUNCNAME[0]} -EINVAL
}
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 4f78e4805633..f19804df244c 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -971,6 +971,11 @@ void __run_test(struct __fixture_metadata *f,
ksft_print_msg(" RUN %s%s%s.%s ...\n",
f->name, variant->name[0] ? "." : "", variant->name, t->name);
+
+ /* Make sure output buffers are flushed before fork */
+ fflush(stdout);
+ fflush(stderr);
+
t->pid = fork();
if (t->pid < 0) {
ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index 8162c58a1234..2fc6b3af81a1 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -40,11 +40,11 @@ static void guest_code(void)
/* Single step test, covers 2 basic instructions and 2 emulated */
asm volatile("ss_start: "
- "xor %%rax,%%rax\n\t"
+ "xor %%eax,%%eax\n\t"
"cpuid\n\t"
"movl $0x1a0,%%ecx\n\t"
"rdmsr\n\t"
- : : : "rax", "ecx");
+ : : : "eax", "ebx", "ecx", "edx");
/* DR6.BD test */
asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
@@ -73,7 +73,7 @@ int main(void)
int i;
/* Instruction lengths starting at ss_start */
int ss_size[4] = {
- 3, /* xor */
+ 2, /* xor */
2, /* cpuid */
5, /* mov */
2, /* rdmsr */
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
index 8383eb89d88a..bb7a1775307b 100755
--- a/tools/testing/selftests/lkdtm/run.sh
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -82,7 +82,7 @@ dmesg > "$DMESG"
($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true
# Record and dump the results
-dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true
+dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true
cat "$LOG"
# Check for expected output
diff --git a/tools/testing/selftests/mincore/.gitignore b/tools/testing/selftests/mincore/.gitignore
new file mode 100644
index 000000000000..15c4dfc2df00
--- /dev/null
+++ b/tools/testing/selftests/mincore/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+mincore_selftest
diff --git a/tools/testing/selftests/mincore/Makefile b/tools/testing/selftests/mincore/Makefile
new file mode 100644
index 000000000000..38c7db1e8926
--- /dev/null
+++ b/tools/testing/selftests/mincore/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+CFLAGS += -Wall
+
+TEST_GEN_PROGS := mincore_selftest
+include ../lib.mk
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
new file mode 100644
index 000000000000..5a1e85ff5d32
--- /dev/null
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * kselftest suite for mincore().
+ *
+ * Copyright (C) 2020 Collabora, Ltd.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+/* Default test file size: 4MB */
+#define MB (1UL << 20)
+#define FILE_SIZE (4 * MB)
+
+
+/*
+ * Tests the user interface. This test triggers most of the documented
+ * error conditions in mincore().
+ */
+TEST(basic_interface)
+{
+ int retval;
+ int page_size;
+ unsigned char vec[1];
+ char *addr;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Query a 0 byte sized range */
+ retval = mincore(0, 0, vec);
+ EXPECT_EQ(0, retval);
+
+ /* Addresses in the specified range are invalid or unmapped */
+ errno = 0;
+ retval = mincore(NULL, page_size, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(ENOMEM, errno);
+
+ errno = 0;
+ addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+
+ /* <addr> argument is not page-aligned */
+ errno = 0;
+ retval = mincore(addr + 1, page_size, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(EINVAL, errno);
+
+ /* <length> argument is too large */
+ errno = 0;
+ retval = mincore(addr, -1, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(ENOMEM, errno);
+
+ /* <vec> argument points to an illegal address */
+ errno = 0;
+ retval = mincore(addr, page_size, NULL);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(EFAULT, errno);
+ munmap(addr, page_size);
+}
+
+
+/*
+ * Test mincore() behavior on a private anonymous page mapping.
+ * Check that the page is not loaded into memory right after the mapping
+ * but after accessing it (on-demand allocation).
+ * Then free the page and check that it's not memory-resident.
+ */
+TEST(check_anonymous_locked_pages)
+{
+ unsigned char vec[1];
+ char *addr;
+ int retval;
+ int page_size;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Map one page and check it's not memory-resident */
+ errno = 0;
+ addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(0, vec[0]) {
+ TH_LOG("Page found in memory before use");
+ }
+
+ /* Touch the page and check again. It should now be in memory */
+ addr[0] = 1;
+ mlock(addr, page_size);
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[0]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ /*
+ * It shouldn't be memory-resident after unlocking it and
+ * marking it as unneeded.
+ */
+ munlock(addr, page_size);
+ madvise(addr, page_size, MADV_DONTNEED);
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(0, vec[0]) {
+ TH_LOG("Page in memory after being zapped");
+ }
+ munmap(addr, page_size);
+}
+
+
+/*
+ * Check mincore() behavior on huge pages.
+ * This test will be skipped if the mapping fails (ie. if there are no
+ * huge pages available).
+ *
+ * Make sure the system has at least one free huge page, check
+ * "HugePages_Free" in /proc/meminfo.
+ * Increment /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages if
+ * needed.
+ */
+TEST(check_huge_pages)
+{
+ unsigned char vec[1];
+ char *addr;
+ int retval;
+ int page_size;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ errno = 0;
+ addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ if (errno == ENOMEM)
+ SKIP(return, "No huge pages available.");
+ else
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(0, vec[0]) {
+ TH_LOG("Page found in memory before use");
+ }
+
+ addr[0] = 1;
+ mlock(addr, page_size);
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[0]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ munlock(addr, page_size);
+ munmap(addr, page_size);
+}
+
+
+/*
+ * Test mincore() behavior on a file-backed page.
+ * No pages should be loaded into memory right after the mapping. Then,
+ * accessing any address in the mapping range should load the page
+ * containing the address and a number of subsequent pages (readahead).
+ *
+ * The actual readahead settings depend on the test environment, so we
+ * can't make a lot of assumptions about that. This test covers the most
+ * general cases.
+ */
+TEST(check_file_mmap)
+{
+ unsigned char *vec;
+ int vec_size;
+ char *addr;
+ int retval;
+ int page_size;
+ int fd;
+ int i;
+ int ra_pages = 0;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ vec_size = FILE_SIZE / page_size;
+ if (FILE_SIZE % page_size)
+ vec_size++;
+
+ vec = calloc(vec_size, sizeof(unsigned char));
+ ASSERT_NE(NULL, vec) {
+ TH_LOG("Can't allocate array");
+ }
+
+ errno = 0;
+ fd = open(".", O_TMPFILE | O_RDWR, 0600);
+ ASSERT_NE(-1, fd) {
+ TH_LOG("Can't create temporary file: %s",
+ strerror(errno));
+ }
+ errno = 0;
+ retval = fallocate(fd, 0, 0, FILE_SIZE);
+ ASSERT_EQ(0, retval) {
+ TH_LOG("Error allocating space for the temporary file: %s",
+ strerror(errno));
+ }
+
+ /*
+ * Map the whole file, the pages shouldn't be fetched yet.
+ */
+ errno = 0;
+ addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ for (i = 0; i < vec_size; i++) {
+ ASSERT_EQ(0, vec[i]) {
+ TH_LOG("Unexpected page in memory");
+ }
+ }
+
+ /*
+ * Touch a page in the middle of the mapping. We expect the next
+ * few pages (the readahead window) to be populated too.
+ */
+ addr[FILE_SIZE / 2] = 1;
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ i = FILE_SIZE / 2 / page_size + 1;
+ while (i < vec_size && vec[i]) {
+ ra_pages++;
+ i++;
+ }
+ EXPECT_GT(ra_pages, 0) {
+ TH_LOG("No read-ahead pages found in memory");
+ }
+
+ EXPECT_LT(i, vec_size) {
+ TH_LOG("Read-ahead pages reached the end of the file");
+ }
+ /*
+ * End of the readahead window. The rest of the pages shouldn't
+ * be in memory.
+ */
+ if (i < vec_size) {
+ while (i < vec_size && !vec[i])
+ i++;
+ EXPECT_EQ(vec_size, i) {
+ TH_LOG("Unexpected page in memory beyond readahead window");
+ }
+ }
+
+ munmap(addr, FILE_SIZE);
+ close(fd);
+ free(vec);
+}
+
+
+/*
+ * Test mincore() behavior on a page backed by a tmpfs file. This test
+ * performs the same steps as the previous one. However, we don't expect
+ * any readahead in this case.
+ */
+TEST(check_tmpfs_mmap)
+{
+ unsigned char *vec;
+ int vec_size;
+ char *addr;
+ int retval;
+ int page_size;
+ int fd;
+ int i;
+ int ra_pages = 0;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ vec_size = FILE_SIZE / page_size;
+ if (FILE_SIZE % page_size)
+ vec_size++;
+
+ vec = calloc(vec_size, sizeof(unsigned char));
+ ASSERT_NE(NULL, vec) {
+ TH_LOG("Can't allocate array");
+ }
+
+ errno = 0;
+ fd = open("/dev/shm", O_TMPFILE | O_RDWR, 0600);
+ ASSERT_NE(-1, fd) {
+ TH_LOG("Can't create temporary file: %s",
+ strerror(errno));
+ }
+ errno = 0;
+ retval = fallocate(fd, 0, 0, FILE_SIZE);
+ ASSERT_EQ(0, retval) {
+ TH_LOG("Error allocating space for the temporary file: %s",
+ strerror(errno));
+ }
+
+ /*
+ * Map the whole file, the pages shouldn't be fetched yet.
+ */
+ errno = 0;
+ addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ for (i = 0; i < vec_size; i++) {
+ ASSERT_EQ(0, vec[i]) {
+ TH_LOG("Unexpected page in memory");
+ }
+ }
+
+ /*
+ * Touch a page in the middle of the mapping. We expect only
+ * that page to be fetched into memory.
+ */
+ addr[FILE_SIZE / 2] = 1;
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ i = FILE_SIZE / 2 / page_size + 1;
+ while (i < vec_size && vec[i]) {
+ ra_pages++;
+ i++;
+ }
+ ASSERT_EQ(ra_pages, 0) {
+ TH_LOG("Read-ahead pages found in memory");
+ }
+
+ munmap(addr, FILE_SIZE);
+ close(fd);
+ free(vec);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index 18c5de53558a..bf361f30d6ef 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -180,6 +180,8 @@ setup()
;;
r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 2499824d9e1c..8df5cb8f71ff 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,4 +1,6 @@
CONFIG_MPTCP=y
CONFIG_MPTCP_IPV6=y
+CONFIG_INET_DIAG=m
+CONFIG_INET_MPTCP_DIAG=m
CONFIG_VETH=y
CONFIG_NET_SCH_NETEM=m
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index cad6f73a5fd0..090620c3e10c 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -406,10 +406,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
/* ... but we still receive.
* Close our write side, ev. give some time
- * for address notification
+ * for address notification and/or checking
+ * the current status
*/
- if (cfg_join)
- usleep(400000);
+ if (cfg_wait)
+ usleep(cfg_wait);
shutdown(peerfd, SHUT_WR);
} else {
if (errno == EINTR)
@@ -427,7 +428,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
}
/* leave some time for late join/announce */
- if (cfg_wait)
+ if (cfg_join)
usleep(cfg_wait);
close(peerfd);
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 7c38a909f8b8..8a2fe6d64bf2 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -1175,6 +1175,51 @@ kci_test_neigh_get()
echo "PASS: neigh get"
}
+kci_test_bridge_parent_id()
+{
+ local ret=0
+ sysfsnet=/sys/bus/netdevsim/devices/netdevsim
+ probed=false
+
+ if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+ modprobe -q netdevsim
+ check_err $?
+ if [ $ret -ne 0 ]; then
+ echo "SKIP: bridge_parent_id can't load netdevsim"
+ return $ksft_skip
+ fi
+ probed=true
+ fi
+
+ echo "10 1" > /sys/bus/netdevsim/new_device
+ while [ ! -d ${sysfsnet}10 ] ; do :; done
+ echo "20 1" > /sys/bus/netdevsim/new_device
+ while [ ! -d ${sysfsnet}20 ] ; do :; done
+ udevadm settle
+ dev10=`ls ${sysfsnet}10/net/`
+ dev20=`ls ${sysfsnet}20/net/`
+
+ ip link add name test-bond0 type bond mode 802.3ad
+ ip link set dev $dev10 master test-bond0
+ ip link set dev $dev20 master test-bond0
+ ip link add name test-br0 type bridge
+ ip link set dev test-bond0 master test-br0
+ check_err $?
+
+ # clean up any leftovers
+ ip link del dev test-br0
+ ip link del dev test-bond0
+ echo 20 > /sys/bus/netdevsim/del_device
+ echo 10 > /sys/bus/netdevsim/del_device
+ $probed && rmmod netdevsim
+
+ if [ $ret -ne 0 ]; then
+ echo "FAIL: bridge_parent_id"
+ return 1
+ fi
+ echo "PASS: bridge_parent_id"
+}
+
kci_test_rtnl()
{
local ret=0
@@ -1224,6 +1269,8 @@ kci_test_rtnl()
check_err $?
kci_test_neigh_get
check_err $?
+ kci_test_bridge_parent_id
+ check_err $?
kci_del_dummy
return $ret
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index d3e0809ab368..431296c0f91c 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -2,13 +2,18 @@
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
-# Creates following topology:
+# Creates following default topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than either Originator
# and responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.
+#
+# You can check with different Orgininator/Link/Responder MTU eg:
+# nft_flowtable.sh -o8000 -l1500 -r2000
+#
+
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -21,29 +26,17 @@ ns2out=""
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-which nc > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nc (netcat)"
- exit $ksft_skip
-fi
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
-ip netns add nsr1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace"
- exit $ksft_skip
-fi
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "which nc" "run test without nc (netcat)"
+checktool "ip netns add nsr1" "create net namespace"
ip netns add ns1
ip netns add ns2
@@ -89,11 +82,41 @@ ip -net nsr2 addr add dead:2::1/64 dev veth1
# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
# is NOT the lowest link mtu.
-ip -net nsr1 link set veth0 mtu 9000
-ip -net ns1 link set eth0 mtu 9000
+omtu=9000
+lmtu=1500
+rmtu=2000
+
+usage(){
+ echo "nft_flowtable.sh [OPTIONS]"
+ echo
+ echo "MTU options"
+ echo " -o originator"
+ echo " -l link"
+ echo " -r responder"
+ exit 1
+}
+
+while getopts "o:l:r:" o
+do
+ case $o in
+ o) omtu=$OPTARG;;
+ l) lmtu=$OPTARG;;
+ r) rmtu=$OPTARG;;
+ *) usage;;
+ esac
+done
+
+if ! ip -net nsr1 link set veth0 mtu $omtu; then
+ exit 1
+fi
+
+ip -net ns1 link set eth0 mtu $omtu
+
+if ! ip -net nsr2 link set veth1 mtu $rmtu; then
+ exit 1
+fi
-ip -net nsr2 link set veth1 mtu 2000
-ip -net ns2 link set eth0 mtu 2000
+ip -net ns2 link set eth0 mtu $rmtu
# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
@@ -113,7 +136,10 @@ for i in 1 2; do
ip -net ns$i route add default via 10.0.$i.1
ip -net ns$i addr add dead:$i::99/64 dev eth0
ip -net ns$i route add default via dead:$i::1
- ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
+ if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+ echo "ERROR: Check Originator/Responder values (problem during address addition)"
+ exit 1
+ fi
# don't set ip DF bit for first two tests
ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
@@ -147,7 +173,7 @@ table inet filter {
# as PMTUd is off.
# This rule is deleted for the last test, when we expect PMTUd
# to kick in and ensure all packets meet mtu requirements.
- meta length gt 1500 accept comment something-to-grep-for
+ meta length gt $lmtu accept comment something-to-grep-for
# next line blocks connection w.o. working offload.
# we only do this for reverse dir, because we expect packets to
@@ -171,15 +197,13 @@ if [ $? -ne 0 ]; then
fi
# test basic connectivity
-ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
-if [ $? -ne 0 ];then
+if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
echo "ERROR: ns1 cannot reach ns2" 1>&2
bash
exit 1
fi
-ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
-if [ $? -ne 0 ];then
+if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
echo "ERROR: ns2 cannot reach ns1" 1>&2
exit 1
fi
@@ -196,7 +220,6 @@ ns2out=$(mktemp)
make_file()
{
name=$1
- who=$2
SIZE=$((RANDOM % (1024 * 8)))
TSIZE=$((SIZE * 1024))
@@ -215,8 +238,7 @@ check_transfer()
out=$2
what=$3
- cmp "$in" "$out" > /dev/null 2>&1
- if [ $? -ne 0 ] ;then
+ if ! cmp "$in" "$out" > /dev/null 2>&1; then
echo "FAIL: file mismatch for $what" 1>&2
ls -l "$in"
ls -l "$out"
@@ -243,17 +265,21 @@ test_tcp_forwarding_ip()
sleep 3
- kill $lpid
- kill $cpid
+ if ps -p $lpid > /dev/null;then
+ kill $lpid
+ fi
+
+ if ps -p $cpid > /dev/null;then
+ kill $cpid
+ fi
+
wait
- check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
- if [ $? -ne 0 ];then
+ if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
lret=1
fi
- check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
- if [ $? -ne 0 ];then
+ if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
lret=1
fi
@@ -282,13 +308,12 @@ test_tcp_forwarding_nat()
return $lret
}
-make_file "$ns1in" "ns1"
-make_file "$ns2in" "ns2"
+make_file "$ns1in"
+make_file "$ns2in"
# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding ns1 ns2; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -319,9 +344,7 @@ table ip nat {
}
EOF
-test_tcp_forwarding_nat ns1 ns2
-
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding_nat ns1 ns2; then
echo "PASS: flow offloaded for ns1/ns2 with NAT"
else
echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
@@ -333,8 +356,7 @@ fi
# Same as second test, but with PMTU discovery enabled.
handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
-ip netns exec nsr1 nft delete rule inet filter forward $handle
-if [ $? -ne 0 ] ;then
+if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
echo "FAIL: Could not delete large-packet accept rule"
exit 1
fi
@@ -342,8 +364,7 @@ fi
ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-test_tcp_forwarding_nat ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding_nat ns1 ns2; then
echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
else
echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
@@ -389,8 +410,7 @@ ip -net ns2 route del 192.168.10.1 via 10.0.2.1
ip -net ns2 route add default via 10.0.2.1
ip -net ns2 route add default via dead:2::1
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding ns1 ns2; then
echo "PASS: ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index a2c80914e3dc..01f8d3c0cf2c 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -46,6 +46,10 @@
#define __NR_pidfd_getfd -1
#endif
+#ifndef PIDFD_NONBLOCK
+#define PIDFD_NONBLOCK O_NONBLOCK
+#endif
+
/*
* The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
* That means, when it wraps around any pid < 300 will be skipped.
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 7dca1aa4672d..1f085b922c6e 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -75,7 +75,7 @@ static int sys_waitid(int which, pid_t pid, int options)
pid_t create_child(int *pidfd, unsigned flags)
{
- struct clone_args args = {
+ struct __clone_args args = {
.flags = CLONE_PIDFD | flags,
.exit_signal = SIGCHLD,
.pidfd = ptr_to_u64(pidfd),
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
index 7079f8eef792..be2943f072f6 100644
--- a/tools/testing/selftests/pidfd/pidfd_wait.c
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -17,10 +17,15 @@
#include <unistd.h>
#include "pidfd.h"
-#include "../kselftest.h"
+#include "../kselftest_harness.h"
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
static pid_t sys_clone3(struct clone_args *args)
{
return syscall(__NR_clone3, args, sizeof(struct clone_args));
@@ -32,9 +37,8 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
return syscall(__NR_waitid, which, pid, info, options, ru);
}
-static int test_pidfd_wait_simple(void)
+TEST(wait_simple)
{
- const char *test_name = "pidfd wait simple";
int pidfd = -1, status = 0;
pid_t parent_tid = -1;
struct clone_args args = {
@@ -50,76 +54,40 @@ static int test_pidfd_wait_simple(void)
};
pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
- if (pidfd < 0)
- ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pidfd, 0);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid == 0)
- ksft_exit_fail_msg(
- "%s test: succeeded to wait on invalid pidfd %s\n",
- test_name, strerror(errno));
- close(pidfd);
+ ASSERT_NE(pid, 0);
+ EXPECT_EQ(close(pidfd), 0);
pidfd = -1;
pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC);
- if (pidfd == 0)
- ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pidfd, 0);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid == 0)
- ksft_exit_fail_msg(
- "%s test: succeeded to wait on invalid pidfd %s\n",
- test_name, strerror(errno));
- close(pidfd);
+ ASSERT_NE(pid, 0);
+ EXPECT_EQ(close(pidfd), 0);
pidfd = -1;
pid = sys_clone3(&args);
- if (pid < 0)
- ksft_exit_fail_msg("%s test: failed to create new process %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pid, 0);
if (pid == 0)
exit(EXIT_SUCCESS);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status))
- ksft_exit_fail_msg(
- "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
- close(pidfd);
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_EXITED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ksft_test_result_pass("%s test: Passed\n", test_name);
- return 0;
+ ASSERT_GE(pid, 0);
+ ASSERT_EQ(WIFEXITED(info.si_status), true);
+ ASSERT_EQ(WEXITSTATUS(info.si_status), 0);
+ EXPECT_EQ(close(pidfd), 0);
+
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_EXITED);
+ ASSERT_EQ(info.si_pid, parent_tid);
}
-static int test_pidfd_wait_states(void)
+TEST(wait_states)
{
- const char *test_name = "pidfd wait states";
int pidfd = -1, status = 0;
pid_t parent_tid = -1;
struct clone_args args = {
@@ -135,9 +103,7 @@ static int test_pidfd_wait_states(void)
};
pid = sys_clone3(&args);
- if (pid < 0)
- ksft_exit_fail_msg("%s test: failed to create new process %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pid, 0);
if (pid == 0) {
kill(getpid(), SIGSTOP);
@@ -145,127 +111,115 @@ static int test_pidfd_wait_states(void)
exit(EXIT_SUCCESS);
}
- ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_STOPPED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to send signal to process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_CONTINUED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_STOPPED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
- ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WEXITED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_KILLED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- close(pidfd);
-
- ksft_test_result_pass("%s test: Passed\n", test_name);
- return 0;
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_CONTINUED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_KILLED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ EXPECT_EQ(close(pidfd), 0);
}
-int main(int argc, char **argv)
+TEST(wait_nonblock)
{
- ksft_print_header();
- ksft_set_plan(2);
+ int pidfd, status = 0;
+ unsigned int flags = 0;
+ pid_t parent_tid = -1;
+ struct clone_args args = {
+ .parent_tid = ptr_to_u64(&parent_tid),
+ .flags = CLONE_PARENT_SETTID,
+ .exit_signal = SIGCHLD,
+ };
+ int ret;
+ pid_t pid;
+ siginfo_t info = {
+ .si_signo = 0,
+ };
+
+ /*
+ * Callers need to see ECHILD with non-blocking pidfds when no child
+ * processes exists.
+ */
+ pidfd = sys_pidfd_open(getpid(), PIDFD_NONBLOCK);
+ EXPECT_GE(pidfd, 0) {
+ /* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+ ASSERT_EQ(errno, EINVAL);
+ SKIP(return, "Skipping PIDFD_NONBLOCK test");
+ }
+
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, ECHILD);
+ EXPECT_EQ(close(pidfd), 0);
+
+ pid = sys_clone3(&args);
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ kill(getpid(), SIGSTOP);
+ exit(EXIT_SUCCESS);
+ }
- test_pidfd_wait_simple();
- test_pidfd_wait_states();
+ pidfd = sys_pidfd_open(pid, PIDFD_NONBLOCK);
+ EXPECT_GE(pidfd, 0) {
+ /* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+ ASSERT_EQ(errno, EINVAL);
+ SKIP(return, "Skipping PIDFD_NONBLOCK test");
+ }
+
+ flags = fcntl(pidfd, F_GETFL, 0);
+ ASSERT_GT(flags, 0);
+ ASSERT_GT((flags & O_NONBLOCK), 0);
+
+ /*
+ * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when
+ * child processes exist but none have exited.
+ */
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EAGAIN);
+
+ /*
+ * Callers need to continue seeing 0 with non-blocking pidfd and
+ * WNOHANG raised explicitly when child processes exist but none have
+ * exited.
+ */
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL);
+ ASSERT_EQ(ret, 0);
- return ksft_exit_pass();
+ ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_EXITED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ EXPECT_EQ(close(pidfd), 0);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 0453c50c949c..55ef15184057 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -9,7 +9,17 @@
* This selftest exercises the powerpc alignment fault handler.
*
* We create two sets of source and destination buffers, one in regular memory,
- * the other cache-inhibited (we use /dev/fb0 for this).
+ * the other cache-inhibited (by default we use /dev/fb0 for this, but an
+ * alterative path for cache-inhibited memory may be provided).
+ *
+ * One way to get cache-inhibited memory is to use the "mem" kernel parameter
+ * to limit the kernel to less memory than actually exists. Addresses above
+ * the limit may still be accessed but will be treated as cache-inhibited. For
+ * example, if there is actually 4GB of memory and the parameter "mem=3GB" is
+ * used, memory from address 0xC0000000 onwards is treated as cache-inhibited.
+ * To access this region /dev/mem is used. The kernel should be configured
+ * without CONFIG_STRICT_DEVMEM. In this case use:
+ * ./alignment_handler /dev/mem 0xc0000000
*
* We initialise the source buffers, then use whichever set of load/store
* instructions is under test to copy bytes from the source buffers to the
@@ -48,11 +58,14 @@
#include <asm/cputable.h>
#include "utils.h"
+#include "instructions.h"
int bufsize;
int debug;
int testing;
volatile int gotsig;
+char *cipath = "/dev/fb0";
+long cioffset;
void sighandler(int sig, siginfo_t *info, void *ctx)
{
@@ -84,6 +97,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
} \
rc |= do_test(#name, test_##name)
+#define TESTP(name, ld_op, st_op, ld_reg, st_reg) \
+ void test_##name(char *s, char *d) \
+ { \
+ asm volatile( \
+ ld_op(ld_reg, %0, 0, 0) \
+ st_op(st_reg, %1, 0, 0) \
+ :: "r"(s), "r"(d), "r"(0) \
+ : "memory", "vs0", "vs32", "r31"); \
+ } \
+ rc |= do_test(#name, test_##name)
+
#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
@@ -103,6 +127,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
#define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0)
#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
+#define LOAD_MLS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_MLS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_8LS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_8LS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, op, PSTFD, 0, 0)
+#define STORE_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, PLFD, op, 0, 0)
+
+#define LOAD_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, op, PSTXV ## tail, 0, 32)
+#define STORE_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, PLXV ## tail, op, 32, 0)
/* FIXME: Unimplemented tests: */
// STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */
@@ -195,17 +230,18 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
printf("\tDoing %s:\t", test_name);
- fd = open("/dev/fb0", O_RDWR);
+ fd = open(cipath, O_RDWR);
if (fd < 0) {
printf("\n");
- perror("Can't open /dev/fb0 now?");
+ perror("Can't open ci file now?");
return 1;
}
- ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
- fd, 0x0);
- ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
- fd, bufsize);
+ ci0 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+ fd, cioffset);
+ ci1 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+ fd, cioffset + bufsize);
+
if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
printf("\n");
perror("mmap failed");
@@ -270,11 +306,11 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
return rc;
}
-static bool can_open_fb0(void)
+static bool can_open_cifile(void)
{
int fd;
- fd = open("/dev/fb0", O_RDWR);
+ fd = open(cipath, O_RDWR);
if (fd < 0)
return false;
@@ -286,7 +322,7 @@ int test_alignment_handler_vsx_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("VSX: 2.06B\n");
@@ -304,7 +340,7 @@ int test_alignment_handler_vsx_207(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
printf("VSX: 2.07B\n");
@@ -320,7 +356,7 @@ int test_alignment_handler_vsx_300(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
printf("VSX: 3.00B\n");
@@ -348,11 +384,30 @@ int test_alignment_handler_vsx_300(void)
return rc;
}
+int test_alignment_handler_vsx_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("VSX: PREFIX\n");
+ LOAD_VSX_8LS_PREFIX_TEST(PLXSD, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXSSP, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXV0, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXV1, 1);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXSD, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXSSP, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXV0, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXV1, 1);
+ return rc;
+}
+
int test_alignment_handler_integer(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
printf("Integer\n");
LOAD_DFORM_TEST(lbz);
@@ -408,7 +463,7 @@ int test_alignment_handler_integer_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("Integer: 2.06\n");
@@ -419,11 +474,32 @@ int test_alignment_handler_integer_206(void)
return rc;
}
+int test_alignment_handler_integer_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("Integer: PREFIX\n");
+ LOAD_MLS_PREFIX_TEST(PLBZ);
+ LOAD_MLS_PREFIX_TEST(PLHZ);
+ LOAD_MLS_PREFIX_TEST(PLHA);
+ LOAD_MLS_PREFIX_TEST(PLWZ);
+ LOAD_8LS_PREFIX_TEST(PLWA);
+ LOAD_8LS_PREFIX_TEST(PLD);
+ STORE_MLS_PREFIX_TEST(PSTB);
+ STORE_MLS_PREFIX_TEST(PSTH);
+ STORE_MLS_PREFIX_TEST(PSTW);
+ STORE_8LS_PREFIX_TEST(PSTD);
+ return rc;
+}
+
int test_alignment_handler_vmx(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
printf("VMX\n");
@@ -451,7 +527,7 @@ int test_alignment_handler_fp(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
printf("Floating point\n");
LOAD_FLOAT_DFORM_TEST(lfd);
@@ -479,7 +555,7 @@ int test_alignment_handler_fp_205(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
printf("Floating point: 2.05\n");
@@ -497,7 +573,7 @@ int test_alignment_handler_fp_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("Floating point: 2.06\n");
@@ -507,13 +583,32 @@ int test_alignment_handler_fp_206(void)
return rc;
}
+
+int test_alignment_handler_fp_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("Floating point: PREFIX\n");
+ LOAD_FLOAT_DFORM_TEST(lfs);
+ LOAD_FLOAT_MLS_PREFIX_TEST(PLFS);
+ LOAD_FLOAT_MLS_PREFIX_TEST(PLFD);
+ STORE_FLOAT_MLS_PREFIX_TEST(PSTFS);
+ STORE_FLOAT_MLS_PREFIX_TEST(PSTFD);
+ return rc;
+}
+
void usage(char *prog)
{
- printf("Usage: %s [options]\n", prog);
+ printf("Usage: %s [options] [path [offset]]\n", prog);
printf(" -d Enable debug error output\n");
printf("\n");
- printf("This test requires a POWER8 or POWER9 CPU and a usable ");
- printf("framebuffer at /dev/fb0.\n");
+ printf("This test requires a POWER8, POWER9 or POWER10 CPU ");
+ printf("and either a usable framebuffer at /dev/fb0 or ");
+ printf("the path to usable cache inhibited memory and optional ");
+ printf("offset to be provided\n");
}
int main(int argc, char *argv[])
@@ -533,6 +628,13 @@ int main(int argc, char *argv[])
exit(1);
}
}
+ argc -= optind;
+ argv += optind;
+
+ if (argc > 0)
+ cipath = argv[0];
+ if (argc > 1)
+ cioffset = strtol(argv[1], 0, 0x10);
bufsize = getpagesize();
@@ -552,10 +654,14 @@ int main(int argc, char *argv[])
"test_alignment_handler_vsx_207");
rc |= test_harness(test_alignment_handler_vsx_300,
"test_alignment_handler_vsx_300");
+ rc |= test_harness(test_alignment_handler_vsx_prefix,
+ "test_alignment_handler_vsx_prefix");
rc |= test_harness(test_alignment_handler_integer,
"test_alignment_handler_integer");
rc |= test_harness(test_alignment_handler_integer_206,
"test_alignment_handler_integer_206");
+ rc |= test_harness(test_alignment_handler_integer_prefix,
+ "test_alignment_handler_integer_prefix");
rc |= test_harness(test_alignment_handler_vmx,
"test_alignment_handler_vmx");
rc |= test_harness(test_alignment_handler_fp,
@@ -564,5 +670,7 @@ int main(int argc, char *argv[])
"test_alignment_handler_fp_205");
rc |= test_harness(test_alignment_handler_fp_206,
"test_alignment_handler_fp_206");
+ rc |= test_harness(test_alignment_handler_fp_prefix,
+ "test_alignment_handler_fp_prefix");
return rc;
}
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index a2e8c9da7fa5..d50cc05df495 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -19,6 +19,7 @@
#include <limits.h>
#include <sys/time.h>
#include <sys/syscall.h>
+#include <sys/sysinfo.h>
#include <sys/types.h>
#include <sys/shm.h>
#include <linux/futex.h>
@@ -104,8 +105,9 @@ static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
{
- int pid;
- cpu_set_t cpuset;
+ int pid, ncpus;
+ cpu_set_t *cpuset;
+ size_t size;
pid = fork();
if (pid == -1) {
@@ -116,14 +118,23 @@ static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
if (pid)
return;
- CPU_ZERO(&cpuset);
- CPU_SET(cpu, &cpuset);
+ ncpus = get_nprocs();
+ size = CPU_ALLOC_SIZE(ncpus);
+ cpuset = CPU_ALLOC(ncpus);
+ if (!cpuset) {
+ perror("malloc");
+ exit(1);
+ }
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(cpu, size, cpuset);
- if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+ if (sched_setaffinity(0, size, cpuset)) {
perror("sched_setaffinity");
+ CPU_FREE(cpuset);
exit(1);
}
+ CPU_FREE(cpuset);
fn(arg);
exit(0);
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index ddaf140b8255..994b11af765c 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -12,4 +12,4 @@ memcpy_p7_t1
copyuser_64_exc_t0
copyuser_64_exc_t1
copyuser_64_exc_t2
-memcpy_mcsafe_64
+copy_mc_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 0917983a1c78..3095b1f1c02b 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
- memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
+ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
-o $@ $^
-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
- -D COPY_LOOP=test_memcpy_mcsafe \
+ -D COPY_LOOP=test_copy_mc_generic \
-o $@ $^
$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
new file mode 120000
index 000000000000..dcbe06d500fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copy_mc_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
deleted file mode 120000
index f0feef3062f6..000000000000
--- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
index f52ed92b53e7..00dc32c0ed75 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
@@ -5,12 +5,17 @@ pe_ok() {
local dev="$1"
local path="/sys/bus/pci/devices/$dev/eeh_pe_state"
- if ! [ -e "$path" ] ; then
+ # if a driver doesn't support the error handling callbacks then the
+ # device is recovered by removing and re-probing it. This causes the
+ # sysfs directory to disappear so read the PE state once and squash
+ # any potential error messages
+ local eeh_state="$(cat $path 2>/dev/null)"
+ if [ -z "$eeh_state" ]; then
return 1;
fi
- local fw_state="$(cut -d' ' -f1 < $path)"
- local sw_state="$(cut -d' ' -f2 < $path)"
+ local fw_state="$(echo $eeh_state | cut -d' ' -f1)"
+ local sw_state="$(echo $eeh_state | cut -d' ' -f2)"
# If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an
# error state or being recovered. Either way, not ok.
diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h
index f36061eb6f0f..4efa6314bd96 100644
--- a/tools/testing/selftests/powerpc/include/instructions.h
+++ b/tools/testing/selftests/powerpc/include/instructions.h
@@ -66,4 +66,81 @@ static inline int paste_last(void *i)
#define PPC_INST_PASTE __PASTE(0, 0, 0, 0)
#define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1)
+/* This defines the prefixed load/store instructions */
+#ifdef __ASSEMBLY__
+# define stringify_in_c(...) __VA_ARGS__
+#else
+# define __stringify_in_c(...) #__VA_ARGS__
+# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+#endif
+
+#define __PPC_RA(a) (((a) & 0x1f) << 16)
+#define __PPC_RS(s) (((s) & 0x1f) << 21)
+#define __PPC_RT(t) __PPC_RS(t)
+#define __PPC_PREFIX_R(r) (((r) & 0x1) << 20)
+
+#define PPC_PREFIX_MLS 0x06000000
+#define PPC_PREFIX_8LS 0x04000000
+
+#define PPC_INST_LBZ 0x88000000
+#define PPC_INST_LHZ 0xa0000000
+#define PPC_INST_LHA 0xa8000000
+#define PPC_INST_LWZ 0x80000000
+#define PPC_INST_STB 0x98000000
+#define PPC_INST_STH 0xb0000000
+#define PPC_INST_STW 0x90000000
+#define PPC_INST_STD 0xf8000000
+#define PPC_INST_LFS 0xc0000000
+#define PPC_INST_LFD 0xc8000000
+#define PPC_INST_STFS 0xd0000000
+#define PPC_INST_STFD 0xd8000000
+
+#define PREFIX_MLS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \
+ stringify_in_c(.long PPC_PREFIX_MLS | \
+ __PPC_PREFIX_R(r) | \
+ (((d) >> 16) & 0x3ffff);) \
+ stringify_in_c(.long (instr) | \
+ __PPC_RT(t) | \
+ __PPC_RA(a) | \
+ ((d) & 0xffff);\n)
+
+#define PREFIX_8LS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \
+ stringify_in_c(.long PPC_PREFIX_8LS | \
+ __PPC_PREFIX_R(r) | \
+ (((d) >> 16) & 0x3ffff);) \
+ stringify_in_c(.long (instr) | \
+ __PPC_RT(t) | \
+ __PPC_RA(a) | \
+ ((d) & 0xffff);\n)
+
+/* Prefixed Integer Load/Store instructions */
+#define PLBZ(t, a, r, d) PREFIX_MLS(PPC_INST_LBZ, t, a, r, d)
+#define PLHZ(t, a, r, d) PREFIX_MLS(PPC_INST_LHZ, t, a, r, d)
+#define PLHA(t, a, r, d) PREFIX_MLS(PPC_INST_LHA, t, a, r, d)
+#define PLWZ(t, a, r, d) PREFIX_MLS(PPC_INST_LWZ, t, a, r, d)
+#define PLWA(t, a, r, d) PREFIX_8LS(0xa4000000, t, a, r, d)
+#define PLD(t, a, r, d) PREFIX_8LS(0xe4000000, t, a, r, d)
+#define PLQ(t, a, r, d) PREFIX_8LS(0xe0000000, t, a, r, d)
+#define PSTB(s, a, r, d) PREFIX_MLS(PPC_INST_STB, s, a, r, d)
+#define PSTH(s, a, r, d) PREFIX_MLS(PPC_INST_STH, s, a, r, d)
+#define PSTW(s, a, r, d) PREFIX_MLS(PPC_INST_STW, s, a, r, d)
+#define PSTD(s, a, r, d) PREFIX_8LS(0xf4000000, s, a, r, d)
+#define PSTQ(s, a, r, d) PREFIX_8LS(0xf0000000, s, a, r, d)
+
+/* Prefixed Floating-Point Load/Store Instructions */
+#define PLFS(frt, a, r, d) PREFIX_MLS(PPC_INST_LFS, frt, a, r, d)
+#define PLFD(frt, a, r, d) PREFIX_MLS(PPC_INST_LFD, frt, a, r, d)
+#define PSTFS(frs, a, r, d) PREFIX_MLS(PPC_INST_STFS, frs, a, r, d)
+#define PSTFD(frs, a, r, d) PREFIX_MLS(PPC_INST_STFD, frs, a, r, d)
+
+/* Prefixed VSX Load/Store Instructions */
+#define PLXSD(vrt, a, r, d) PREFIX_8LS(0xa8000000, vrt, a, r, d)
+#define PLXSSP(vrt, a, r, d) PREFIX_8LS(0xac000000, vrt, a, r, d)
+#define PLXV0(s, a, r, d) PREFIX_8LS(0xc8000000, s, a, r, d)
+#define PLXV1(s, a, r, d) PREFIX_8LS(0xcc000000, s, a, r, d)
+#define PSTXSD(vrs, a, r, d) PREFIX_8LS(0xb8000000, vrs, a, r, d)
+#define PSTXSSP(vrs, a, r, d) PREFIX_8LS(0xbc000000, vrs, a, r, d)
+#define PSTXV0(s, a, r, d) PREFIX_8LS(0xd8000000, s, a, r, d)
+#define PSTXV1(s, a, r, d) PREFIX_8LS(0xdc000000, s, a, r, d)
+
#endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */
diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
new file mode 100644
index 000000000000..3312cb1b058d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PKEYS_H
+#define _SELFTESTS_POWERPC_PKEYS_H
+
+#include <sys/mman.h>
+
+#include "reg.h"
+#include "utils.h"
+
+/*
+ * Older versions of libc use the Intel-specific access rights.
+ * Hence, override the definitions as they might be incorrect.
+ */
+#undef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS 0x3
+
+#undef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE 0x2
+
+#undef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE 0x4
+
+/* Older versions of libc do not not define this */
+#ifndef SEGV_PKUERR
+#define SEGV_PKUERR 4
+#endif
+
+#define SI_PKEY_OFFSET 0x20
+
+#define __NR_pkey_mprotect 386
+#define __NR_pkey_alloc 384
+#define __NR_pkey_free 385
+
+#define PKEY_BITS_PER_PKEY 2
+#define NR_PKEYS 32
+#define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1)
+
+inline unsigned long pkeyreg_get(void)
+{
+ return mfspr(SPRN_AMR);
+}
+
+inline void pkeyreg_set(unsigned long amr)
+{
+ set_amr(amr);
+}
+
+void pkey_set_rights(int pkey, unsigned long rights)
+{
+ unsigned long amr, shift;
+
+ shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+ amr = pkeyreg_get();
+ amr &= ~(PKEY_BITS_MASK << shift);
+ amr |= (rights & PKEY_BITS_MASK) << shift;
+ pkeyreg_set(amr);
+}
+
+int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey)
+{
+ return syscall(__NR_pkey_mprotect, addr, len, prot, pkey);
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long rights)
+{
+ return syscall(__NR_pkey_alloc, flags, rights);
+}
+
+int sys_pkey_free(int pkey)
+{
+ return syscall(__NR_pkey_free, pkey);
+}
+
+int pkeys_unsupported(void)
+{
+ bool hash_mmu = false;
+ int pkey;
+
+ /* Protection keys are currently supported on Hash MMU only */
+ FAIL_IF(using_hash_mmu(&hash_mmu));
+ SKIP_IF(!hash_mmu);
+
+ /* Check if the system call is supported */
+ pkey = sys_pkey_alloc(0, 0);
+ SKIP_IF(pkey < 0);
+ sys_pkey_free(pkey);
+
+ return 0;
+}
+
+int siginfo_pkey(siginfo_t *si)
+{
+ /*
+ * In older versions of libc, siginfo_t does not have si_pkey as
+ * a member.
+ */
+#ifdef si_pkey
+ return si->si_pkey;
+#else
+ return *((int *)(((char *) si) + SI_PKEY_OFFSET));
+#endif
+}
+
+#define pkey_rights(r) ({ \
+ static char buf[4] = "rwx"; \
+ unsigned int amr_bits; \
+ if ((r) & PKEY_DISABLE_EXECUTE) \
+ buf[2] = '-'; \
+ amr_bits = (r) & PKEY_BITS_MASK; \
+ if (amr_bits & PKEY_DISABLE_WRITE) \
+ buf[1] = '-'; \
+ if (amr_bits & PKEY_DISABLE_ACCESS & ~PKEY_DISABLE_WRITE) \
+ buf[0] = '-'; \
+ buf; \
+})
+
+unsigned long next_pkey_rights(unsigned long rights)
+{
+ if (rights == PKEY_DISABLE_ACCESS)
+ return PKEY_DISABLE_EXECUTE;
+ else if (rights == (PKEY_DISABLE_ACCESS | PKEY_DISABLE_EXECUTE))
+ return 0;
+
+ if ((rights & PKEY_BITS_MASK) == 0)
+ rights |= PKEY_DISABLE_WRITE;
+ else if ((rights & PKEY_BITS_MASK) == PKEY_DISABLE_WRITE)
+ rights |= PKEY_DISABLE_ACCESS;
+
+ return rights;
+}
+
+#endif /* _SELFTESTS_POWERPC_PKEYS_H */
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 022c5076b2c5..c0f2742a3a59 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -57,6 +57,12 @@
#define SPRN_PPR 896 /* Program Priority Register */
#define SPRN_AMR 13 /* Authority Mask Register - problem state */
+#define set_amr(v) asm volatile("isync;" \
+ "mtspr " __stringify(SPRN_AMR) ",%0;" \
+ "isync" : \
+ : "r" ((unsigned long)(v)) \
+ : "memory")
+
/* TEXASR register bits */
#define TEXASR_FC 0xFE00000000000000
#define TEXASR_FP 0x0100000000000000
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index e089a0c30d9a..71d2924f5b8b 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -42,6 +42,16 @@ int perf_event_enable(int fd);
int perf_event_disable(int fd);
int perf_event_reset(int fd);
+#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
static inline bool have_hwcap(unsigned long ftr)
{
return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
@@ -60,6 +70,7 @@ static inline bool have_hwcap2(unsigned long ftr2)
#endif
bool is_ppc64le(void);
+int using_hash_mmu(bool *using_hash);
/* Yes, this is evil */
#define FAIL_IF(x) \
@@ -71,6 +82,15 @@ do { \
} \
} while (0)
+#define FAIL_IF_EXIT(x) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ _exit(1); \
+ } \
+} while (0)
+
/* The test harness uses this, yes it's gross */
#define MAGIC_SKIP_RETURN_VALUE 99
@@ -96,11 +116,20 @@ do { \
#define _str(s) #s
#define str(s) _str(s)
+#define sigsafe_err(msg) ({ \
+ ssize_t nbytes __attribute__((unused)); \
+ nbytes = write(STDERR_FILENO, msg, strlen(msg)); })
+
/* POWER9 feature */
#ifndef PPC_FEATURE2_ARCH_3_00
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#endif
+/* POWER10 feature */
+#ifndef PPC_FEATURE2_ARCH_3_1
+#define PPC_FEATURE2_ARCH_3_1 0x00040000
+#endif
+
#if defined(__powerpc64__)
#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
#define UCONTEXT_MSR(UC) (UC)->uc_mcontext.gp_regs[PT_MSR]
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
index e31ca6f453ed..d0c23b2e4b60 100644
--- a/tools/testing/selftests/powerpc/math/.gitignore
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -6,3 +6,4 @@ vmx_preempt
fpu_signal
vmx_signal
vsx_preempt
+fpu_denormal
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 11a10d7a2bbd..fcc91c205984 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt
top_srcdir = ../../../../..
include ../../lib.mk
@@ -11,9 +11,9 @@ $(OUTPUT)/fpu_syscall: fpu_asm.S
$(OUTPUT)/fpu_preempt: fpu_asm.S
$(OUTPUT)/fpu_signal: fpu_asm.S
-$(OUTPUT)/vmx_syscall: vmx_asm.S
-$(OUTPUT)/vmx_preempt: vmx_asm.S
-$(OUTPUT)/vmx_signal: vmx_asm.S
+$(OUTPUT)/vmx_syscall: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_preempt: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c
$(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
-$(OUTPUT)/vsx_preempt: vsx_asm.S
+$(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c
diff --git a/tools/testing/selftests/powerpc/math/fpu_denormal.c b/tools/testing/selftests/powerpc/math/fpu_denormal.c
new file mode 100644
index 000000000000..5f96682abaa8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_denormal.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * This test attempts to cause a FP denormal exception on POWER8 CPUs. Unfortunately
+ * if the denormal handler is not configured or working properly, this can cause a bad
+ * crash in kernel mode when the kernel tries to save FP registers when the process
+ * exits.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+static int test_denormal_fpu(void)
+{
+ unsigned int m32;
+ unsigned long m64;
+ volatile float f;
+ volatile double d;
+
+ /* try to induce lfs <denormal> ; stfd */
+
+ m32 = 0x00715fcf; /* random denormal */
+ memcpy((float *)&f, &m32, sizeof(f));
+ d = f;
+ memcpy(&m64, (double *)&d, sizeof(d));
+
+ FAIL_IF((long)(m64 != 0x380c57f3c0000000)); /* renormalised value */
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_denormal_fpu, "fpu_denormal");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
index 2e059f154e77..6761d6ce30ec 100644
--- a/tools/testing/selftests/powerpc/math/vmx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -57,6 +57,9 @@ int test_preempt_vmx(void)
int i, rc, threads;
pthread_t *tids;
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
index 785a48e0976f..b340a5c4e79d 100644
--- a/tools/testing/selftests/powerpc/math/vmx_signal.c
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -96,6 +96,9 @@ int test_signal_vmx(void)
void *rc_p;
pthread_t *tids;
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
index 9ee293cc868e..03c78dfe3444 100644
--- a/tools/testing/selftests/powerpc/math/vmx_syscall.c
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -49,9 +49,14 @@ int test_vmx_syscall(void)
* Setup an environment with much context switching
*/
pid_t pid2;
- pid_t pid = fork();
+ pid_t pid;
int ret;
int child_ret;
+
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+ pid = fork();
FAIL_IF(pid == -1);
pid2 = fork();
diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c
index 63de9c6e2cd3..d1601bb889d4 100644
--- a/tools/testing/selftests/powerpc/math/vsx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c
@@ -92,6 +92,8 @@ int test_preempt_vsx(void)
int i, rc, threads;
pthread_t *tids;
+ SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 2ca523255b1b..aac4a59f9e28 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -8,3 +8,7 @@ wild_bctr
large_vm_fork_separation
bad_accesses
tlbie_test
+pkey_exec_prot
+pkey_siginfo
+stack_expansion_ldst
+stack_expansion_signal
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index b9103c4bb414..defe488d6bf1 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -3,22 +3,32 @@ noarg:
$(MAKE) -C ../
TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
- large_vm_fork_separation bad_accesses
+ large_vm_fork_separation bad_accesses pkey_exec_prot \
+ pkey_siginfo stack_expansion_signal stack_expansion_ldst
+
TEST_GEN_PROGS_EXTENDED := tlbie_test
TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
include ../../lib.mk
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
$(OUTPUT)/prot_sao: ../utils.c
$(OUTPUT)/wild_bctr: CFLAGS += -m64
$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
$(OUTPUT)/bad_accesses: CFLAGS += -m64
+$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64
+$(OUTPUT)/pkey_siginfo: CFLAGS += -m64
+
+$(OUTPUT)/stack_expansion_signal: ../utils.c ../pmu/lib.c
+
+$(OUTPUT)/stack_expansion_ldst: CFLAGS += -fno-stack-protector
+$(OUTPUT)/stack_expansion_ldst: ../utils.c
$(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1
$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
+$(OUTPUT)/pkey_siginfo: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index adc465f499ef..a864ed7e2008 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -64,34 +64,6 @@ int bad_access(char *p, bool write)
return 0;
}
-static int using_hash_mmu(bool *using_hash)
-{
- char line[128];
- FILE *f;
- int rc;
-
- f = fopen("/proc/cpuinfo", "r");
- FAIL_IF(!f);
-
- rc = 0;
- while (fgets(line, sizeof(line), f) != NULL) {
- if (strcmp(line, "MMU : Hash\n") == 0) {
- *using_hash = true;
- goto out;
- }
-
- if (strcmp(line, "MMU : Radix\n") == 0) {
- *using_hash = false;
- goto out;
- }
- }
-
- rc = -1;
-out:
- fclose(f);
- return rc;
-}
-
static int test(void)
{
unsigned long i, j, addr, region_shift, page_shift, page_size;
diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
new file mode 100644
index 000000000000..9e5c7f3f498a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if applying execute protection on pages using memory
+ * protection keys works as expected.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP 0x60000000
+#define PPC_INST_TRAP 0x7fe00008
+#define PPC_INST_BLR 0x4e800020
+
+static volatile sig_atomic_t fault_pkey, fault_code, fault_type;
+static volatile sig_atomic_t remaining_faults;
+static volatile unsigned int *fault_addr;
+static unsigned long pgsize, numinsns;
+static unsigned int *insns;
+
+static void trap_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+ /* Check if this fault originated from the expected address */
+ if (sinfo->si_addr != (void *) fault_addr)
+ sigsafe_err("got a fault for an unexpected address\n");
+
+ _exit(1);
+}
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+ int signal_pkey;
+
+ signal_pkey = siginfo_pkey(sinfo);
+ fault_code = sinfo->si_code;
+
+ /* Check if this fault originated from the expected address */
+ if (sinfo->si_addr != (void *) fault_addr) {
+ sigsafe_err("got a fault for an unexpected address\n");
+ _exit(1);
+ }
+
+ /* Check if too many faults have occurred for a single test case */
+ if (!remaining_faults) {
+ sigsafe_err("got too many faults for the same address\n");
+ _exit(1);
+ }
+
+
+ /* Restore permissions in order to continue */
+ switch (fault_code) {
+ case SEGV_ACCERR:
+ if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE)) {
+ sigsafe_err("failed to set access permissions\n");
+ _exit(1);
+ }
+ break;
+ case SEGV_PKUERR:
+ if (signal_pkey != fault_pkey) {
+ sigsafe_err("got a fault for an unexpected pkey\n");
+ _exit(1);
+ }
+
+ switch (fault_type) {
+ case PKEY_DISABLE_ACCESS:
+ pkey_set_rights(fault_pkey, 0);
+ break;
+ case PKEY_DISABLE_EXECUTE:
+ /*
+ * Reassociate the exec-only pkey with the region
+ * to be able to continue. Unlike AMR, we cannot
+ * set IAMR directly from userspace to restore the
+ * permissions.
+ */
+ if (mprotect(insns, pgsize, PROT_EXEC)) {
+ sigsafe_err("failed to set execute permissions\n");
+ _exit(1);
+ }
+ break;
+ default:
+ sigsafe_err("got a fault with an unexpected type\n");
+ _exit(1);
+ }
+ break;
+ default:
+ sigsafe_err("got a fault with an unexpected code\n");
+ _exit(1);
+ }
+
+ remaining_faults--;
+}
+
+static int test(void)
+{
+ struct sigaction segv_act, trap_act;
+ unsigned long rights;
+ int pkey, ret, i;
+
+ ret = pkeys_unsupported();
+ if (ret)
+ return ret;
+
+ /* Setup SIGSEGV handler */
+ segv_act.sa_handler = 0;
+ segv_act.sa_sigaction = segv_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &segv_act.sa_mask) != 0);
+ segv_act.sa_flags = SA_SIGINFO;
+ segv_act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGSEGV, &segv_act, NULL) != 0);
+
+ /* Setup SIGTRAP handler */
+ trap_act.sa_handler = 0;
+ trap_act.sa_sigaction = trap_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &trap_act.sa_mask) != 0);
+ trap_act.sa_flags = SA_SIGINFO;
+ trap_act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGTRAP, &trap_act, NULL) != 0);
+
+ /* Setup executable region */
+ pgsize = getpagesize();
+ numinsns = pgsize / sizeof(unsigned int);
+ insns = (unsigned int *) mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(insns == MAP_FAILED);
+
+ /* Write the instruction words */
+ for (i = 1; i < numinsns - 1; i++)
+ insns[i] = PPC_INST_NOP;
+
+ /*
+ * Set the first instruction as an unconditional trap. If
+ * the last write to this address succeeds, this should
+ * get overwritten by a no-op.
+ */
+ insns[0] = PPC_INST_TRAP;
+
+ /*
+ * Later, to jump to the executable region, we use a branch
+ * and link instruction (bctrl) which sets the return address
+ * automatically in LR. Use that to return back.
+ */
+ insns[numinsns - 1] = PPC_INST_BLR;
+
+ /* Allocate a pkey that restricts execution */
+ rights = PKEY_DISABLE_EXECUTE;
+ pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF(pkey < 0);
+
+ /*
+ * Pick the first instruction's address from the executable
+ * region.
+ */
+ fault_addr = insns;
+
+ /* The following two cases will avoid SEGV_PKUERR */
+ fault_type = -1;
+ fault_pkey = -1;
+
+ /*
+ * Read an instruction word from the address when AMR bits
+ * are not set i.e. the pkey permits both read and write
+ * access.
+ *
+ * This should not generate a fault as having PROT_EXEC
+ * implies PROT_READ on GNU systems. The pkey currently
+ * restricts execution only based on the IAMR bits. The
+ * AMR bits are cleared.
+ */
+ remaining_faults = 0;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ printf("read from %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ i = *fault_addr;
+ FAIL_IF(remaining_faults != 0);
+
+ /*
+ * Write an instruction word to the address when AMR bits
+ * are not set i.e. the pkey permits both read and write
+ * access.
+ *
+ * This should generate an access fault as having just
+ * PROT_EXEC also restricts writes. The pkey currently
+ * restricts execution only based on the IAMR bits. The
+ * AMR bits are cleared.
+ */
+ remaining_faults = 1;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ printf("write to %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ *fault_addr = PPC_INST_TRAP;
+ FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+ /* The following three cases will generate SEGV_PKUERR */
+ rights |= PKEY_DISABLE_ACCESS;
+ fault_type = PKEY_DISABLE_ACCESS;
+ fault_pkey = pkey;
+
+ /*
+ * Read an instruction word from the address when AMR bits
+ * are set i.e. the pkey permits neither read nor write
+ * access.
+ *
+ * This should generate a pkey fault based on AMR bits only
+ * as having PROT_EXEC implicitly allows reads.
+ */
+ remaining_faults = 1;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ pkey_set_rights(pkey, rights);
+ printf("read from %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ i = *fault_addr;
+ FAIL_IF(remaining_faults != 0 || fault_code != SEGV_PKUERR);
+
+ /*
+ * Write an instruction word to the address when AMR bits
+ * are set i.e. the pkey permits neither read nor write
+ * access.
+ *
+ * This should generate two faults. First, a pkey fault
+ * based on AMR bits and then an access fault since
+ * PROT_EXEC does not allow writes.
+ */
+ remaining_faults = 2;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ pkey_set_rights(pkey, rights);
+ printf("write to %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ *fault_addr = PPC_INST_NOP;
+ FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+ /* Free the current pkey */
+ sys_pkey_free(pkey);
+
+ rights = 0;
+ do {
+ /*
+ * Allocate pkeys with all valid combinations of read,
+ * write and execute restrictions.
+ */
+ pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF(pkey < 0);
+
+ /*
+ * Jump to the executable region. AMR bits may or may not
+ * be set but they should not affect execution.
+ *
+ * This should generate pkey faults based on IAMR bits which
+ * may be set to restrict execution.
+ *
+ * The first iteration also checks if the overwrite of the
+ * first instruction word from a trap to a no-op succeeded.
+ */
+ fault_pkey = pkey;
+ fault_type = -1;
+ remaining_faults = 0;
+ if (rights & PKEY_DISABLE_EXECUTE) {
+ fault_type = PKEY_DISABLE_EXECUTE;
+ remaining_faults = 1;
+ }
+
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ printf("execute at %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ asm volatile("mtctr %0; bctrl" : : "r"(insns));
+ FAIL_IF(remaining_faults != 0);
+ if (rights & PKEY_DISABLE_EXECUTE)
+ FAIL_IF(fault_code != SEGV_PKUERR);
+
+ /* Free the current pkey */
+ sys_pkey_free(pkey);
+
+ /* Find next valid combination of pkey rights */
+ rights = next_pkey_rights(rights);
+ } while (rights);
+
+ /* Cleanup */
+ munmap((void *) insns, pgsize);
+
+ return 0;
+}
+
+int main(void)
+{
+ test_harness(test, "pkey_exec_prot");
+}
diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
new file mode 100644
index 000000000000..4f815d7c1214
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if the signal information reports the correct memory protection
+ * key upon getting a key access violation fault for a page that was
+ * attempted to be protected by two different keys from two competing
+ * threads at the same time.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP 0x60000000
+#define PPC_INST_BLR 0x4e800020
+#define PROT_RWX (PROT_READ | PROT_WRITE | PROT_EXEC)
+
+#define NUM_ITERATIONS 1000000
+
+static volatile sig_atomic_t perm_pkey, rest_pkey;
+static volatile sig_atomic_t rights, fault_count;
+static volatile unsigned int *volatile fault_addr;
+static pthread_barrier_t iteration_barrier;
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+ void *pgstart;
+ size_t pgsize;
+ int pkey;
+
+ pkey = siginfo_pkey(sinfo);
+
+ /* Check if this fault originated from a pkey access violation */
+ if (sinfo->si_code != SEGV_PKUERR) {
+ sigsafe_err("got a fault for an unexpected reason\n");
+ _exit(1);
+ }
+
+ /* Check if this fault originated from the expected address */
+ if (sinfo->si_addr != (void *) fault_addr) {
+ sigsafe_err("got a fault for an unexpected address\n");
+ _exit(1);
+ }
+
+ /* Check if this fault originated from the restrictive pkey */
+ if (pkey != rest_pkey) {
+ sigsafe_err("got a fault for an unexpected pkey\n");
+ _exit(1);
+ }
+
+ /* Check if too many faults have occurred for the same iteration */
+ if (fault_count > 0) {
+ sigsafe_err("got too many faults for the same address\n");
+ _exit(1);
+ }
+
+ pgsize = getpagesize();
+ pgstart = (void *) ((unsigned long) fault_addr & ~(pgsize - 1));
+
+ /*
+ * If the current fault occurred due to lack of execute rights,
+ * reassociate the page with the exec-only pkey since execute
+ * rights cannot be changed directly for the faulting pkey as
+ * IAMR is inaccessible from userspace.
+ *
+ * Otherwise, if the current fault occurred due to lack of
+ * read-write rights, change the AMR permission bits for the
+ * pkey.
+ *
+ * This will let the test continue.
+ */
+ if (rights == PKEY_DISABLE_EXECUTE &&
+ mprotect(pgstart, pgsize, PROT_EXEC))
+ _exit(1);
+ else
+ pkey_set_rights(pkey, 0);
+
+ fault_count++;
+}
+
+struct region {
+ unsigned long rights;
+ unsigned int *base;
+ size_t size;
+};
+
+static void *protect(void *p)
+{
+ unsigned long rights;
+ unsigned int *base;
+ size_t size;
+ int tid, i;
+
+ tid = gettid();
+ base = ((struct region *) p)->base;
+ size = ((struct region *) p)->size;
+ FAIL_IF_EXIT(!base);
+
+ /* No read, write and execute restrictions */
+ rights = 0;
+
+ printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+
+ /* Allocate the permissive pkey */
+ perm_pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF_EXIT(perm_pkey < 0);
+
+ /*
+ * Repeatedly try to protect the common region with a permissive
+ * pkey
+ */
+ for (i = 0; i < NUM_ITERATIONS; i++) {
+ /*
+ * Wait until the other thread has finished allocating the
+ * restrictive pkey or until the next iteration has begun
+ */
+ pthread_barrier_wait(&iteration_barrier);
+
+ /* Try to associate the permissive pkey with the region */
+ FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+ perm_pkey));
+ }
+
+ /* Free the permissive pkey */
+ sys_pkey_free(perm_pkey);
+
+ return NULL;
+}
+
+static void *protect_access(void *p)
+{
+ size_t size, numinsns;
+ unsigned int *base;
+ int tid, i;
+
+ tid = gettid();
+ base = ((struct region *) p)->base;
+ size = ((struct region *) p)->size;
+ rights = ((struct region *) p)->rights;
+ numinsns = size / sizeof(base[0]);
+ FAIL_IF_EXIT(!base);
+
+ /* Allocate the restrictive pkey */
+ rest_pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF_EXIT(rest_pkey < 0);
+
+ printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+ printf("tid %d, %s randomly in range [%p, %p]\n", tid,
+ (rights == PKEY_DISABLE_EXECUTE) ? "execute" :
+ (rights == PKEY_DISABLE_WRITE) ? "write" : "read",
+ base, base + numinsns);
+
+ /*
+ * Repeatedly try to protect the common region with a restrictive
+ * pkey and read, write or execute from it
+ */
+ for (i = 0; i < NUM_ITERATIONS; i++) {
+ /*
+ * Wait until the other thread has finished allocating the
+ * permissive pkey or until the next iteration has begun
+ */
+ pthread_barrier_wait(&iteration_barrier);
+
+ /* Try to associate the restrictive pkey with the region */
+ FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+ rest_pkey));
+
+ /* Choose a random instruction word address from the region */
+ fault_addr = base + (rand() % numinsns);
+ fault_count = 0;
+
+ switch (rights) {
+ /* Read protection test */
+ case PKEY_DISABLE_ACCESS:
+ /*
+ * Read an instruction word from the region and
+ * verify if it has not been overwritten to
+ * something unexpected
+ */
+ FAIL_IF_EXIT(*fault_addr != PPC_INST_NOP &&
+ *fault_addr != PPC_INST_BLR);
+ break;
+
+ /* Write protection test */
+ case PKEY_DISABLE_WRITE:
+ /*
+ * Write an instruction word to the region and
+ * verify if the overwrite has succeeded
+ */
+ *fault_addr = PPC_INST_BLR;
+ FAIL_IF_EXIT(*fault_addr != PPC_INST_BLR);
+ break;
+
+ /* Execute protection test */
+ case PKEY_DISABLE_EXECUTE:
+ /* Jump to the region and execute instructions */
+ asm volatile(
+ "mtctr %0; bctrl"
+ : : "r"(fault_addr) : "ctr", "lr");
+ break;
+ }
+
+ /*
+ * Restore the restrictions originally imposed by the
+ * restrictive pkey as the signal handler would have
+ * cleared out the corresponding AMR bits
+ */
+ pkey_set_rights(rest_pkey, rights);
+ }
+
+ /* Free restrictive pkey */
+ sys_pkey_free(rest_pkey);
+
+ return NULL;
+}
+
+static void reset_pkeys(unsigned long rights)
+{
+ int pkeys[NR_PKEYS], i;
+
+ /* Exhaustively allocate all available pkeys */
+ for (i = 0; i < NR_PKEYS; i++)
+ pkeys[i] = sys_pkey_alloc(0, rights);
+
+ /* Free all allocated pkeys */
+ for (i = 0; i < NR_PKEYS; i++)
+ sys_pkey_free(pkeys[i]);
+}
+
+static int test(void)
+{
+ pthread_t prot_thread, pacc_thread;
+ struct sigaction act;
+ pthread_attr_t attr;
+ size_t numinsns;
+ struct region r;
+ int ret, i;
+
+ srand(time(NULL));
+ ret = pkeys_unsupported();
+ if (ret)
+ return ret;
+
+ /* Allocate the region */
+ r.size = getpagesize();
+ r.base = mmap(NULL, r.size, PROT_RWX,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(r.base == MAP_FAILED);
+
+ /*
+ * Fill the region with no-ops with a branch at the end
+ * for returning to the caller
+ */
+ numinsns = r.size / sizeof(r.base[0]);
+ for (i = 0; i < numinsns - 1; i++)
+ r.base[i] = PPC_INST_NOP;
+ r.base[i] = PPC_INST_BLR;
+
+ /* Setup SIGSEGV handler */
+ act.sa_handler = 0;
+ act.sa_sigaction = segv_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &act.sa_mask) != 0);
+ act.sa_flags = SA_SIGINFO;
+ act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+ /*
+ * For these tests, the parent process should clear all bits of
+ * AMR and IAMR, i.e. impose no restrictions, for all available
+ * pkeys. This will be the base for the initial AMR and IAMR
+ * values for all the test thread pairs.
+ *
+ * If the AMR and IAMR bits of all available pkeys are cleared
+ * before running the tests and a fault is generated when
+ * attempting to read, write or execute instructions from a
+ * pkey protected region, the pkey responsible for this must be
+ * the one from the protect-and-access thread since the other
+ * one is fully permissive. Despite that, if the pkey reported
+ * by siginfo is not the restrictive pkey, then there must be a
+ * kernel bug.
+ */
+ reset_pkeys(0);
+
+ /* Setup barrier for protect and protect-and-access threads */
+ FAIL_IF(pthread_attr_init(&attr) != 0);
+ FAIL_IF(pthread_barrier_init(&iteration_barrier, NULL, 2) != 0);
+
+ /* Setup and start protect and protect-and-read threads */
+ puts("starting thread pair (protect, protect-and-read)");
+ r.rights = PKEY_DISABLE_ACCESS;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Setup and start protect and protect-and-write threads */
+ puts("starting thread pair (protect, protect-and-write)");
+ r.rights = PKEY_DISABLE_WRITE;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Setup and start protect and protect-and-execute threads */
+ puts("starting thread pair (protect, protect-and-execute)");
+ r.rights = PKEY_DISABLE_EXECUTE;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Cleanup */
+ FAIL_IF(pthread_attr_destroy(&attr) != 0);
+ FAIL_IF(pthread_barrier_destroy(&iteration_barrier) != 0);
+ munmap(r.base, r.size);
+
+ return 0;
+}
+
+int main(void)
+{
+ test_harness(test, "pkey_siginfo");
+}
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
index e2eed65b7735..30b71b1d78d5 100644
--- a/tools/testing/selftests/powerpc/mm/prot_sao.c
+++ b/tools/testing/selftests/powerpc/mm/prot_sao.c
@@ -7,6 +7,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
+#include <unistd.h>
#include <asm/cputable.h>
@@ -18,8 +19,13 @@ int test_prot_sao(void)
{
char *p;
- /* 2.06 or later should support SAO */
- SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+ /*
+ * SAO was introduced in 2.06 and removed in 3.1. It's disabled in
+ * guests/LPARs by default, so also skip if we are running in a guest.
+ */
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06) ||
+ have_hwcap2(PPC_FEATURE2_ARCH_3_1) ||
+ access("/proc/device-tree/rtas/ibm,hypertas-functions", F_OK) == 0);
/*
* Ensure we can ask for PROT_SAO.
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
new file mode 100644
index 000000000000..ed9143990888
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that loads/stores expand the stack segment, or trigger a SEGV, in
+ * various conditions.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#undef NDEBUG
+#include <assert.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+volatile char *stack_top_ptr;
+volatile unsigned long stack_top_sp;
+volatile char c;
+
+enum access_type {
+ LOAD,
+ STORE,
+};
+
+/*
+ * Consume stack until the stack pointer is below @target_sp, then do an access
+ * (load or store) at offset @delta from either the base of the stack or the
+ * current stack pointer.
+ */
+__attribute__ ((noinline))
+int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta, enum access_type type)
+{
+ unsigned long target;
+ char stack_cur;
+
+ if ((unsigned long)&stack_cur > target_sp)
+ return consume_stack(target_sp, stack_high, delta, type);
+ else {
+ // We don't really need this, but without it GCC might not
+ // generate a recursive call above.
+ stack_top_ptr = &stack_cur;
+
+#ifdef __powerpc__
+ asm volatile ("mr %[sp], %%r1" : [sp] "=r" (stack_top_sp));
+#else
+ asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp));
+#endif
+ target = stack_high - delta + 1;
+ volatile char *p = (char *)target;
+
+ if (type == STORE)
+ *p = c;
+ else
+ c = *p;
+
+ // Do something to prevent the stack frame being popped prior to
+ // our access above.
+ getpid();
+ }
+
+ return 0;
+}
+
+static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high)
+{
+ unsigned long start, end;
+ static char buf[4096];
+ char name[128];
+ FILE *f;
+ int rc;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f) {
+ perror("fopen");
+ return -1;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n",
+ &start, &end, name);
+ if (rc == 2)
+ continue;
+
+ if (rc != 3) {
+ printf("sscanf errored\n");
+ rc = -1;
+ break;
+ }
+
+ if (strstr(name, needle)) {
+ *low = start;
+ *high = end - 1;
+ rc = 0;
+ break;
+ }
+ }
+
+ fclose(f);
+
+ return rc;
+}
+
+int child(unsigned int stack_used, int delta, enum access_type type)
+{
+ unsigned long low, stack_high;
+
+ assert(search_proc_maps("[stack]", &low, &stack_high) == 0);
+
+ assert(consume_stack(stack_high - stack_used, stack_high, delta, type) == 0);
+
+ printf("Access OK: %s delta %-7d used size 0x%06x stack high 0x%lx top_ptr %p top sp 0x%lx actual used 0x%lx\n",
+ type == LOAD ? "load" : "store", delta, stack_used, stack_high,
+ stack_top_ptr, stack_top_sp, stack_high - stack_top_sp + 1);
+
+ return 0;
+}
+
+static int test_one(unsigned int stack_used, int delta, enum access_type type)
+{
+ pid_t pid;
+ int rc;
+
+ pid = fork();
+ if (pid == 0)
+ exit(child(stack_used, delta, type));
+
+ assert(waitpid(pid, &rc, 0) != -1);
+
+ if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0)
+ return 0;
+
+ // We don't expect a non-zero exit that's not a signal
+ assert(!WIFEXITED(rc));
+
+ printf("Faulted: %s delta %-7d used size 0x%06x signal %d\n",
+ type == LOAD ? "load" : "store", delta, stack_used,
+ WTERMSIG(rc));
+
+ return 1;
+}
+
+// This is fairly arbitrary but is well below any of the targets below,
+// so that the delta between the stack pointer and the target is large.
+#define DEFAULT_SIZE (32 * _KB)
+
+static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur)
+{
+ unsigned long delta;
+
+ // We should be able to access anywhere within the rlimit
+ for (delta = page_size; delta <= rlim_cur; delta += page_size)
+ assert(test_one(DEFAULT_SIZE, delta, type) == 0);
+
+ assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0);
+
+ // But if we go past the rlimit it should fail
+ assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0);
+}
+
+static int test(void)
+{
+ unsigned long page_size;
+ struct rlimit rlimit;
+
+ page_size = getpagesize();
+ getrlimit(RLIMIT_STACK, &rlimit);
+ printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur);
+
+ printf("Testing loads ...\n");
+ test_one_type(LOAD, page_size, rlimit.rlim_cur);
+ printf("Testing stores ...\n");
+ test_one_type(STORE, page_size, rlimit.rlim_cur);
+
+ printf("All OK\n");
+
+ return 0;
+}
+
+#ifdef __powerpc__
+#include "utils.h"
+
+int main(void)
+{
+ return test_harness(test, "stack_expansion_ldst");
+}
+#else
+int main(void)
+{
+ return test();
+}
+#endif
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
new file mode 100644
index 000000000000..c8b32a29e274
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that signal delivery is able to expand the stack segment without
+ * triggering a SEGV.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../pmu/lib.h"
+#include "utils.h"
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+static char *stack_base_ptr;
+static char *stack_top_ptr;
+
+static volatile sig_atomic_t sig_occurred = 0;
+
+static void sigusr1_handler(int signal_arg)
+{
+ sig_occurred = 1;
+}
+
+static int consume_stack(unsigned int stack_size, union pipe write_pipe)
+{
+ char stack_cur;
+
+ if ((stack_base_ptr - &stack_cur) < stack_size)
+ return consume_stack(stack_size, write_pipe);
+ else {
+ stack_top_ptr = &stack_cur;
+
+ FAIL_IF(notify_parent(write_pipe));
+
+ while (!sig_occurred)
+ barrier();
+ }
+
+ return 0;
+}
+
+static int child(unsigned int stack_size, union pipe write_pipe)
+{
+ struct sigaction act;
+ char stack_base;
+
+ act.sa_handler = sigusr1_handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ if (sigaction(SIGUSR1, &act, NULL) < 0)
+ err(1, "sigaction");
+
+ stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL);
+
+ FAIL_IF(consume_stack(stack_size, write_pipe));
+
+ printf("size 0x%06x: OK, stack base %p top %p (%zx used)\n",
+ stack_size, stack_base_ptr, stack_top_ptr,
+ stack_base_ptr - stack_top_ptr);
+
+ return 0;
+}
+
+static int test_one_size(unsigned int stack_size)
+{
+ union pipe read_pipe, write_pipe;
+ pid_t pid;
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ close(read_pipe.read_fd);
+ close(write_pipe.write_fd);
+ exit(child(stack_size, read_pipe));
+ }
+
+ close(read_pipe.write_fd);
+ close(write_pipe.read_fd);
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ kill(pid, SIGUSR1);
+
+ FAIL_IF(wait_for_child(pid));
+
+ close(read_pipe.read_fd);
+ close(write_pipe.write_fd);
+
+ return 0;
+}
+
+int test(void)
+{
+ unsigned int i, size;
+
+ // Test with used stack from 1MB - 64K to 1MB + 64K
+ // Increment by 64 to get more coverage of odd sizes
+ for (i = 0; i < (128 * _KB); i += 64) {
+ size = i + (1 * _MB) - (64 * _KB);
+ FAIL_IF(test_one_size(size));
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test, "stack_expansion_signal");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
index 7b4ac4537702..2980abca31e0 100644
--- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -9,6 +9,7 @@
#include <stdbool.h>
#include <string.h>
#include <sys/prctl.h>
+#include <asm/cputable.h>
#include "event.h"
#include "utils.h"
@@ -104,6 +105,9 @@ static int test_body(void)
struct event events[3];
u64 overhead;
+ // The STCX_FAIL event we use works on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions");
setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles");
setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail");
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
index a2d7b0e3dca9..a26ac122c759 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@@ -91,8 +91,6 @@ int back_to_back_ebbs(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
index bc893813483e..bb9f587fa76e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
@@ -42,8 +42,6 @@ int cycles(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
index dcd351d20328..9ae795ce314e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
@@ -99,8 +99,6 @@ int cycles_with_freeze(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
printf("EBBs while frozen %d\n", ebbs_while_frozen);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
index 94c99c12c0f2..4b45a2e70f62 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -71,8 +71,6 @@ int cycles_with_mmcr2(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
index dfbc5c3ad52d..21537d6eb6b7 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -396,8 +396,6 @@ int ebb_child(union pipe read_pipe, union pipe write_pipe)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
index ca2f7d729155..b208bf6ad58d 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@@ -38,8 +38,6 @@ static int victim_child(union pipe read_pipe, union pipe write_pipe)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
FAIL_IF(ebb_state.stats.ebb_count == 0);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
index ac3e6e182614..ba2681a12cc7 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
@@ -75,7 +75,6 @@ static int test_body(void)
ebb_freeze_pmcs();
ebb_global_disable();
- count_pmc(4, sample_period);
mtspr(SPRN_PMC4, 0xdead);
dump_summary_ebb_state();
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
index b8242e9d97d2..791d37ba327b 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
@@ -70,13 +70,6 @@ int multi_counter(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
- count_pmc(2, sample_period);
- count_pmc(3, sample_period);
- count_pmc(4, sample_period);
- count_pmc(5, sample_period);
- count_pmc(6, sample_period);
-
dump_ebb_state();
for (i = 0; i < 6; i++)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
index a05c0e18ded6..9b0f70d59702 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -61,8 +61,6 @@ static int cycles_child(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_summary_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
index 153ebc92234f..2904c741e04e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
@@ -82,8 +82,6 @@ static int test_body(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
if (mmcr0_mismatch)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
index eadad75ed7e6..b29f8ba22d1e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
@@ -76,8 +76,6 @@ int pmc56_overflow(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(2, sample_period);
-
dump_ebb_state();
printf("PMC5/6 overflow %d\n", pmc56_overflowed);
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
index fa12e7d0b4d3..bf1bec013bbb 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -6,6 +6,7 @@
#ifndef __SELFTESTS_POWERPC_PMU_LIB_H
#define __SELFTESTS_POWERPC_PMU_LIB_H
+#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
index 2756fe2efdc5..2d37942bf72b 100644
--- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -12,6 +12,8 @@
#include <string.h>
#include <sys/prctl.h>
+#include <asm/cputable.h>
+
#include "event.h"
#include "lib.h"
#include "utils.h"
@@ -23,12 +25,9 @@
static int per_event_excludes(void)
{
struct event *e, events[4];
- char *platform;
int i;
- platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
- FAIL_IF(!platform);
- SKIP_IF(strcmp(platform, "power8") != 0);
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
/*
* We need to create the events disabled, otherwise the running/enabled
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index d5c64fee032d..bbc05ffc5860 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -150,7 +150,7 @@ static int child(struct shared_info *info)
printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
user_write, info->amr, pkey1, pkey2, pkey3);
- mtspr(SPRN_AMR, info->amr);
+ set_amr(info->amr);
/*
* We won't use pkey3. This tests whether the kernel restores the UAMOR
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index bdbbbe8431e0..bc454f899124 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -44,7 +44,7 @@ struct shared_info {
unsigned long amr2;
/* AMR value that ptrace should refuse to write to the child. */
- unsigned long amr3;
+ unsigned long invalid_amr;
/* IAMR value the parent expects to read from the child. */
unsigned long expected_iamr;
@@ -57,8 +57,8 @@ struct shared_info {
* (even though they're valid ones) because userspace doesn't have
* access to those registers.
*/
- unsigned long new_iamr;
- unsigned long new_uamor;
+ unsigned long invalid_iamr;
+ unsigned long invalid_uamor;
};
static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
@@ -66,11 +66,6 @@ static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
return syscall(__NR_pkey_alloc, flags, init_access_rights);
}
-static int sys_pkey_free(int pkey)
-{
- return syscall(__NR_pkey_free, pkey);
-}
-
static int child(struct shared_info *info)
{
unsigned long reg;
@@ -100,33 +95,37 @@ static int child(struct shared_info *info)
info->amr1 |= 3ul << pkeyshift(pkey1);
info->amr2 |= 3ul << pkeyshift(pkey2);
- info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3);
+ /*
+ * invalid amr value where we try to force write
+ * things which are deined by a uamor setting.
+ */
+ info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor);
+ /*
+ * if PKEY_DISABLE_EXECUTE succeeded we should update the expected_iamr
+ */
if (disable_execute)
info->expected_iamr |= 1ul << pkeyshift(pkey1);
else
info->expected_iamr &= ~(1ul << pkeyshift(pkey1));
- info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3));
-
- info->expected_uamor |= 3ul << pkeyshift(pkey1) |
- 3ul << pkeyshift(pkey2);
- info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2);
- info->new_uamor |= 3ul << pkeyshift(pkey1);
+ /*
+ * We allocated pkey2 and pkey 3 above. Clear the IAMR bits.
+ */
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey2));
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey3));
/*
- * We won't use pkey3. We just want a plausible but invalid key to test
- * whether ptrace will let us write to AMR bits we are not supposed to.
- *
- * This also tests whether the kernel restores the UAMOR permissions
- * after a key is freed.
+ * Create an IAMR value different from expected value.
+ * Kernel will reject an IAMR and UAMOR change.
*/
- sys_pkey_free(pkey3);
+ info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2));
+ info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1));
printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
user_write, info->amr1, pkey1, pkey2, pkey3);
- mtspr(SPRN_AMR, info->amr1);
+ set_amr(info->amr1);
/* Wait for parent to read our AMR value and write a new one. */
ret = prod_parent(&info->child_sync);
@@ -196,9 +195,9 @@ static int parent(struct shared_info *info, pid_t pid)
PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
PARENT_FAIL_IF(ret, &info->child_sync);
- info->amr1 = info->amr2 = info->amr3 = regs[0];
- info->expected_iamr = info->new_iamr = regs[1];
- info->expected_uamor = info->new_uamor = regs[2];
+ info->amr1 = info->amr2 = regs[0];
+ info->expected_iamr = regs[1];
+ info->expected_uamor = regs[2];
/* Wake up child so that it can set itself up. */
ret = prod_child(&info->child_sync);
@@ -234,10 +233,10 @@ static int parent(struct shared_info *info, pid_t pid)
return ret;
/* Write invalid AMR value in child. */
- ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1);
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1);
PARENT_FAIL_IF(ret, &info->child_sync);
- printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3);
+ printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr);
/* Wake up child so that it can verify it didn't change. */
ret = prod_child(&info->child_sync);
@@ -249,7 +248,7 @@ static int parent(struct shared_info *info, pid_t pid)
/* Try to write to IAMR. */
regs[0] = info->amr1;
- regs[1] = info->new_iamr;
+ regs[1] = info->invalid_iamr;
ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
PARENT_FAIL_IF(!ret, &info->child_sync);
@@ -257,7 +256,7 @@ static int parent(struct shared_info *info, pid_t pid)
ptrace_write_running, regs[0], regs[1]);
/* Try to write to IAMR and UAMOR. */
- regs[2] = info->new_uamor;
+ regs[2] = info->invalid_uamor;
ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
PARENT_FAIL_IF(!ret, &info->child_sync);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
index 58cb1a860cc9..4436ca9d3caf 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -78,6 +78,9 @@ int ptrace_tar(void)
pid_t pid;
int ret, status;
+ // TAR was added in v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
index c4fe0e893306..cb9875f764ca 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -61,6 +61,8 @@ int ptrace_vsx(void)
pid_t pid;
int ret, status, i;
+ SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
for (i = 0; i < VEC_MAX; i++)
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index 8c6b982af2a8..c8d82b784102 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -183,6 +183,16 @@ int spectre_v2_test(void)
if (miss_percent > 15) {
printf("Branch misses > 15%% unexpected in this configuration!\n");
printf("Possible mis-match between reported & actual mitigation\n");
+ /*
+ * Such a mismatch may be caused by a guest system
+ * reporting as vulnerable when the host is mitigated.
+ * Return skip code to avoid detecting this as an error.
+ * We are not vulnerable and reporting otherwise, so
+ * missing such a mismatch is safe.
+ */
+ if (state == VULNERABLE)
+ return 4;
+
return 1;
}
break;
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 7fc0623d85c3..9c39f55a58ff 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -8,7 +8,7 @@ build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null
TEST_GEN_PROGS := memcmp_64 strlen
-$(OUTPUT)/memcmp_64: memcmp.c
+$(OUTPUT)/memcmp_64: memcmp.c ../utils.c
$(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec
ifeq ($(build_32bit),1)
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index b1fa7546957f..979df3d98368 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -2,7 +2,9 @@
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <time.h>
+#include <asm/cputable.h>
#include "utils.h"
#define SIZE 256
@@ -13,6 +15,9 @@
#define LARGE_MAX_OFFSET 32
#define LARGE_SIZE_START 4096
+/* This is big enough to fit LARGE_SIZE and works on 4K & 64K kernels */
+#define MAP_SIZE (64 * 1024)
+
#define MAX_OFFSET_DIFF_S1_S2 48
int vmx_count;
@@ -68,25 +73,25 @@ static void test_one(char *s1, char *s2, unsigned long max_offset,
static int testcase(bool islarge)
{
- char *s1;
- char *s2;
- unsigned long i;
-
- unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
- unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
- int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
-
- s1 = memalign(128, alloc_size);
- if (!s1) {
- perror("memalign");
- exit(1);
- }
+ unsigned long i, comp_size, alloc_size;
+ char *p, *s1, *s2;
+ int iterations;
- s2 = memalign(128, alloc_size);
- if (!s2) {
- perror("memalign");
- exit(1);
- }
+ comp_size = (islarge ? LARGE_SIZE : SIZE);
+ alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+ iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+
+ p = mmap(NULL, 4 * MAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ FAIL_IF(p == MAP_FAILED);
+
+ /* Put s1/s2 at the end of a page */
+ s1 = p + MAP_SIZE - alloc_size;
+ s2 = p + 3 * MAP_SIZE - alloc_size;
+
+ /* And unmap the subsequent page to force a fault if we overread */
+ munmap(p + MAP_SIZE, MAP_SIZE);
+ munmap(p + 3 * MAP_SIZE, MAP_SIZE);
srandom(time(0));
@@ -147,6 +152,11 @@ static int testcase(bool islarge)
static int testcases(void)
{
+#ifdef __powerpc64__
+ // vcmpequd used in memcmp_64.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+#endif
+
testcase(0);
testcase(1);
return 0;
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 5ee0e98c4896..18b6a773d5c7 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -16,6 +16,7 @@
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
+#include <sys/sysinfo.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>
@@ -88,28 +89,40 @@ void *get_auxv_entry(int type)
int pick_online_cpu(void)
{
- cpu_set_t mask;
- int cpu;
+ int ncpus, cpu = -1;
+ cpu_set_t *mask;
+ size_t size;
+
+ ncpus = get_nprocs_conf();
+ size = CPU_ALLOC_SIZE(ncpus);
+ mask = CPU_ALLOC(ncpus);
+ if (!mask) {
+ perror("malloc");
+ return -1;
+ }
- CPU_ZERO(&mask);
+ CPU_ZERO_S(size, mask);
- if (sched_getaffinity(0, sizeof(mask), &mask)) {
+ if (sched_getaffinity(0, size, mask)) {
perror("sched_getaffinity");
- return -1;
+ goto done;
}
/* We prefer a primary thread, but skip 0 */
- for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8)
- if (CPU_ISSET(cpu, &mask))
- return cpu;
+ for (cpu = 8; cpu < ncpus; cpu += 8)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
/* Search for anything, but in reverse */
- for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--)
- if (CPU_ISSET(cpu, &mask))
- return cpu;
+ for (cpu = ncpus - 1; cpu >= 0; cpu--)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
printf("No cpus in affinity mask?!\n");
- return -1;
+
+done:
+ CPU_FREE(mask);
+ return cpu;
}
bool is_ppc64le(void)
@@ -293,3 +306,31 @@ void set_dscr(unsigned long val)
asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
}
+
+int using_hash_mmu(bool *using_hash)
+{
+ char line[128];
+ FILE *f;
+ int rc;
+
+ f = fopen("/proc/cpuinfo", "r");
+ FAIL_IF(!f);
+
+ rc = 0;
+ while (fgets(line, sizeof(line), f) != NULL) {
+ if (strcmp(line, "MMU : Hash\n") == 0) {
+ *using_hash = true;
+ goto out;
+ }
+
+ if (strcmp(line, "MMU : Radix\n") == 0) {
+ *using_hash = false;
+ goto out;
+ }
+ }
+
+ rc = -1;
+out:
+ fclose(f);
+ return rc;
+}
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index e8a657a5f48a..384589095864 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
+#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
+#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -1131,6 +1133,220 @@ static int set_signal_handler(void)
return ret;
}
+struct test_membarrier_thread_args {
+ int stop;
+ intptr_t percpu_list_ptr;
+};
+
+/* Worker threads modify data in their "active" percpu lists. */
+void *test_membarrier_worker_thread(void *arg)
+{
+ struct test_membarrier_thread_args *args =
+ (struct test_membarrier_thread_args *)arg;
+ const int iters = opt_reps;
+ int i;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Wait for initialization. */
+ while (!atomic_load(&args->percpu_list_ptr)) {}
+
+ for (i = 0; i < iters; ++i) {
+ int ret;
+
+ do {
+ int cpu = rseq_cpu_start();
+
+ ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
+ sizeof(struct percpu_list_entry) * cpu, 1, cpu);
+ } while (rseq_unlikely(ret));
+ }
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ return NULL;
+}
+
+void test_membarrier_init_percpu_list(struct percpu_list *list)
+{
+ int i;
+
+ memset(list, 0, sizeof(*list));
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_list_node *node;
+
+ node = malloc(sizeof(*node));
+ assert(node);
+ node->data = 0;
+ node->next = NULL;
+ list->c[i].head = node;
+ }
+}
+
+void test_membarrier_free_percpu_list(struct percpu_list *list)
+{
+ int i;
+
+ for (i = 0; i < CPU_SETSIZE; i++)
+ free(list->c[i].head);
+}
+
+static int sys_membarrier(int cmd, int flags, int cpu_id)
+{
+ return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+
+/*
+ * The manager thread swaps per-cpu lists that worker threads see,
+ * and validates that there are no unexpected modifications.
+ */
+void *test_membarrier_manager_thread(void *arg)
+{
+ struct test_membarrier_thread_args *args =
+ (struct test_membarrier_thread_args *)arg;
+ struct percpu_list list_a, list_b;
+ intptr_t expect_a = 0, expect_b = 0;
+ int cpu_a = 0, cpu_b = 0;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Init lists. */
+ test_membarrier_init_percpu_list(&list_a);
+ test_membarrier_init_percpu_list(&list_b);
+
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+
+ while (!atomic_load(&args->stop)) {
+ /* list_a is "active". */
+ cpu_a = rand() % CPU_SETSIZE;
+ /*
+ * As list_b is "inactive", we should never see changes
+ * to list_b.
+ */
+ if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
+ fprintf(stderr, "Membarrier test failed\n");
+ abort();
+ }
+
+ /* Make list_b "active". */
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
+ if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
+ errno != ENXIO /* missing CPU */) {
+ perror("sys_membarrier");
+ abort();
+ }
+ /*
+ * Cpu A should now only modify list_b, so the values
+ * in list_a should be stable.
+ */
+ expect_a = atomic_load(&list_a.c[cpu_a].head->data);
+
+ cpu_b = rand() % CPU_SETSIZE;
+ /*
+ * As list_a is "inactive", we should never see changes
+ * to list_a.
+ */
+ if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
+ fprintf(stderr, "Membarrier test failed\n");
+ abort();
+ }
+
+ /* Make list_a "active". */
+ atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+ if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
+ errno != ENXIO /* missing CPU*/) {
+ perror("sys_membarrier");
+ abort();
+ }
+ /* Remember a value from list_b. */
+ expect_b = atomic_load(&list_b.c[cpu_b].head->data);
+ }
+
+ test_membarrier_free_percpu_list(&list_a);
+ test_membarrier_free_percpu_list(&list_b);
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ return NULL;
+}
+
+/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
+#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+void test_membarrier(void)
+{
+ const int num_threads = opt_threads;
+ struct test_membarrier_thread_args thread_args;
+ pthread_t worker_threads[num_threads];
+ pthread_t manager_thread;
+ int i, ret;
+
+ if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
+ perror("sys_membarrier");
+ abort();
+ }
+
+ thread_args.stop = 0;
+ thread_args.percpu_list_ptr = 0;
+ ret = pthread_create(&manager_thread, NULL,
+ test_membarrier_manager_thread, &thread_args);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_create(&worker_threads[i], NULL,
+ test_membarrier_worker_thread, &thread_args);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(worker_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ atomic_store(&thread_args.stop, 1);
+ ret = pthread_join(manager_thread, NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+}
+#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
+void test_membarrier(void)
+{
+ fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
+ "Skipping membarrier test.\n");
+}
+#endif
+
static void show_usage(int argc, char **argv)
{
printf("Usage : %s <OPTIONS>\n",
@@ -1153,7 +1369,7 @@ static void show_usage(int argc, char **argv)
printf(" [-r N] Number of repetitions per thread (default 5000)\n");
printf(" [-d] Disable rseq system call (no initialization)\n");
printf(" [-D M] Disable rseq for each M threads\n");
- printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
+ printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
printf(" [-v] Verbose output.\n");
printf(" [-h] Show this help.\n");
@@ -1268,6 +1484,7 @@ int main(int argc, char **argv)
case 'i':
case 'b':
case 'm':
+ case 'r':
break;
default:
show_usage(argc, argv);
@@ -1320,6 +1537,10 @@ int main(int argc, char **argv)
printf_verbose("counter increment\n");
test_percpu_inc();
break;
+ case 'r':
+ printf_verbose("membarrier\n");
+ test_membarrier();
+ break;
}
if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index b2da6004fe30..640411518e46 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -279,6 +279,63 @@ error1:
#endif
}
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ * pval = *(ptr+off)
+ * *pval += inc;
+ */
+static inline __attribute__((always_inline))
+int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+#endif
+ /* get p+v */
+ "movq %[ptr], %%rbx\n\t"
+ "addq %[off], %%rbx\n\t"
+ /* get pv */
+ "movq (%%rbx), %%rcx\n\t"
+ /* *pv += inc */
+ "addq %[inc], (%%rcx)\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_abi] "r" (&__rseq_abi),
+ /* final store input */
+ [ptr] "m" (*ptr),
+ [off] "er" (off),
+ [inc] "er" (inc)
+ : "memory", "cc", "rax", "rbx", "rcx"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
static inline __attribute__((always_inline))
int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
intptr_t *v2, intptr_t newv2,
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
index e426304fd4a0..f51bc83c9e41 100755
--- a/tools/testing/selftests/rseq/run_param_test.sh
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -15,6 +15,7 @@ TEST_LIST=(
"-T m"
"-T m -M"
"-T i"
+ "-T r"
)
TEST_NAME=(
@@ -25,6 +26,7 @@ TEST_NAME=(
"memcpy"
"memcpy with barrier"
"increment"
+ "membarrier"
)
IFS="$OLDIFS"
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 535720b2592a..4a180439ee9e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -133,6 +133,8 @@ struct seccomp_data {
# define __NR_seccomp 348
# elif defined(__xtensa__)
# define __NR_seccomp 337
+# elif defined(__sh__)
+# define __NR_seccomp 372
# else
# warning "seccomp syscall number unknown for this architecture"
# define __NR_seccomp 0xffff
@@ -772,8 +774,15 @@ void *kill_thread(void *data)
return (void *)SIBLING_EXIT_UNKILLED;
}
+enum kill_t {
+ KILL_THREAD,
+ KILL_PROCESS,
+ RET_UNKNOWN
+};
+
/* Prepare a thread that will kill itself or both of us. */
-void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+void kill_thread_or_group(struct __test_metadata *_metadata,
+ enum kill_t kill_how)
{
pthread_t thread;
void *status;
@@ -789,11 +798,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
.len = (unsigned short)ARRAY_SIZE(filter_thread),
.filter = filter_thread,
};
+ int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA;
struct sock_filter filter_process[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+ BPF_STMT(BPF_RET|BPF_K, kill),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog_process = {
@@ -806,13 +816,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
}
ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
- kill_process ? &prog_process : &prog_thread));
+ kill_how == KILL_THREAD ? &prog_thread
+ : &prog_process));
/*
* Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
* flag cannot be downgraded by a new filter.
*/
- ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+ if (kill_how == KILL_PROCESS)
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
/* Start a thread that will exit immediately. */
ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
@@ -840,7 +852,7 @@ TEST(KILL_thread)
child_pid = fork();
ASSERT_LE(0, child_pid);
if (child_pid == 0) {
- kill_thread_or_group(_metadata, false);
+ kill_thread_or_group(_metadata, KILL_THREAD);
_exit(38);
}
@@ -859,7 +871,7 @@ TEST(KILL_process)
child_pid = fork();
ASSERT_LE(0, child_pid);
if (child_pid == 0) {
- kill_thread_or_group(_metadata, true);
+ kill_thread_or_group(_metadata, KILL_PROCESS);
_exit(38);
}
@@ -870,6 +882,27 @@ TEST(KILL_process)
ASSERT_EQ(SIGSYS, WTERMSIG(status));
}
+TEST(KILL_unknown)
+{
+ int status;
+ pid_t child_pid;
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ kill_thread_or_group(_metadata, RET_UNKNOWN);
+ _exit(38);
+ }
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+ /* If the entire process was killed, we'll see SIGSYS. */
+ EXPECT_TRUE(WIFSIGNALED(status)) {
+ TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
+ }
+ ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)
{
@@ -1665,66 +1698,148 @@ TEST_F(TRACE_poke, getpid_runs_normally)
}
#if defined(__x86_64__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM orig_rax
-# define SYSCALL_RET rax
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).orig_rax
+# define SYSCALL_RET(_regs) (_regs).rax
#elif defined(__i386__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM orig_eax
-# define SYSCALL_RET eax
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).orig_eax
+# define SYSCALL_RET(_regs) (_regs).eax
#elif defined(__arm__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM ARM_r7
-# define SYSCALL_RET ARM_r0
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).ARM_r7
+# ifndef PTRACE_SET_SYSCALL
+# define PTRACE_SET_SYSCALL 23
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
+# define SYSCALL_RET(_regs) (_regs).ARM_r0
#elif defined(__aarch64__)
-# define ARCH_REGS struct user_pt_regs
-# define SYSCALL_NUM regs[8]
-# define SYSCALL_RET regs[0]
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM(_regs) (_regs).regs[8]
+# ifndef NT_ARM_SYSTEM_CALL
+# define NT_ARM_SYSTEM_CALL 0x404
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ struct iovec __v; \
+ typeof(_nr) __nr = (_nr); \
+ __v.iov_base = &__nr; \
+ __v.iov_len = sizeof(__nr); \
+ EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \
+ NT_ARM_SYSTEM_CALL, &__v)); \
+ } while (0)
+# define SYSCALL_RET(_regs) (_regs).regs[0]
#elif defined(__riscv) && __riscv_xlen == 64
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM a7
-# define SYSCALL_RET a0
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).a7
+# define SYSCALL_RET(_regs) (_regs).a0
#elif defined(__csky__)
-# define ARCH_REGS struct pt_regs
-#if defined(__CSKYABIV2__)
-# define SYSCALL_NUM regs[3]
-#else
-# define SYSCALL_NUM regs[9]
-#endif
-# define SYSCALL_RET a0
+# define ARCH_REGS struct pt_regs
+# if defined(__CSKYABIV2__)
+# define SYSCALL_NUM(_regs) (_regs).regs[3]
+# else
+# define SYSCALL_NUM(_regs) (_regs).regs[9]
+# endif
+# define SYSCALL_RET(_regs) (_regs).a0
#elif defined(__hppa__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM gr[20]
-# define SYSCALL_RET gr[28]
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).gr[20]
+# define SYSCALL_RET(_regs) (_regs).gr[28]
#elif defined(__powerpc__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM gpr[0]
-# define SYSCALL_RET gpr[3]
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).gpr[0]
+# define SYSCALL_RET(_regs) (_regs).gpr[3]
+# define SYSCALL_RET_SET(_regs, _val) \
+ do { \
+ typeof(_val) _result = (_val); \
+ /* \
+ * A syscall error is signaled by CR0 SO bit \
+ * and the code is stored as a positive value. \
+ */ \
+ if (_result < 0) { \
+ SYSCALL_RET(_regs) = -result; \
+ (_regs).ccr |= 0x10000000; \
+ } else { \
+ SYSCALL_RET(_regs) = result; \
+ (_regs).ccr &= ~0x10000000; \
+ } \
+ } while (0)
+# define SYSCALL_RET_SET_ON_PTRACE_EXIT
#elif defined(__s390__)
-# define ARCH_REGS s390_regs
-# define SYSCALL_NUM gprs[2]
-# define SYSCALL_RET gprs[2]
-# define SYSCALL_NUM_RET_SHARE_REG
+# define ARCH_REGS s390_regs
+# define SYSCALL_NUM(_regs) (_regs).gprs[2]
+# define SYSCALL_RET_SET(_regs, _val) \
+ TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__mips__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM regs[2]
-# define SYSCALL_SYSCALL_NUM regs[4]
-# define SYSCALL_RET regs[2]
-# define SYSCALL_NUM_RET_SHARE_REG
+# include <asm/unistd_nr_n32.h>
+# include <asm/unistd_nr_n64.h>
+# include <asm/unistd_nr_o32.h>
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) \
+ ({ \
+ typeof((_regs).regs[2]) _nr; \
+ if ((_regs).regs[2] == __NR_O32_Linux) \
+ _nr = (_regs).regs[4]; \
+ else \
+ _nr = (_regs).regs[2]; \
+ _nr; \
+ })
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ if ((_regs).regs[2] == __NR_O32_Linux) \
+ (_regs).regs[4] = _nr; \
+ else \
+ (_regs).regs[2] = _nr; \
+ } while (0)
+# define SYSCALL_RET_SET(_regs, _val) \
+ TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__xtensa__)
-# define ARCH_REGS struct user_pt_regs
-# define SYSCALL_NUM syscall
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM(_regs) (_regs).syscall
/*
* On xtensa syscall return value is in the register
* a2 of the current window which is not fixed.
*/
-#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2]
+#define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2]
+#elif defined(__sh__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).gpr[3]
+# define SYSCALL_RET(_regs) (_regs).gpr[0]
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
+/*
+ * Most architectures can change the syscall by just updating the
+ * associated register. This is the default if not defined above.
+ */
+#ifndef SYSCALL_NUM_SET
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ SYSCALL_NUM(_regs) = (_nr); \
+ } while (0)
+#endif
+/*
+ * Most architectures can change the syscall return value by just
+ * writing to the SYSCALL_RET register. This is the default if not
+ * defined above. If an architecture cannot set the return value
+ * (for example when the syscall and return value register is
+ * shared), report it with TH_LOG() in an arch-specific definition
+ * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
+ */
+#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
+# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
+#endif
+#ifndef SYSCALL_RET_SET
+# define SYSCALL_RET_SET(_regs, _val) \
+ do { \
+ SYSCALL_RET(_regs) = (_val); \
+ } while (0)
+#endif
+
/* When the syscall return can't be changed, stub out the tests for it. */
-#ifdef SYSCALL_NUM_RET_SHARE_REG
+#ifndef SYSCALL_RET
# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action) \
@@ -1739,116 +1854,92 @@ TEST_F(TRACE_poke, getpid_runs_normally)
} while (0)
#endif
-/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
+/*
+ * Some architectures (e.g. powerpc) can only set syscall
+ * return values on syscall exit during ptrace.
+ */
+const bool ptrace_entry_set_syscall_nr = true;
+const bool ptrace_entry_set_syscall_ret =
+#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
+ true;
+#else
+ false;
+#endif
+
+/*
+ * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
* architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
*/
#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
-#define HAVE_GETREGS
+# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
+# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
+#else
+# define ARCH_GETREGS(_regs) ({ \
+ struct iovec __v; \
+ __v.iov_base = &(_regs); \
+ __v.iov_len = sizeof(_regs); \
+ ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \
+ })
+# define ARCH_SETREGS(_regs) ({ \
+ struct iovec __v; \
+ __v.iov_base = &(_regs); \
+ __v.iov_len = sizeof(_regs); \
+ ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \
+ })
#endif
/* Architecture-specific syscall fetching routine. */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
ARCH_REGS regs;
-#ifdef HAVE_GETREGS
- EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
- TH_LOG("PTRACE_GETREGS failed");
- return -1;
- }
-#else
- struct iovec iov;
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
- TH_LOG("PTRACE_GETREGSET failed");
+ EXPECT_EQ(0, ARCH_GETREGS(regs)) {
return -1;
}
-#endif
-#if defined(__mips__)
- if (regs.SYSCALL_NUM == __NR_O32_Linux)
- return regs.SYSCALL_SYSCALL_NUM;
-#endif
- return regs.SYSCALL_NUM;
+ return SYSCALL_NUM(regs);
}
/* Architecture-specific syscall changing routine. */
-void change_syscall(struct __test_metadata *_metadata,
- pid_t tracee, int syscall, int result)
+void __change_syscall(struct __test_metadata *_metadata,
+ pid_t tracee, long *syscall, long *ret)
{
- int ret;
- ARCH_REGS regs;
-#ifdef HAVE_GETREGS
- ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
-#else
- struct iovec iov;
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
- EXPECT_EQ(0, ret) {}
+ ARCH_REGS orig, regs;
-#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
- defined(__s390__) || defined(__hppa__) || defined(__riscv) || \
- defined(__xtensa__) || defined(__csky__)
- {
- regs.SYSCALL_NUM = syscall;
- }
-#elif defined(__mips__)
- {
- if (regs.SYSCALL_NUM == __NR_O32_Linux)
- regs.SYSCALL_SYSCALL_NUM = syscall;
- else
- regs.SYSCALL_NUM = syscall;
- }
+ /* Do not get/set registers if we have nothing to do. */
+ if (!syscall && !ret)
+ return;
-#elif defined(__arm__)
-# ifndef PTRACE_SET_SYSCALL
-# define PTRACE_SET_SYSCALL 23
-# endif
- {
- ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
- EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, ARCH_GETREGS(regs)) {
+ return;
}
+ orig = regs;
-#elif defined(__aarch64__)
-# ifndef NT_ARM_SYSTEM_CALL
-# define NT_ARM_SYSTEM_CALL 0x404
-# endif
- {
- iov.iov_base = &syscall;
- iov.iov_len = sizeof(syscall);
- ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
- &iov);
- EXPECT_EQ(0, ret);
- }
+ if (syscall)
+ SYSCALL_NUM_SET(regs, *syscall);
-#else
- ASSERT_EQ(1, 0) {
- TH_LOG("How is the syscall changed on this architecture?");
- }
-#endif
+ if (ret)
+ SYSCALL_RET_SET(regs, *ret);
- /* If syscall is skipped, change return value. */
- if (syscall == -1)
-#ifdef SYSCALL_NUM_RET_SHARE_REG
- TH_LOG("Can't modify syscall return on this architecture");
+ /* Flush any register changes made. */
+ if (memcmp(&orig, &regs, sizeof(orig)) != 0)
+ EXPECT_EQ(0, ARCH_SETREGS(regs));
+}
-#elif defined(__xtensa__)
- regs.SYSCALL_RET(regs) = result;
-#else
- regs.SYSCALL_RET = result;
-#endif
+/* Change only syscall number. */
+void change_syscall_nr(struct __test_metadata *_metadata,
+ pid_t tracee, long syscall)
+{
+ __change_syscall(_metadata, tracee, &syscall, NULL);
+}
-#ifdef HAVE_GETREGS
- ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
-#else
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
- EXPECT_EQ(0, ret);
+/* Change syscall return value (and set syscall number to -1). */
+void change_syscall_ret(struct __test_metadata *_metadata,
+ pid_t tracee, long ret)
+{
+ long syscall = -1;
+
+ __change_syscall(_metadata, tracee, &syscall, &ret);
}
void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
@@ -1866,17 +1957,17 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
case 0x1002:
/* change getpid to getppid. */
EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, __NR_getppid, 0);
+ change_syscall_nr(_metadata, tracee, __NR_getppid);
break;
case 0x1003:
/* skip gettid with valid return code. */
EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, -1, 45000);
+ change_syscall_ret(_metadata, tracee, 45000);
break;
case 0x1004:
/* skip openat with error. */
EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, -1, -ESRCH);
+ change_syscall_ret(_metadata, tracee, -ESRCH);
break;
case 0x1005:
/* do nothing (allow getppid) */
@@ -1891,12 +1982,21 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
}
+FIXTURE(TRACE_syscall) {
+ struct sock_fprog prog;
+ pid_t tracer, mytid, mypid, parent;
+ long syscall_nr;
+};
+
void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
int status, void *args)
{
- int ret, nr;
+ int ret;
unsigned long msg;
static bool entry;
+ long syscall_nr_val, syscall_ret_val;
+ long *syscall_nr = NULL, *syscall_ret = NULL;
+ FIXTURE_DATA(TRACE_syscall) *self = args;
/*
* The traditional way to tell PTRACE_SYSCALL entry/exit
@@ -1910,24 +2010,48 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
- if (!entry)
- return;
+ /*
+ * Some architectures only support setting return values during
+ * syscall exit under ptrace, and on exit the syscall number may
+ * no longer be available. Therefore, save the initial sycall
+ * number here, so it can be examined during both entry and exit
+ * phases.
+ */
+ if (entry)
+ self->syscall_nr = get_syscall(_metadata, tracee);
- nr = get_syscall(_metadata, tracee);
+ /*
+ * Depending on the architecture's syscall setting abilities, we
+ * pick which things to set during this phase (entry or exit).
+ */
+ if (entry == ptrace_entry_set_syscall_nr)
+ syscall_nr = &syscall_nr_val;
+ if (entry == ptrace_entry_set_syscall_ret)
+ syscall_ret = &syscall_ret_val;
+
+ /* Now handle the actual rewriting cases. */
+ switch (self->syscall_nr) {
+ case __NR_getpid:
+ syscall_nr_val = __NR_getppid;
+ /* Never change syscall return for this case. */
+ syscall_ret = NULL;
+ break;
+ case __NR_gettid:
+ syscall_nr_val = -1;
+ syscall_ret_val = 45000;
+ break;
+ case __NR_openat:
+ syscall_nr_val = -1;
+ syscall_ret_val = -ESRCH;
+ break;
+ default:
+ /* Unhandled, do nothing. */
+ return;
+ }
- if (nr == __NR_getpid)
- change_syscall(_metadata, tracee, __NR_getppid, 0);
- if (nr == __NR_gettid)
- change_syscall(_metadata, tracee, -1, 45000);
- if (nr == __NR_openat)
- change_syscall(_metadata, tracee, -1, -ESRCH);
+ __change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
}
-FIXTURE(TRACE_syscall) {
- struct sock_fprog prog;
- pid_t tracer, mytid, mypid, parent;
-};
-
FIXTURE_VARIANT(TRACE_syscall) {
/*
* All of the SECCOMP_RET_TRACE behaviors can be tested with either
@@ -1986,7 +2110,7 @@ FIXTURE_SETUP(TRACE_syscall)
self->tracer = setup_trace_fixture(_metadata,
variant->use_ptrace ? tracer_ptrace
: tracer_seccomp,
- NULL, variant->use_ptrace);
+ self, variant->use_ptrace);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
ASSERT_EQ(0, ret);
@@ -3136,11 +3260,11 @@ skip:
static int user_notif_syscall(int nr, unsigned int flags)
{
struct sock_filter filter[] = {
- BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
@@ -3693,7 +3817,7 @@ TEST(user_notification_filter_empty)
long ret;
int status;
struct pollfd pollfd;
- struct clone_args args = {
+ struct __clone_args args = {
.flags = CLONE_FILES,
.exit_signal = SIGCHLD,
};
@@ -3709,7 +3833,7 @@ TEST(user_notification_filter_empty)
if (pid == 0) {
int listener;
- listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
if (listener < 0)
_exit(EXIT_FAILURE);
@@ -3747,7 +3871,7 @@ TEST(user_notification_filter_empty_threaded)
long ret;
int status;
struct pollfd pollfd;
- struct clone_args args = {
+ struct __clone_args args = {
.flags = CLONE_FILES,
.exit_signal = SIGCHLD,
};
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
index d5a2da428752..be8266f5d04c 100644
--- a/tools/testing/selftests/splice/.gitignore
+++ b/tools/testing/selftests/splice/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
default_file_splice_read
+splice_read
diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile
index e519b159b60d..541cd826d5a5 100644
--- a/tools/testing/selftests/splice/Makefile
+++ b/tools/testing/selftests/splice/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := default_file_splice_read.sh
-TEST_GEN_PROGS_EXTENDED := default_file_splice_read
+TEST_PROGS := default_file_splice_read.sh short_splice_read.sh
+TEST_GEN_PROGS_EXTENDED := default_file_splice_read splice_read
include ../lib.mk
diff --git a/tools/testing/selftests/splice/config b/tools/testing/selftests/splice/config
new file mode 100644
index 000000000000..058c928368b8
--- /dev/null
+++ b/tools/testing/selftests/splice/config
@@ -0,0 +1 @@
+CONFIG_TEST_LKM=m
diff --git a/tools/testing/selftests/splice/settings b/tools/testing/selftests/splice/settings
new file mode 100644
index 000000000000..89cedfc0d12b
--- /dev/null
+++ b/tools/testing/selftests/splice/settings
@@ -0,0 +1 @@
+timeout=5
diff --git a/tools/testing/selftests/splice/short_splice_read.sh b/tools/testing/selftests/splice/short_splice_read.sh
new file mode 100755
index 000000000000..7810d3589d9a
--- /dev/null
+++ b/tools/testing/selftests/splice/short_splice_read.sh
@@ -0,0 +1,56 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+set -e
+
+ret=0
+
+do_splice()
+{
+ filename="$1"
+ bytes="$2"
+ expected="$3"
+
+ out=$(./splice_read "$filename" "$bytes" | cat)
+ if [ "$out" = "$expected" ] ; then
+ echo "ok: $filename $bytes"
+ else
+ echo "FAIL: $filename $bytes"
+ ret=1
+ fi
+}
+
+test_splice()
+{
+ filename="$1"
+
+ full=$(cat "$filename")
+ two=$(echo "$full" | grep -m1 . | cut -c-2)
+
+ # Make sure full splice has the same contents as a standard read.
+ do_splice "$filename" 4096 "$full"
+
+ # Make sure a partial splice see the first two characters.
+ do_splice "$filename" 2 "$two"
+}
+
+# proc_single_open(), seq_read()
+test_splice /proc/$$/limits
+# special open, seq_read()
+test_splice /proc/$$/comm
+
+# proc_handler, proc_dointvec_minmax
+test_splice /proc/sys/fs/nr_open
+# proc_handler, proc_dostring
+test_splice /proc/sys/kernel/modprobe
+# proc_handler, special read
+test_splice /proc/sys/kernel/version
+
+if ! [ -d /sys/module/test_module/sections ] ; then
+ modprobe test_module
+fi
+# kernfs, attr
+test_splice /sys/module/test_module/coresize
+# kernfs, binattr
+test_splice /sys/module/test_module/sections/.init.text
+
+exit $ret
diff --git a/tools/testing/selftests/splice/splice_read.c b/tools/testing/selftests/splice/splice_read.c
new file mode 100644
index 000000000000..46dae6a25cfb
--- /dev/null
+++ b/tools/testing/selftests/splice/splice_read.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int main(int argc, char *argv[])
+{
+ int fd;
+ size_t size;
+ ssize_t spliced;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s INPUT [BYTES]\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ fd = open(argv[1], O_RDONLY);
+ if (fd < 0) {
+ perror(argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if (argc == 3)
+ size = atol(argv[2]);
+ else {
+ struct stat statbuf;
+
+ if (fstat(fd, &statbuf) < 0) {
+ perror(argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if (statbuf.st_size > INT_MAX) {
+ fprintf(stderr, "%s: Too big\n", argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ size = statbuf.st_size;
+ }
+
+ /* splice(2) file to stdout. */
+ spliced = splice(fd, NULL, STDOUT_FILENO, NULL,
+ size, SPLICE_F_MOVE);
+ if (spliced < 0) {
+ perror("splice");
+ return EXIT_FAILURE;
+ }
+
+ close(fd);
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 7656c7ce79d9..0e73a16874c4 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -13,6 +13,7 @@ DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
+TEST_FILES := settings
include ../lib.mk
diff --git a/tools/testing/selftests/timers/settings b/tools/testing/selftests/timers/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/timers/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index a9026706d597..30873b19d04b 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -3,6 +3,23 @@
uname_M := $(shell uname -m 2>/dev/null || echo not)
MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
+# Without this, failed build products remain, with up-to-date timestamps,
+# thus tricking Make (and you!) into believing that All Is Well, in subsequent
+# make invocations:
+.DELETE_ON_ERROR:
+
+# Avoid accidental wrong builds, due to built-in rules working just a little
+# bit too well--but not quite as well as required for our situation here.
+#
+# In other words, "make userfaultfd" is supposed to fail to build at all,
+# because this Makefile only supports either "make" (all), or "make /full/path".
+# However, the built-in rules, if not suppressed, will pick up CFLAGS and the
+# initial LDLIBS (but not the target-specific LDLIBS, because those are only
+# set for the full path target!). This causes it to get pretty far into building
+# things despite using incorrect values such as an *occasionally* incomplete
+# LDLIBS.
+MAKEFLAGS += --no-builtin-rules
+
CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
LDLIBS = -lrt
TEST_GEN_FILES = compaction_test
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
index bcec71250873..9b420140ba2b 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -18,7 +18,8 @@
#include "../kselftest.h"
-#define MAP_SIZE 1048576
+#define MAP_SIZE_MB 100
+#define MAP_SIZE (MAP_SIZE_MB * 1024 * 1024)
struct map_list {
void *map;
@@ -165,7 +166,7 @@ int main(int argc, char **argv)
void *map = NULL;
unsigned long mem_free = 0;
unsigned long hugepage_size = 0;
- unsigned long mem_fragmentable = 0;
+ long mem_fragmentable_MB = 0;
if (prereq() != 0) {
printf("Either the sysctl compact_unevictable_allowed is not\n"
@@ -190,9 +191,9 @@ int main(int argc, char **argv)
return -1;
}
- mem_fragmentable = mem_free * 0.8 / 1024;
+ mem_fragmentable_MB = mem_free * 0.8 / 1024;
- while (mem_fragmentable > 0) {
+ while (mem_fragmentable_MB > 0) {
map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
if (map == MAP_FAILED)
@@ -213,7 +214,7 @@ int main(int argc, char **argv)
for (i = 0; i < MAP_SIZE; i += page_size)
*(unsigned long *)(map + i) = (unsigned long)map + i;
- mem_fragmentable--;
+ mem_fragmentable_MB -= MAP_SIZE_MB;
}
for (entry = list; entry != NULL; entry = entry->next) {
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 43b4dfe161a2..1d4359341e44 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -15,12 +15,12 @@
#define PAGE_SIZE sysconf(_SC_PAGESIZE)
#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
-#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
-#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark)
+#define GUP_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
-#define PIN_FAST_BENCHMARK _IOWR('g', 4, struct gup_benchmark)
-#define PIN_BENCHMARK _IOWR('g', 5, struct gup_benchmark)
+#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_benchmark)
+#define PIN_BENCHMARK _IOWR('g', 4, struct gup_benchmark)
+#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_benchmark)
/* Just the flags we need, copied from mm.h: */
#define FOLL_WRITE 0x01 /* check pte is writable */
@@ -52,6 +52,9 @@ int main(int argc, char **argv)
case 'b':
cmd = PIN_BENCHMARK;
break;
+ case 'L':
+ cmd = PIN_LONGTERM_BENCHMARK;
+ break;
case 'm':
size = atoi(optarg) * MB;
break;
@@ -67,9 +70,6 @@ int main(int argc, char **argv)
case 'T':
thp = 0;
break;
- case 'L':
- cmd = GUP_LONGTERM_BENCHMARK;
- break;
case 'U':
cmd = GUP_BENCHMARK;
break;
@@ -105,12 +105,16 @@ int main(int argc, char **argv)
gup.flags |= FOLL_WRITE;
fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
- if (fd == -1)
- perror("open"), exit(1);
+ if (fd == -1) {
+ perror("open");
+ exit(1);
+ }
p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
- if (p == MAP_FAILED)
- perror("mmap"), exit(1);
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
gup.addr = (unsigned long)p;
if (thp == 1)
@@ -123,8 +127,10 @@ int main(int argc, char **argv)
for (i = 0; i < repeats; i++) {
gup.size = size;
- if (ioctl(fd, cmd, &gup))
- perror("ioctl"), exit(1);
+ if (ioctl(fd, cmd, &gup)) {
+ perror("ioctl");
+ exit(1);
+ }
printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
gup.put_delta_usec);
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 91d38a29956b..0a28a6a29581 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -680,7 +680,7 @@ TEST_F(hmm, anon_write_hugetlbfs)
n = gethugepagesizes(pagesizes, 4);
if (n <= 0)
- return;
+ SKIP(return, "Huge page size could not be determined");
for (idx = 0; --n > 0; ) {
if (pagesizes[n] < pagesizes[idx])
idx = n;
@@ -694,7 +694,7 @@ TEST_F(hmm, anon_write_hugetlbfs)
buffer->ptr = get_hugepage_region(size, GHR_STRICT);
if (buffer->ptr == NULL) {
free(buffer);
- return;
+ SKIP(return, "Huge page could not be allocated");
}
buffer->fd = -1;
@@ -942,6 +942,41 @@ TEST_F(hmm, migrate_fault)
}
/*
+ * Migrate anonymous shared memory to device private memory.
+ */
+TEST_F(hmm, migrate_shared)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, -ENOENT);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
* Try to migrate various memory types to device private memory.
*/
TEST_F(hmm2, migrate_mixed)
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
index 6af951900aa3..312889edb84a 100644
--- a/tools/testing/selftests/vm/map_hugetlb.c
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -83,7 +83,7 @@ int main(int argc, char **argv)
}
if (shift)
- printf("%u kB hugepages\n", 1 << shift);
+ printf("%u kB hugepages\n", 1 << (shift - 10));
else
printf("Default size hugepages\n");
printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 61e5cfeb1350..9b0912a01777 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -227,8 +227,10 @@ static void hugetlb_allocate_area(void **alloc_area)
huge_fd, *alloc_area == area_src ? 0 :
nr_pages * page_size);
if (area_alias == MAP_FAILED) {
- if (munmap(*alloc_area, nr_pages * page_size) < 0)
- perror("hugetlb munmap"), exit(1);
+ if (munmap(*alloc_area, nr_pages * page_size) < 0) {
+ perror("hugetlb munmap");
+ exit(1);
+ }
*alloc_area = NULL;
return;
}
@@ -337,9 +339,10 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
/* Undo write-protect, do wakeup after that */
prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
- if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
- fprintf(stderr, "clear WP failed for address 0x%Lx\n",
- start), exit(1);
+ if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) {
+ fprintf(stderr, "clear WP failed for address 0x%Lx\n", start);
+ exit(1);
+ }
}
static void *locking_thread(void *arg)
@@ -359,8 +362,10 @@ static void *locking_thread(void *arg)
seed += cpu;
bzero(&rand, sizeof(rand));
bzero(&randstate, sizeof(randstate));
- if (initstate_r(seed, randstate, sizeof(randstate), &rand))
- fprintf(stderr, "srandom_r error\n"), exit(1);
+ if (initstate_r(seed, randstate, sizeof(randstate), &rand)) {
+ fprintf(stderr, "srandom_r error\n");
+ exit(1);
+ }
} else {
page_nr = -bounces;
if (!(bounces & BOUNCE_RACINGFAULTS))
@@ -369,12 +374,16 @@ static void *locking_thread(void *arg)
while (!finished) {
if (bounces & BOUNCE_RANDOM) {
- if (random_r(&rand, &rand_nr))
- fprintf(stderr, "random_r 1 error\n"), exit(1);
+ if (random_r(&rand, &rand_nr)) {
+ fprintf(stderr, "random_r 1 error\n");
+ exit(1);
+ }
page_nr = rand_nr;
if (sizeof(page_nr) > sizeof(rand_nr)) {
- if (random_r(&rand, &rand_nr))
- fprintf(stderr, "random_r 2 error\n"), exit(1);
+ if (random_r(&rand, &rand_nr)) {
+ fprintf(stderr, "random_r 2 error\n");
+ exit(1);
+ }
page_nr |= (((unsigned long) rand_nr) << 16) <<
16;
}
@@ -385,11 +394,13 @@ static void *locking_thread(void *arg)
start = time(NULL);
if (bounces & BOUNCE_VERIFY) {
count = *area_count(area_dst, page_nr);
- if (!count)
+ if (!count) {
fprintf(stderr,
"page_nr %lu wrong count %Lu %Lu\n",
page_nr, count,
- count_verify[page_nr]), exit(1);
+ count_verify[page_nr]);
+ exit(1);
+ }
/*
@@ -401,11 +412,12 @@ static void *locking_thread(void *arg)
*/
#if 1
if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
- page_size))
+ page_size)) {
fprintf(stderr,
"my_bcmp page_nr %lu wrong count %Lu %Lu\n",
- page_nr, count,
- count_verify[page_nr]), exit(1);
+ page_nr, count, count_verify[page_nr]);
+ exit(1);
+ }
#else
unsigned long loops;
@@ -437,7 +449,7 @@ static void *locking_thread(void *arg)
fprintf(stderr,
"page_nr %lu memory corruption %Lu %Lu\n",
page_nr, count,
- count_verify[page_nr]), exit(1);
+ count_verify[page_nr]); exit(1);
}
count++;
*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
@@ -461,12 +473,14 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
offset);
if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
/* real retval in ufdio_copy.copy */
- if (uffdio_copy->copy != -EEXIST)
+ if (uffdio_copy->copy != -EEXIST) {
fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
- uffdio_copy->copy), exit(1);
+ uffdio_copy->copy);
+ exit(1);
+ }
} else {
fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n",
- uffdio_copy->copy), exit(1);
+ uffdio_copy->copy); exit(1);
}
}
@@ -474,9 +488,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
{
struct uffdio_copy uffdio_copy;
- if (offset >= nr_pages * page_size)
- fprintf(stderr, "unexpected offset %lu\n",
- offset), exit(1);
+ if (offset >= nr_pages * page_size) {
+ fprintf(stderr, "unexpected offset %lu\n", offset);
+ exit(1);
+ }
uffdio_copy.dst = (unsigned long) area_dst + offset;
uffdio_copy.src = (unsigned long) area_src + offset;
uffdio_copy.len = page_size;
@@ -487,12 +502,14 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
uffdio_copy.copy = 0;
if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
/* real retval in ufdio_copy.copy */
- if (uffdio_copy.copy != -EEXIST)
+ if (uffdio_copy.copy != -EEXIST) {
fprintf(stderr, "UFFDIO_COPY error %Ld\n",
- uffdio_copy.copy), exit(1);
+ uffdio_copy.copy);
+ exit(1);
+ }
} else if (uffdio_copy.copy != page_size) {
fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
- uffdio_copy.copy), exit(1);
+ uffdio_copy.copy); exit(1);
} else {
if (test_uffdio_copy_eexist && retry) {
test_uffdio_copy_eexist = false;
@@ -521,11 +538,11 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
if (ret < 0) {
if (errno == EAGAIN)
return 1;
- else
- perror("blocking read error"), exit(1);
+ perror("blocking read error");
} else {
- fprintf(stderr, "short read\n"), exit(1);
+ fprintf(stderr, "short read\n");
}
+ exit(1);
}
return 0;
@@ -536,9 +553,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
{
unsigned long offset;
- if (msg->event != UFFD_EVENT_PAGEFAULT)
- fprintf(stderr, "unexpected msg event %u\n",
- msg->event), exit(1);
+ if (msg->event != UFFD_EVENT_PAGEFAULT) {
+ fprintf(stderr, "unexpected msg event %u\n", msg->event);
+ exit(1);
+ }
if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
wp_range(uffd, msg->arg.pagefault.address, page_size, false);
@@ -546,8 +564,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
} else {
/* Missing page faults */
if (bounces & BOUNCE_VERIFY &&
- msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
- fprintf(stderr, "unexpected write fault\n"), exit(1);
+ msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) {
+ fprintf(stderr, "unexpected write fault\n");
+ exit(1);
+ }
offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
offset &= ~(page_size-1);
@@ -574,25 +594,32 @@ static void *uffd_poll_thread(void *arg)
for (;;) {
ret = poll(pollfd, 2, -1);
- if (!ret)
- fprintf(stderr, "poll error %d\n", ret), exit(1);
- if (ret < 0)
- perror("poll"), exit(1);
+ if (!ret) {
+ fprintf(stderr, "poll error %d\n", ret);
+ exit(1);
+ }
+ if (ret < 0) {
+ perror("poll");
+ exit(1);
+ }
if (pollfd[1].revents & POLLIN) {
- if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
- fprintf(stderr, "read pipefd error\n"),
- exit(1);
+ if (read(pollfd[1].fd, &tmp_chr, 1) != 1) {
+ fprintf(stderr, "read pipefd error\n");
+ exit(1);
+ }
break;
}
- if (!(pollfd[0].revents & POLLIN))
+ if (!(pollfd[0].revents & POLLIN)) {
fprintf(stderr, "pollfd[0].revents %d\n",
- pollfd[0].revents), exit(1);
+ pollfd[0].revents);
+ exit(1);
+ }
if (uffd_read_msg(uffd, &msg))
continue;
switch (msg.event) {
default:
fprintf(stderr, "unexpected msg event %u\n",
- msg.event), exit(1);
+ msg.event); exit(1);
break;
case UFFD_EVENT_PAGEFAULT:
uffd_handle_page_fault(&msg, stats);
@@ -606,8 +633,10 @@ static void *uffd_poll_thread(void *arg)
uffd_reg.range.start = msg.arg.remove.start;
uffd_reg.range.len = msg.arg.remove.end -
msg.arg.remove.start;
- if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
- fprintf(stderr, "remove failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) {
+ fprintf(stderr, "remove failure\n");
+ exit(1);
+ }
break;
case UFFD_EVENT_REMAP:
area_dst = (char *)(unsigned long)msg.arg.remap.to;
@@ -879,8 +908,10 @@ static int faulting_process(int signal_test)
area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
- if (area_dst == MAP_FAILED)
- perror("mremap"), exit(1);
+ if (area_dst == MAP_FAILED) {
+ perror("mremap");
+ exit(1);
+ }
for (; nr < nr_pages; nr++) {
count = *area_count(area_dst, nr);
@@ -888,7 +919,7 @@ static int faulting_process(int signal_test)
fprintf(stderr,
"nr %lu memory corruption %Lu %Lu\n",
nr, count,
- count_verify[nr]), exit(1);
+ count_verify[nr]); exit(1);
}
/*
* Trigger write protection if there is by writting
@@ -901,8 +932,10 @@ static int faulting_process(int signal_test)
return 1;
for (nr = 0; nr < nr_pages; nr++) {
- if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
- fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
+ if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) {
+ fprintf(stderr, "nr %lu is not zero\n", nr);
+ exit(1);
+ }
}
return 0;
@@ -916,12 +949,14 @@ static void retry_uffdio_zeropage(int ufd,
uffdio_zeropage->range.len,
offset);
if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
- if (uffdio_zeropage->zeropage != -EEXIST)
+ if (uffdio_zeropage->zeropage != -EEXIST) {
fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
- uffdio_zeropage->zeropage), exit(1);
+ uffdio_zeropage->zeropage);
+ exit(1);
+ }
} else {
fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
- uffdio_zeropage->zeropage), exit(1);
+ uffdio_zeropage->zeropage); exit(1);
}
}
@@ -933,9 +968,10 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
- if (offset >= nr_pages * page_size)
- fprintf(stderr, "unexpected offset %lu\n",
- offset), exit(1);
+ if (offset >= nr_pages * page_size) {
+ fprintf(stderr, "unexpected offset %lu\n", offset);
+ exit(1);
+ }
uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
uffdio_zeropage.range.len = page_size;
uffdio_zeropage.mode = 0;
@@ -943,22 +979,26 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
if (ret) {
/* real retval in ufdio_zeropage.zeropage */
if (has_zeropage) {
- if (uffdio_zeropage.zeropage == -EEXIST)
- fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
- exit(1);
- else
+ if (uffdio_zeropage.zeropage == -EEXIST) {
+ fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n");
+ exit(1);
+ } else {
fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage);
+ exit(1);
+ }
} else {
- if (uffdio_zeropage.zeropage != -EINVAL)
+ if (uffdio_zeropage.zeropage != -EINVAL) {
fprintf(stderr,
"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage);
+ exit(1);
+ }
}
} else if (has_zeropage) {
if (uffdio_zeropage.zeropage != page_size) {
fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage); exit(1);
} else {
if (test_uffdio_zeropage_eexist && retry) {
test_uffdio_zeropage_eexist = false;
@@ -970,7 +1010,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
} else {
fprintf(stderr,
"UFFDIO_ZEROPAGE succeeded %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
+ uffdio_zeropage.zeropage); exit(1);
}
return 0;
@@ -1000,19 +1040,24 @@ static int userfaultfd_zeropage_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ exit(1);
+ }
expected_ioctls = uffd_test_ops->expected_ioctls;
if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
+ expected_ioctls) {
fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
+ "unexpected missing ioctl for anon memory\n");
+ exit(1);
+ }
if (uffdio_zeropage(uffd, 0)) {
- if (my_bcmp(area_dst, zeropage, page_size))
- fprintf(stderr, "zeropage is not zero\n"), exit(1);
+ if (my_bcmp(area_dst, zeropage, page_size)) {
+ fprintf(stderr, "zeropage is not zero\n");
+ exit(1);
+ }
}
close(uffd);
@@ -1047,32 +1092,41 @@ static int userfaultfd_events_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ exit(1);
+ }
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
+ fprintf(stderr, "unexpected missing ioctl for anon memory\n");
+ exit(1);
+ }
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
- perror("uffd_poll_thread create"), exit(1);
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
+ perror("uffd_poll_thread create");
+ exit(1);
+ }
pid = fork();
- if (pid < 0)
- perror("fork"), exit(1);
+ if (pid < 0) {
+ perror("fork");
+ exit(1);
+ }
if (!pid)
return faulting_process(0);
waitpid(pid, &err, 0);
- if (err)
- fprintf(stderr, "faulting process failed\n"), exit(1);
+ if (err) {
+ fprintf(stderr, "faulting process failed\n");
+ exit(1);
+ }
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
- perror("pipe write"), exit(1);
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
+ perror("pipe write");
+ exit(1);
+ }
if (pthread_join(uffd_mon, NULL))
return 1;
@@ -1110,38 +1164,49 @@ static int userfaultfd_sig_test(void)
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (test_uffdio_wp)
uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ exit(1);
+ }
expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
+ fprintf(stderr, "unexpected missing ioctl for anon memory\n");
+ exit(1);
+ }
- if (faulting_process(1))
- fprintf(stderr, "faulting process failed\n"), exit(1);
+ if (faulting_process(1)) {
+ fprintf(stderr, "faulting process failed\n");
+ exit(1);
+ }
if (uffd_test_ops->release_pages(area_dst))
return 1;
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
- perror("uffd_poll_thread create"), exit(1);
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
+ perror("uffd_poll_thread create");
+ exit(1);
+ }
pid = fork();
- if (pid < 0)
- perror("fork"), exit(1);
+ if (pid < 0) {
+ perror("fork");
+ exit(1);
+ }
if (!pid)
exit(faulting_process(2));
waitpid(pid, &err, 0);
- if (err)
- fprintf(stderr, "faulting process failed\n"), exit(1);
+ if (err) {
+ fprintf(stderr, "faulting process failed\n");
+ exit(1);
+ }
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
- perror("pipe write"), exit(1);
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
+ perror("pipe write");
+ exit(1);
+ }
if (pthread_join(uffd_mon, (void **)&userfaults))
return 1;
@@ -1395,7 +1460,7 @@ static void set_test_type(const char *type)
test_type = TEST_SHMEM;
uffd_test_ops = &shmem_uffd_test_ops;
} else {
- fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
+ fprintf(stderr, "Unknown test type: %s\n", type); exit(1);
}
if (test_type == TEST_HUGETLB)
@@ -1403,12 +1468,15 @@ static void set_test_type(const char *type)
else
page_size = sysconf(_SC_PAGE_SIZE);
- if (!page_size)
- fprintf(stderr, "Unable to determine page size\n"),
- exit(2);
+ if (!page_size) {
+ fprintf(stderr, "Unable to determine page size\n");
+ exit(2);
+ }
if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
- > page_size)
- fprintf(stderr, "Impossible to run this test\n"), exit(2);
+ > page_size) {
+ fprintf(stderr, "Impossible to run this test\n");
+ exit(2);
+ }
}
static void sigalrm(int sig)
@@ -1425,8 +1493,10 @@ int main(int argc, char **argv)
if (argc < 4)
usage();
- if (signal(SIGALRM, sigalrm) == SIG_ERR)
- fprintf(stderr, "failed to arm SIGALRM"), exit(1);
+ if (signal(SIGALRM, sigalrm) == SIG_ERR) {
+ fprintf(stderr, "failed to arm SIGALRM");
+ exit(1);
+ }
alarm(ALARM_INTERVAL_SECS);
set_test_type(argv[1]);
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 998319553523..7161cfc2e60b 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -443,6 +443,68 @@ static void test_unexpected_base(void)
#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
+static void test_ptrace_write_gs_read_base(void)
+{
+ int status;
+ pid_t child = fork();
+
+ if (child < 0)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
+
+ printf("[RUN]\tARCH_SET_GS to 1\n");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ raise(SIGTRAP);
+ _exit(0);
+ }
+
+ wait(&status);
+
+ if (WSTOPSIG(status) == SIGTRAP) {
+ unsigned long base;
+ unsigned long gs_offset = USER_REGS_OFFSET(gs);
+ unsigned long base_offset = USER_REGS_OFFSET(gs_base);
+
+ /* Read the initial base. It should be 1. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+ if (base == 1) {
+ printf("[OK]\tGSBASE started at 1\n");
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
+ }
+
+ printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
+
+ /* Poke an LDT selector into GS. */
+ if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
+ err(1, "PTRACE_POKEUSER");
+
+ /* And read the base. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+
+ if (base == 0 || base == 1) {
+ printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
+ }
+ }
+
+ ptrace(PTRACE_CONT, child, NULL, NULL);
+
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
+}
+
static void test_ptrace_write_gsbase(void)
{
int status;
@@ -517,6 +579,9 @@ static void test_ptrace_write_gsbase(void)
END:
ptrace(PTRACE_CONT, child, NULL, NULL);
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
}
int main()
@@ -526,6 +591,9 @@ int main()
shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ /* Do these tests before we have an LDT. */
+ test_ptrace_write_gs_read_base();
+
/* Probe FSGSBASE */
sethandler(SIGILL, sigill, 0);
if (sigsetjmp(jmpbuf, 1) == 0) {
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index c41f24b517f4..65c141ebfbbd 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -462,6 +462,17 @@ static int test_vsys_x(void)
return 0;
}
+/*
+ * Debuggers expect ptrace() to be able to peek at the vsyscall page.
+ * Use process_vm_readv() as a proxy for ptrace() to test this. We
+ * want it to work in the vsyscall=emulate case and to fail in the
+ * vsyscall=xonly case.
+ *
+ * It's worth noting that this ABI is a bit nutty. write(2) can't
+ * read from the vsyscall page on any kernel version or mode. The
+ * fact that ptrace() ever worked was a nice courtesy of old kernels,
+ * but the code to support it is fairly gross.
+ */
static int test_process_vm_readv(void)
{
#ifdef __x86_64__
@@ -477,8 +488,12 @@ static int test_process_vm_readv(void)
remote.iov_len = 4096;
ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
if (ret != 4096) {
- printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
- return 0;
+ /*
+ * We expect process_vm_readv() to work if and only if the
+ * vsyscall page is readable.
+ */
+ printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno);
+ return vsyscall_map_r ? 1 : 0;
}
if (vsyscall_map_r) {
@@ -488,6 +503,9 @@ static int test_process_vm_readv(void)
printf("[FAIL]\tIt worked but returned incorrect data\n");
return 1;
}
+ } else {
+ printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n");
+ return 1;
}
#endif
diff --git a/tools/usb/Build b/tools/usb/Build
new file mode 100644
index 000000000000..2ad6f9745816
--- /dev/null
+++ b/tools/usb/Build
@@ -0,0 +1,2 @@
+testusb-y += testusb.o
+ffs-test-y += ffs-test.o
diff --git a/tools/usb/Makefile b/tools/usb/Makefile
index 01d758d73b6d..1b128e551b2e 100644
--- a/tools/usb/Makefile
+++ b/tools/usb/Makefile
@@ -1,14 +1,51 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for USB tools
+include ../scripts/Makefile.include
-PTHREAD_LIBS = -lpthread
-WARNINGS = -Wall -Wextra
-CFLAGS = $(WARNINGS) -g -I../include
-LDFLAGS = $(PTHREAD_LIBS)
+bindir ?= /usr/bin
-all: testusb ffs-test
-%: %.c
- $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS)
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+override CFLAGS += -O2 -Wall -Wextra -g -D_GNU_SOURCE -I$(OUTPUT)include -I$(srctree)/tools/include
+override LDFLAGS += -lpthread
+
+ALL_TARGETS := testusb ffs-test
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
+
+all: $(ALL_PROGRAMS)
+
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+TESTUSB_IN := $(OUTPUT)testusb-in.o
+$(TESTUSB_IN): FORCE
+ $(Q)$(MAKE) $(build)=testusb
+$(OUTPUT)testusb: $(TESTUSB_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
+
+FFS_TEST_IN := $(OUTPUT)ffs-test-in.o
+$(FFS_TEST_IN): FORCE
+ $(Q)$(MAKE) $(build)=ffs-test
+$(OUTPUT)ffs-test: $(FFS_TEST_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $< -o $@ $(LDFLAGS)
clean:
- $(RM) testusb ffs-test
+ rm -f $(ALL_PROGRAMS)
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.o.cmd' -delete
+
+install: $(ALL_PROGRAMS)
+ install -d -m 755 $(DESTDIR)$(bindir); \
+ for program in $(ALL_PROGRAMS); do \
+ install $$program $(DESTDIR)$(bindir); \
+ done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare
diff --git a/tools/virtio/linux/virtio_config.h b/tools/virtio/linux/virtio_config.h
index dbf14c1e2188..f2640e505c4e 100644
--- a/tools/virtio/linux/virtio_config.h
+++ b/tools/virtio/linux/virtio_config.h
@@ -42,16 +42,16 @@ static inline void __virtio_clear_bit(struct virtio_device *vdev,
(__virtio_test_bit((dev), feature))
/**
- * virtio_has_iommu_quirk - determine whether this device has the iommu quirk
+ * virtio_has_dma_quirk - determine whether this device has the DMA quirk
* @vdev: the device
*/
-static inline bool virtio_has_iommu_quirk(const struct virtio_device *vdev)
+static inline bool virtio_has_dma_quirk(const struct virtio_device *vdev)
{
/*
* Note the reverse polarity of the quirk feature (compared to most
* other features), this is for compatibility with legacy systems.
*/
- return !virtio_has_feature(vdev, VIRTIO_F_IOMMU_PLATFORM);
+ return !virtio_has_feature(vdev, VIRTIO_F_ACCESS_PLATFORM);
}
static inline bool virtio_is_little_endian(struct virtio_device *vdev)
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 58c0eab71bca..0517c744b04e 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -78,6 +78,7 @@
#define KPF_ARCH 38
#define KPF_UNCACHED 39
#define KPF_SOFTDIRTY 40
+#define KPF_ARCH_2 41
/* [48-] take some arbitrary free slots for expanding overloaded flags
* not part of kernel API
@@ -135,6 +136,7 @@ static const char * const page_flag_names[] = {
[KPF_ARCH] = "h:arch",
[KPF_UNCACHED] = "c:uncached",
[KPF_SOFTDIRTY] = "f:softdirty",
+ [KPF_ARCH_2] = "H:arch_2",
[KPF_READAHEAD] = "I:readahead",
[KPF_SLOB_FREE] = "P:slob_free",