summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/accounting/.gitignore1
-rw-r--r--tools/accounting/getdelays.c2
-rw-r--r--tools/arch/sh/include/asm/barrier.h2
-rw-r--r--tools/arch/x86/include/asm/cpufeatures.h5
-rw-r--r--tools/arch/x86/include/asm/msr-index.h12
-rw-r--r--tools/arch/x86/include/asm/orc_types.h3
-rw-r--r--tools/arch/x86/include/asm/unistd_64.h3
-rw-r--r--tools/arch/x86/include/uapi/asm/kvm.h1
-rw-r--r--tools/arch/x86/include/uapi/asm/unistd.h2
-rw-r--r--tools/arch/x86/lib/x86-opcode-map.txt17
-rw-r--r--tools/bootconfig/.gitignore1
-rw-r--r--tools/bootconfig/Makefile27
-rw-r--r--tools/bootconfig/main.c45
-rwxr-xr-xtools/bootconfig/test-bootconfig.sh21
-rw-r--r--tools/bpf/.gitignore1
-rw-r--r--tools/bpf/Makefile6
-rw-r--r--tools/bpf/bpf_asm.c2
-rw-r--r--tools/bpf/bpf_dbg.c2
-rw-r--r--tools/bpf/bpftool/.gitignore3
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-btf.rst11
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-cgroup.rst22
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-feature.rst41
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst21
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-iter.rst81
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-link.rst121
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst37
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-net.rst12
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-perf.rst12
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst42
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst117
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool.rst11
-rw-r--r--tools/bpf/bpftool/Makefile49
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool177
-rw-r--r--tools/bpf/bpftool/btf.c16
-rw-r--r--tools/bpf/bpftool/btf_dumper.c203
-rw-r--r--tools/bpf/bpftool/cfg.c4
-rw-r--r--tools/bpf/bpftool/cgroup.c69
-rw-r--r--tools/bpf/bpftool/common.c42
-rw-r--r--tools/bpf/bpftool/feature.c541
-rw-r--r--tools/bpf/bpftool/gen.c7
-rw-r--r--tools/bpf/bpftool/iter.c88
-rw-r--r--tools/bpf/bpftool/jit_disasm.c1
-rw-r--r--tools/bpf/bpftool/link.c343
-rw-r--r--tools/bpf/bpftool/main.c15
-rw-r--r--tools/bpf/bpftool/main.h57
-rw-r--r--tools/bpf/bpftool/map.c46
-rw-r--r--tools/bpf/bpftool/map_perf_ring.c2
-rw-r--r--tools/bpf/bpftool/net.c12
-rw-r--r--tools/bpf/bpftool/perf.c2
-rw-r--r--tools/bpf/bpftool/prog.c485
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.bpf.c119
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.h46
-rw-r--r--tools/bpf/bpftool/struct_ops.c603
-rw-r--r--tools/bpf/runqslower/.gitignore1
-rw-r--r--tools/bpf/runqslower/Makefile3
-rw-r--r--tools/bpf/runqslower/runqslower.bpf.c4
-rw-r--r--tools/build/.gitignore1
-rw-r--r--tools/build/Makefile.feature8
-rw-r--r--tools/build/feature/.gitignore1
-rw-r--r--tools/build/feature/Makefile18
-rw-r--r--tools/build/feature/test-all.c5
-rw-r--r--tools/build/feature/test-clang-bpf-global-var.c4
-rw-r--r--tools/build/feature/test-file-handle.c17
-rw-r--r--tools/build/feature/test-libopencsd.c4
-rw-r--r--tools/build/feature/test-libpfm4.c9
-rw-r--r--tools/build/feature/test-sync-compare-and-swap.c2
-rw-r--r--tools/cgroup/.gitignore1
-rw-r--r--tools/cgroup/iocost_monitor.py55
-rw-r--r--tools/edid/1024x768.S43
-rw-r--r--tools/edid/1280x1024.S43
-rw-r--r--tools/edid/1600x1200.S43
-rw-r--r--tools/edid/1680x1050.S43
-rw-r--r--tools/edid/1920x1080.S43
-rw-r--r--tools/edid/800x600.S40
-rw-r--r--tools/edid/Makefile37
-rw-r--r--tools/edid/edid.S274
-rw-r--r--tools/edid/hex1
-rw-r--r--tools/gpio/.gitignore2
-rw-r--r--tools/gpio/Build1
-rw-r--r--tools/gpio/Makefile13
-rw-r--r--tools/gpio/gpio-hammer.c19
-rw-r--r--tools/gpio/gpio-utils.c6
-rw-r--r--tools/gpio/gpio-watch.c99
-rw-r--r--tools/gpio/lsgpio.c12
-rw-r--r--tools/iio/.gitignore1
-rw-r--r--tools/include/linux/bits.h24
-rw-r--r--tools/include/linux/build_bug.h82
-rw-r--r--tools/include/linux/compiler-gcc.h12
-rw-r--r--tools/include/linux/compiler.h29
-rw-r--r--tools/include/linux/const.h5
-rw-r--r--tools/include/linux/irqflags.h12
-rw-r--r--tools/include/linux/kallsyms.h2
-rw-r--r--tools/include/linux/kernel.h4
-rw-r--r--tools/include/linux/rbtree.h2
-rw-r--r--tools/include/linux/rbtree_augmented.h2
-rw-r--r--tools/include/uapi/drm/drm.h2
-rw-r--r--tools/include/uapi/drm/i915_drm.h21
-rw-r--r--tools/include/uapi/linux/bpf.h678
-rw-r--r--tools/include/uapi/linux/fscrypt.h1
-rw-r--r--tools/include/uapi/linux/if_link.h7
-rw-r--r--tools/include/uapi/linux/kvm.h51
-rw-r--r--tools/include/uapi/linux/mman.h5
-rw-r--r--tools/include/uapi/linux/perf_event.h24
-rw-r--r--tools/include/uapi/linux/sched.h5
-rw-r--r--tools/include/uapi/linux/stat.h11
-rw-r--r--tools/include/uapi/linux/types.h (renamed from tools/testing/selftests/bpf/include/uapi/linux/types.h)0
-rw-r--r--tools/include/uapi/linux/vhost.h24
-rw-r--r--tools/include/vdso/bits.h9
-rw-r--r--tools/include/vdso/const.h10
-rwxr-xr-xtools/kvm/kvm_stat/kvm_stat322
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.service16
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.txt53
-rw-r--r--tools/laptop/dslm/.gitignore1
-rw-r--r--tools/laptop/freefall/freefall.c2
-rw-r--r--tools/leds/.gitignore1
-rw-r--r--tools/lib/api/fs/Build1
-rw-r--r--tools/lib/api/fs/cgroup.c67
-rw-r--r--tools/lib/api/fs/fs.c17
-rw-r--r--tools/lib/api/fs/fs.h14
-rw-r--r--tools/lib/api/io.h115
-rw-r--r--tools/lib/bpf/.gitignore1
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile6
-rw-r--r--tools/lib/bpf/bpf.c76
-rw-r--r--tools/lib/bpf/bpf.h26
-rw-r--r--tools/lib/bpf/bpf_helpers.h21
-rw-r--r--tools/lib/bpf/bpf_tracing.h239
-rw-r--r--tools/lib/bpf/btf.c20
-rw-r--r--tools/lib/bpf/btf_dump.c12
-rw-r--r--tools/lib/bpf/hashmap.c10
-rw-r--r--tools/lib/bpf/hashmap.h1
-rw-r--r--tools/lib/bpf/libbpf.c1268
-rw-r--r--tools/lib/bpf/libbpf.h64
-rw-r--r--tools/lib/bpf/libbpf.map35
-rw-r--r--tools/lib/bpf/libbpf_internal.h2
-rw-r--r--tools/lib/bpf/libbpf_probes.c6
-rw-r--r--tools/lib/bpf/netlink.c42
-rw-r--r--tools/lib/bpf/ringbuf.c288
-rw-r--r--tools/lib/bpf/xsk.c16
-rw-r--r--tools/lib/lockdep/.gitignore1
-rw-r--r--tools/lib/perf/Documentation/examples/counting.c83
-rw-r--r--tools/lib/perf/cpumap.c2
-rw-r--r--tools/lib/perf/evlist.c4
-rw-r--r--tools/lib/perf/include/perf/event.h7
-rw-r--r--tools/lib/rbtree.c4
-rw-r--r--tools/lib/subcmd/parse-options.h2
-rw-r--r--tools/lib/symbol/kallsyms.c86
-rw-r--r--tools/lib/symbol/kallsyms.h2
-rw-r--r--tools/lib/traceevent/.gitignore1
-rw-r--r--tools/lib/traceevent/event-parse.c2
-rw-r--r--tools/lib/traceevent/kbuffer-parse.c2
-rw-r--r--tools/lib/traceevent/parse-filter.c29
-rw-r--r--tools/memory-model/.gitignore1
-rw-r--r--tools/memory-model/litmus-tests/.gitignore1
-rw-r--r--tools/objtool/.gitignore1
-rw-r--r--tools/objtool/Build18
-rw-r--r--tools/objtool/Documentation/stack-validation.txt41
-rw-r--r--tools/objtool/Makefile21
-rw-r--r--tools/objtool/arch.h19
-rw-r--r--tools/objtool/arch/x86/decode.c302
-rw-r--r--tools/objtool/arch/x86/include/cfi_regs.h25
-rw-r--r--tools/objtool/builtin-check.c14
-rw-r--r--tools/objtool/builtin-orc.c3
-rw-r--r--tools/objtool/builtin.h2
-rw-r--r--tools/objtool/cfi.h35
-rw-r--r--tools/objtool/check.c1061
-rw-r--r--tools/objtool/check.h35
-rw-r--r--tools/objtool/elf.c341
-rw-r--r--tools/objtool/elf.h75
-rw-r--r--tools/objtool/objtool.c4
-rw-r--r--tools/objtool/objtool.h27
-rw-r--r--tools/objtool/orc.h18
-rw-r--r--tools/objtool/orc_dump.c47
-rw-r--r--tools/objtool/orc_gen.c52
-rw-r--r--tools/objtool/special.c4
-rw-r--r--tools/objtool/warn.h2
-rw-r--r--tools/objtool/weak.c40
-rw-r--r--tools/pci/pcitest.c37
-rw-r--r--tools/pcmcia/.gitignore1
-rw-r--r--tools/perf/.gitignore1
-rw-r--r--tools/perf/Documentation/Makefile9
-rw-r--r--tools/perf/Documentation/intel-pt.txt992
-rw-r--r--tools/perf/Documentation/itrace.txt12
-rw-r--r--tools/perf/Documentation/perf-bench.txt8
-rw-r--r--tools/perf/Documentation/perf-c2c.txt13
-rw-r--r--tools/perf/Documentation/perf-config.txt19
-rw-r--r--tools/perf/Documentation/perf-inject.txt3
-rw-r--r--tools/perf/Documentation/perf-intel-pt.txt1042
-rw-r--r--tools/perf/Documentation/perf-list.txt8
-rw-r--r--tools/perf/Documentation/perf-record.txt57
-rw-r--r--tools/perf/Documentation/perf-report.txt20
-rw-r--r--tools/perf/Documentation/perf-script.txt27
-rw-r--r--tools/perf/Documentation/perf-stat.txt44
-rw-r--r--tools/perf/Documentation/perf-top.txt29
-rw-r--r--tools/perf/Documentation/perf.data-file-format.txt16
-rw-r--r--tools/perf/Documentation/security.txt237
-rw-r--r--tools/perf/Makefile.config58
-rw-r--r--tools/perf/Makefile.perf27
-rw-r--r--tools/perf/arch/arm/util/cs-etm.c14
-rw-r--r--tools/perf/arch/arm64/util/Build2
-rw-r--r--tools/perf/arch/arm64/util/arm-spe.c12
-rw-r--r--tools/perf/arch/arm64/util/machine.c27
-rw-r--r--tools/perf/arch/arm64/util/sym-handling.c19
-rw-r--r--tools/perf/arch/arm64/util/unwind-libdw.c6
-rw-r--r--tools/perf/arch/powerpc/util/Build2
-rw-r--r--tools/perf/arch/powerpc/util/header.c8
-rw-r--r--tools/perf/arch/powerpc/util/kvm-stat.c2
-rw-r--r--tools/perf/arch/powerpc/util/sym-handling.c10
-rw-r--r--tools/perf/arch/powerpc/util/unwind-libdw.c6
-rw-r--r--tools/perf/arch/s390/util/kvm-stat.c8
-rw-r--r--tools/perf/arch/x86/entry/syscalls/syscall_64.tbl740
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c8
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-32.c112
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-64.c196
-rw-r--r--tools/perf/arch/x86/tests/insn-x86-dat-src.c236
-rw-r--r--tools/perf/arch/x86/tests/perf-time-to-tsc.c6
-rw-r--r--tools/perf/arch/x86/util/intel-bts.c2
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c62
-rw-r--r--tools/perf/arch/x86/util/kvm-stat.c12
-rw-r--r--tools/perf/arch/x86/util/unwind-libdw.c6
-rw-r--r--tools/perf/bench/Build3
-rw-r--r--tools/perf/bench/bench.h3
-rw-r--r--tools/perf/bench/epoll-ctl.c4
-rw-r--r--tools/perf/bench/epoll-wait.c7
-rw-r--r--tools/perf/bench/futex-hash.c3
-rw-r--r--tools/perf/bench/futex-lock-pi.c3
-rw-r--r--tools/perf/bench/kallsyms-parse.c75
-rw-r--r--tools/perf/bench/sched-messaging.c2
-rw-r--r--tools/perf/bench/synthesize.c262
-rw-r--r--tools/perf/builtin-annotate.c17
-rw-r--r--tools/perf/builtin-bench.c13
-rw-r--r--tools/perf/builtin-c2c.c30
-rw-r--r--tools/perf/builtin-diff.c30
-rw-r--r--tools/perf/builtin-evlist.c2
-rw-r--r--tools/perf/builtin-ftrace.c20
-rw-r--r--tools/perf/builtin-inject.c21
-rw-r--r--tools/perf/builtin-kmem.c65
-rw-r--r--tools/perf/builtin-kvm.c23
-rw-r--r--tools/perf/builtin-list.c2
-rw-r--r--tools/perf/builtin-lock.c42
-rw-r--r--tools/perf/builtin-mem.c26
-rw-r--r--tools/perf/builtin-probe.c3
-rw-r--r--tools/perf/builtin-record.c202
-rw-r--r--tools/perf/builtin-report.c118
-rw-r--r--tools/perf/builtin-sched.c78
-rw-r--r--tools/perf/builtin-script.c508
-rw-r--r--tools/perf/builtin-stat.c221
-rw-r--r--tools/perf/builtin-timechart.c54
-rw-r--r--tools/perf/builtin-top.c89
-rw-r--r--tools/perf/builtin-trace.c207
-rwxr-xr-xtools/perf/check-headers.sh7
-rw-r--r--tools/perf/design.txt3
-rw-r--r--tools/perf/jvmti/libjvmti.c92
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power8/metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/metrics.json149
-rw-r--r--tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json19
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json8
-rw-r--r--tools/perf/pmu-events/arch/s390/cf_z15/extended.json30
-rw-r--r--tools/perf/pmu-events/arch/test/test_cpu/branch.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/branch.json)0
-rw-r--r--tools/perf/pmu-events/arch/test/test_cpu/other.json26
-rw-r--r--tools/perf/pmu-events/arch/test/test_cpu/uncore.json21
-rw-r--r--tools/perf/pmu-events/arch/x86/amdfam17h/cache.json329
-rw-r--r--tools/perf/pmu-events/arch/x86/amdfam17h/other.json65
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/branch.json23
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/cache.json294
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/core.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/core.json)15
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json)64
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/memory.json (renamed from tools/perf/pmu-events/arch/x86/amdfam17h/memory.json)82
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen1/other.json56
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/branch.json52
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/cache.json338
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/core.json130
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json140
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/memory.json341
-rw-r--r--tools/perf/pmu-events/arch/x86/amdzen2/other.json115
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json15
-rw-r--r--tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/mapfile.csv3
-rw-r--r--tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json2
-rw-r--r--tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json5
-rw-r--r--tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json9
-rw-r--r--tools/perf/pmu-events/jevents.c49
-rw-r--r--tools/perf/pmu-events/jevents.h2
-rw-r--r--tools/perf/pmu-events/jsmn.h2
-rw-r--r--tools/perf/pmu-events/pmu-events.h3
-rw-r--r--tools/perf/scripts/perl/check-perf-trace.pl6
-rw-r--r--tools/perf/scripts/perl/failed-syscalls.pl2
-rw-r--r--tools/perf/scripts/perl/rw-by-file.pl6
-rw-r--r--tools/perf/scripts/perl/rw-by-pid.pl10
-rw-r--r--tools/perf/scripts/perl/rwtop.pl10
-rw-r--r--tools/perf/scripts/perl/wakeup-latency.pl6
-rwxr-xr-xtools/perf/scripts/python/bin/flamegraph-record2
-rwxr-xr-xtools/perf/scripts/python/bin/flamegraph-report3
-rwxr-xr-xtools/perf/scripts/python/flamegraph.py124
-rw-r--r--tools/perf/tests/.gitignore1
-rw-r--r--tools/perf/tests/Build4
-rw-r--r--tools/perf/tests/api-io.c304
-rw-r--r--tools/perf/tests/attr/system-wide-dummy50
-rw-r--r--tools/perf/tests/attr/test-record-C012
-rw-r--r--tools/perf/tests/builtin-test.c78
-rw-r--r--tools/perf/tests/demangle-java-test.c42
-rw-r--r--tools/perf/tests/dwarf-unwind.c11
-rw-r--r--tools/perf/tests/event-times.c8
-rw-r--r--tools/perf/tests/event_update.c2
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c25
-rw-r--r--tools/perf/tests/evsel-tp-sched.c10
-rw-r--r--tools/perf/tests/expr.c56
-rw-r--r--tools/perf/tests/hists_cumulate.c10
-rw-r--r--tools/perf/tests/hists_filter.c2
-rw-r--r--tools/perf/tests/hists_output.c10
-rw-r--r--tools/perf/tests/make20
-rw-r--r--tools/perf/tests/mmap-basic.c8
-rw-r--r--tools/perf/tests/openat-syscall-all-cpus.c14
-rw-r--r--tools/perf/tests/openat-syscall-tp-fields.c10
-rw-r--r--tools/perf/tests/openat-syscall.c10
-rw-r--r--tools/perf/tests/parse-events.c155
-rw-r--r--tools/perf/tests/perf-record.c6
-rw-r--r--tools/perf/tests/pfm.c203
-rw-r--r--tools/perf/tests/pmu-events.c540
-rw-r--r--tools/perf/tests/pmu.c4
-rw-r--r--tools/perf/tests/sample-parsing.c19
-rw-r--r--tools/perf/tests/sw-clock.c2
-rw-r--r--tools/perf/tests/switch-tracking.c14
-rw-r--r--tools/perf/tests/tests.h10
-rw-r--r--tools/perf/tests/topology.c12
-rwxr-xr-xtools/perf/trace/beauty/arch_errno_names.sh4
-rw-r--r--tools/perf/trace/beauty/clone.c1
-rw-r--r--tools/perf/trace/beauty/mmap.c1
-rw-r--r--tools/perf/ui/browsers/hists.c144
-rw-r--r--tools/perf/ui/gtk/annotate.c2
-rw-r--r--tools/perf/ui/gtk/hists.c6
-rw-r--r--tools/perf/ui/hist.c103
-rw-r--r--tools/perf/ui/keysyms.h1
-rw-r--r--tools/perf/util/Build21
-rw-r--r--tools/perf/util/annotate.c43
-rw-r--r--tools/perf/util/annotate.h6
-rw-r--r--tools/perf/util/arm-spe-decoder/Build1
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.c219
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-decoder.h82
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c (renamed from tools/perf/util/arm-spe-pkt-decoder.c)0
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h (renamed from tools/perf/util/arm-spe-pkt-decoder.h)16
-rw-r--r--tools/perf/util/arm-spe.c832
-rw-r--r--tools/perf/util/auxtrace.c147
-rw-r--r--tools/perf/util/auxtrace.h31
-rw-r--r--tools/perf/util/block-info.c106
-rw-r--r--tools/perf/util/block-info.h9
-rw-r--r--tools/perf/util/bpf-event.c96
-rw-r--r--tools/perf/util/bpf-event.h7
-rw-r--r--tools/perf/util/bpf-loader.c4
-rw-r--r--tools/perf/util/branch.h43
-rw-r--r--tools/perf/util/callchain.c14
-rw-r--r--tools/perf/util/callchain.h9
-rw-r--r--tools/perf/util/cap.h4
-rw-r--r--tools/perf/util/cgroup.c146
-rw-r--r--tools/perf/util/cgroup.h17
-rw-r--r--tools/perf/util/cloexec.c6
-rw-r--r--tools/perf/util/config.c14
-rw-r--r--tools/perf/util/counts.c10
-rw-r--r--tools/perf/util/counts.h7
-rw-r--r--tools/perf/util/cpumap.c10
-rw-r--r--tools/perf/util/cputopo.h2
-rw-r--r--tools/perf/util/cs-etm-decoder/cs-etm-decoder.c2
-rw-r--r--tools/perf/util/cs-etm.c173
-rw-r--r--tools/perf/util/cs-etm.h3
-rw-r--r--tools/perf/util/data-convert-bt.c6
-rw-r--r--tools/perf/util/demangle-java.c13
-rw-r--r--tools/perf/util/dso.c17
-rw-r--r--tools/perf/util/dso.h6
-rw-r--r--tools/perf/util/dsos.c22
-rw-r--r--tools/perf/util/env.c2
-rw-r--r--tools/perf/util/env.h9
-rw-r--r--tools/perf/util/event.c39
-rw-r--r--tools/perf/util/event.h9
-rw-r--r--tools/perf/util/evlist.c196
-rw-r--r--tools/perf/util/evlist.h9
-rw-r--r--tools/perf/util/evsel.c521
-rw-r--r--tools/perf/util/evsel.h253
-rw-r--r--tools/perf/util/evsel_config.h45
-rw-r--r--tools/perf/util/evsel_fprintf.c11
-rw-r--r--tools/perf/util/evsel_fprintf.h3
-rw-r--r--tools/perf/util/expr.c131
-rw-r--r--tools/perf/util/expr.h36
-rw-r--r--tools/perf/util/expr.l127
-rw-r--r--tools/perf/util/expr.y214
-rw-r--r--tools/perf/util/genelf_debug.c4
-rw-r--r--tools/perf/util/hashmap.c238
-rw-r--r--tools/perf/util/hashmap.h176
-rw-r--r--tools/perf/util/header.c192
-rw-r--r--tools/perf/util/header.h1
-rw-r--r--tools/perf/util/hist.c60
-rw-r--r--tools/perf/util/hist.h8
-rw-r--r--tools/perf/util/intel-bts.c16
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c2
-rw-r--r--tools/perf/util/intel-pt.c329
-rw-r--r--tools/perf/util/jitdump.c2
-rw-r--r--tools/perf/util/jitdump.h6
-rw-r--r--tools/perf/util/llvm-utils.c2
-rw-r--r--tools/perf/util/machine.c468
-rw-r--r--tools/perf/util/machine.h3
-rw-r--r--tools/perf/util/map.c8
-rw-r--r--tools/perf/util/mem-events.c15
-rw-r--r--tools/perf/util/mem-events.h2
-rw-r--r--tools/perf/util/mem2node.c3
-rw-r--r--tools/perf/util/metricgroup.c404
-rw-r--r--tools/perf/util/metricgroup.h8
-rw-r--r--tools/perf/util/mmap.c21
-rw-r--r--tools/perf/util/ordered-events.c2
-rw-r--r--tools/perf/util/ordered-events.h2
-rw-r--r--tools/perf/util/parse-events.c154
-rw-r--r--tools/perf/util/parse-events.h6
-rw-r--r--tools/perf/util/parse-events.l25
-rw-r--r--tools/perf/util/parse-events.y12
-rw-r--r--tools/perf/util/perf_api_probe.c164
-rw-r--r--tools/perf/util/perf_api_probe.h14
-rw-r--r--tools/perf/util/perf_event_attr_fprintf.c3
-rw-r--r--tools/perf/util/pfm.c281
-rw-r--r--tools/perf/util/pfm.h37
-rw-r--r--tools/perf/util/pmu.c185
-rw-r--r--tools/perf/util/pmu.h19
-rw-r--r--tools/perf/util/probe-event.c49
-rw-r--r--tools/perf/util/probe-finder.c1
-rw-r--r--tools/perf/util/pstack.c2
-rw-r--r--tools/perf/util/python-ext-sources1
-rw-r--r--tools/perf/util/python.c4
-rw-r--r--tools/perf/util/record.c185
-rw-r--r--tools/perf/util/record.h8
-rw-r--r--tools/perf/util/s390-cpumcf-kernel.h1
-rw-r--r--tools/perf/util/s390-cpumsf.c12
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c39
-rw-r--r--tools/perf/util/session.c33
-rw-r--r--tools/perf/util/setup.py4
-rw-r--r--tools/perf/util/sideband_evlist.c148
-rw-r--r--tools/perf/util/smt.c10
-rw-r--r--tools/perf/util/sort.c57
-rw-r--r--tools/perf/util/sort.h4
-rw-r--r--tools/perf/util/stat-display.c66
-rw-r--r--tools/perf/util/stat-shadow.c111
-rw-r--r--tools/perf/util/stat.c112
-rw-r--r--tools/perf/util/stat.h8
-rw-r--r--tools/perf/util/symbol-elf.c17
-rw-r--r--tools/perf/util/symbol.c19
-rw-r--r--tools/perf/util/symbol.h2
-rw-r--r--tools/perf/util/symbol_conf.h1
-rw-r--r--tools/perf/util/synthetic-events.c316
-rw-r--r--tools/perf/util/synthetic-events.h1
-rw-r--r--tools/perf/util/syscalltbl.c4
-rw-r--r--tools/perf/util/syscalltbl.h14
-rw-r--r--tools/perf/util/thread-stack.c274
-rw-r--r--tools/perf/util/thread-stack.h11
-rw-r--r--tools/perf/util/thread.c24
-rw-r--r--tools/perf/util/thread.h15
-rw-r--r--tools/perf/util/tool.h2
-rw-r--r--tools/perf/util/top.c2
-rw-r--r--tools/perf/util/top.h3
-rw-r--r--tools/perf/util/trace-event-info.c2
-rw-r--r--tools/perf/util/trace-event-read.c2
-rw-r--r--tools/perf/util/unwind-libunwind-local.c2
-rw-r--r--tools/perf/util/util.c19
-rw-r--r--tools/perf/util/util.h2
-rw-r--r--tools/power/acpi/.gitignore1
-rw-r--r--tools/power/cpupower/.gitignore1
-rw-r--r--tools/power/cpupower/utils/cpupower-info.c2
-rw-r--r--tools/power/cpupower/utils/cpupower-set.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c6
-rw-r--r--tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/nhm_idle.c2
-rw-r--r--tools/power/cpupower/utils/idle_monitor/snb_idle.c2
-rw-r--r--tools/power/pm-graph/Makefile4
-rw-r--r--tools/power/pm-graph/README124
-rwxr-xr-xtools/power/pm-graph/bootgraph.py2
-rw-r--r--tools/power/pm-graph/config/custom-timeline-functions.cfg2
-rw-r--r--tools/power/pm-graph/sleepgraph.865
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py797
-rw-r--r--tools/power/x86/intel-speed-select/.gitignore1
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c630
-rw-r--r--tools/power/x86/intel-speed-select/isst-core.c119
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c302
-rw-r--r--tools/power/x86/intel-speed-select/isst.h13
-rwxr-xr-xtools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py1
-rw-r--r--tools/power/x86/turbostat/.gitignore1
-rw-r--r--tools/scripts/Makefile.include1
-rw-r--r--tools/spi/.gitignore1
-rw-r--r--tools/spi/Makefile6
-rw-r--r--tools/spi/spidev_test.c35
-rw-r--r--tools/testing/kunit/.gitattributes1
-rw-r--r--tools/testing/kunit/.gitignore1
-rw-r--r--tools/testing/kunit/configs/broken_on_uml.config41
-rwxr-xr-xtools/testing/kunit/kunit.py303
-rw-r--r--tools/testing/kunit/kunit_config.py41
-rw-r--r--tools/testing/kunit/kunit_kernel.py84
-rw-r--r--tools/testing/kunit/kunit_parser.py51
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py153
-rw-r--r--tools/testing/kunit/test_data/test_config_printk_time.log31
-rw-r--r--tools/testing/kunit/test_data/test_interrupted_tap_output.log37
-rw-r--r--tools/testing/kunit/test_data/test_kernel_panic_interrupt.log25
-rw-r--r--tools/testing/kunit/test_data/test_multiple_prefixes.log31
-rw-r--r--tools/testing/kunit/test_data/test_output_with_prefix_isolated_correctly.log33
-rw-r--r--tools/testing/kunit/test_data/test_pound_no_prefix.log33
-rw-r--r--tools/testing/kunit/test_data/test_pound_sign.log33
-rw-r--r--tools/testing/nvdimm/Kbuild4
-rw-r--r--tools/testing/nvdimm/test/Kbuild4
-rw-r--r--tools/testing/nvdimm/test/nfit.c2
-rw-r--r--tools/testing/radix-tree/.gitignore1
-rw-r--r--tools/testing/radix-tree/Makefile4
-rw-r--r--tools/testing/radix-tree/iteration_check_2.c87
-rw-r--r--tools/testing/radix-tree/linux.c32
-rw-r--r--tools/testing/radix-tree/linux/slab.h6
-rw-r--r--tools/testing/radix-tree/main.c1
-rw-r--r--tools/testing/radix-tree/test.h1
-rw-r--r--tools/testing/selftests/.gitignore6
-rw-r--r--tools/testing/selftests/Makefile15
-rw-r--r--tools/testing/selftests/android/Makefile2
-rw-r--r--tools/testing/selftests/android/ion/.gitignore1
-rw-r--r--tools/testing/selftests/android/ion/Makefile2
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore1
-rw-r--r--tools/testing/selftests/arm64/tags/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/.gitignore7
-rw-r--r--tools/testing/selftests/bpf/Makefile69
-rw-r--r--tools/testing/selftests/bpf/README.rst45
-rw-r--r--tools/testing/selftests/bpf/bench.c465
-rw-r--r--tools/testing/selftests/bpf/bench.h81
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_count.c91
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_rename.c195
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c566
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c167
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_rename.sh9
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh75
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_trigger.sh9
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h2
-rw-r--r--tools/testing/selftests/bpf/bpf_trace_helpers.h120
-rw-r--r--tools/testing/selftests/bpf/config4
-rw-r--r--tools/testing/selftests/bpf/map_tests/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c167
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h42
-rw-r--r--tools/testing/selftests/bpf/prog_tests/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c (renamed from tools/testing/selftests/bpf/test_align.c)109
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c409
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c110
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c39
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c244
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c95
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c456
-rw-r--r--tools/testing/selftests/bpf/prog_tests/connect_force_port.c166
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/enable_stats.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c69
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c167
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c588
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data_init.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/hashmap.c (renamed from tools/testing/selftests/bpf/test_hashmap.c)280
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfree_skb.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/l4lb_all.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/link_pinning.c105
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_lock.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mmap.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/modify_return.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c85
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_branches.c170
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_buffer.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_access.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_md_access.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/queue_stack_map.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c211
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c102
-rw-r--r--tools/testing/selftests/bpf/prog_tests/section_names.c42
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c73
-rw-r--r--tools/testing/selftests/bpf/prog_tests/signal_pending.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c328
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_ctx.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_helpers.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c124
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c1635
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spinlock.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c110
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_lsm.c86
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_overhead.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trampoline_count.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/vmlinux.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c124
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_attach.c90
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c70
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c97
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_info.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_noinline.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c20
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c44
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c78
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_netlink.c82
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c41
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_file.c44
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c33
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c67
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h38
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c97
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c107
-rw-r--r--tools/testing/selftests/bpf/progs/connect_force_port4.c83
-rw-r--r--tools/testing/selftests/bpf/progs/connect_force_port6.c94
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h2
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_test.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_test.c2
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_connect4.c18
-rw-r--r--tools/testing/selftests/bpf/progs/kfree_skb.c2
-rw-r--r--tools/testing/selftests/bpf/progs/lsm.c48
-rw-r--r--tools/testing/selftests/bpf/progs/modify_return.c49
-rw-r--r--tools/testing/selftests/bpf/progs/perfbuf_bench.c33
-rw-r--r--tools/testing/selftests/bpf/progs/ringbuf_bench.c60
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_parse_prog.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_haskv.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_map_in_map.c76
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_newkv.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_nokv.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup_link.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c1061
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.h54
-rw-r--r--tools/testing/selftests/bpf/progs/test_enable_stats.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_data.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_link_pinning.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_mmap.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_obj_id.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_overhead.c15
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_branches.c50
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_buffer.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_probe_user.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c78
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_multi.c77
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_assign.c186
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_ctx.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_helpers.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_kern.h (renamed from tools/testing/selftests/bpf/test_sockmap_kern.h)204
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c98
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_trampoline_count.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_vmlinux.c84
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c33
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c (renamed from tools/testing/selftests/bpf/progs/test_adjust_tail.c)12
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c30
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c44
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c47
-rw-r--r--tools/testing/selftests/bpf/test_bpftool.py178
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool.sh5
-rw-r--r--tools/testing/selftests/bpf/test_btf.c2
-rw-r--r--tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c159
-rw-r--r--tools/testing/selftests/bpf/test_maps.c58
-rw-r--r--tools/testing/selftests/bpf/test_progs.c206
-rw-r--r--tools/testing/selftests/bpf/test_progs.h38
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c38
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c1060
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c63
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c66
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h5
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c23
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/verifier/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/verifier/and.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/array_access.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds.c105
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_get_stack.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c18
-rw-r--r--tools/testing/selftests/bpf/verifier/const_or.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx.c105
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_skb.c47
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c10
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_value_access.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/event_output.c24
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_access_var_len.c46
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_value_access.c42
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/prevent_map_lookup.c30
-rw-r--r--tools/testing/selftests/bpf/verifier/ref_tracking.c33
-rw-r--r--tools/testing/selftests/bpf/verifier/sock.c117
-rw-r--r--tools/testing/selftests/bpf/verifier/stack_ptr.c40
-rw-r--r--tools/testing/selftests/bpf/verifier/value_illegal_alu.c1
-rw-r--r--tools/testing/selftests/bpf/verifier/value_or_null.c19
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c8
-rw-r--r--tools/testing/selftests/breakpoints/.gitignore1
-rw-r--r--tools/testing/selftests/capabilities/.gitignore1
-rw-r--r--tools/testing/selftests/cgroup/.gitignore1
-rw-r--r--tools/testing/selftests/cgroup/Makefile6
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c126
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h4
-rw-r--r--tools/testing/selftests/cgroup/test_core.c177
-rw-r--r--tools/testing/selftests/clone3/.gitignore1
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h19
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c1
-rw-r--r--tools/testing/selftests/drivers/.gitignore1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh151
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh688
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh47
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh68
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh20
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh384
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh11
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/extack.sh45
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh13
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh18
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh30
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh2
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/router_scale.sh53
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh68
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_ets.sh14
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh533
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh94
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh68
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh222
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py416
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh5
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh130
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh31
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh318
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan.sh229
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh43
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh125
-rw-r--r--tools/testing/selftests/efivarfs/.gitignore1
-rw-r--r--tools/testing/selftests/exec/.gitignore2
-rw-r--r--tools/testing/selftests/exec/Makefile3
-rwxr-xr-xtools/testing/selftests/exec/binfmt_script171
-rw-r--r--tools/testing/selftests/exec/execveat.c8
-rw-r--r--tools/testing/selftests/filesystems/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/binderfs/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/binderfs/Makefile4
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c498
-rw-r--r--tools/testing/selftests/filesystems/epoll/.gitignore1
-rw-r--r--tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c213
-rw-r--r--tools/testing/selftests/firmware/Makefile9
-rwxr-xr-xtools/testing/selftests/firmware/fw_filesystem.sh23
-rw-r--r--tools/testing/selftests/firmware/fw_namespace.c151
-rwxr-xr-xtools/testing/selftests/firmware/fw_run_tests.sh4
-rw-r--r--tools/testing/selftests/ftrace/.gitignore1
-rw-r--r--tools/testing/selftests/ftrace/config1
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest32
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc125
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc105
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions14
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc9
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc5
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc2
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore1
-rwxr-xr-xtools/testing/selftests/gen_kselftest_tar.sh5
-rw-r--r--tools/testing/selftests/gpio/.gitignore1
-rw-r--r--tools/testing/selftests/gpio/Makefile12
-rw-r--r--tools/testing/selftests/ia64/.gitignore1
-rw-r--r--tools/testing/selftests/intel_pstate/.gitignore1
-rw-r--r--tools/testing/selftests/intel_pstate/Makefile2
-rw-r--r--tools/testing/selftests/ipc/.gitignore1
-rw-r--r--tools/testing/selftests/ipc/msgque.c2
-rw-r--r--tools/testing/selftests/ir/.gitignore1
-rw-r--r--tools/testing/selftests/kcmp/.gitignore1
-rwxr-xr-xtools/testing/selftests/kmod/kmod.sh43
-rw-r--r--tools/testing/selftests/kselftest/runner.sh2
-rwxr-xr-xtools/testing/selftests/kselftest_deps.sh272
-rw-r--r--tools/testing/selftests/kselftest_harness.h376
-rw-r--r--tools/testing/selftests/kvm/.gitignore9
-rw-r--r--tools/testing/selftests/kvm/Makefile45
-rw-r--r--tools/testing/selftests/kvm/clear_dirty_log_test.c4
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c661
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c107
-rw-r--r--tools/testing/selftests/kvm/include/evmcs.h6
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h158
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h28
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h11
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm_util.h10
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h27
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c8
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c41
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/ucall.c2
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c6
-rw-r--r--tools/testing/selftests/kvm/lib/io.c12
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c331
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h67
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c83
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c93
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c201
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/svm.c2
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c7
-rw-r--r--tools/testing/selftests/kvm/s390x/memop.c2
-rw-r--r--tools/testing/selftests/kvm/s390x/resets.c138
-rw-r--r--tools/testing/selftests/kvm/s390x/sync_regs_test.c13
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c408
-rw-r--r--tools/testing/selftests/kvm/steal_time.c352
-rw-r--r--tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/debug_regs.c202
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c35
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c109
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmio_warning_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/platform_info_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c21
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c72
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c17
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c255
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c8
-rw-r--r--tools/testing/selftests/kvm/x86_64/xss_msr_test.c2
-rw-r--r--tools/testing/selftests/lib.mk3
-rw-r--r--tools/testing/selftests/lib/config1
-rwxr-xr-xtools/testing/selftests/lkdtm/run.sh22
-rw-r--r--tools/testing/selftests/media_tests/.gitignore1
-rw-r--r--tools/testing/selftests/membarrier/.gitignore1
-rw-r--r--tools/testing/selftests/memfd/.gitignore1
-rw-r--r--tools/testing/selftests/memfd/Makefile22
-rw-r--r--tools/testing/selftests/mount/.gitignore1
-rw-r--r--tools/testing/selftests/mqueue/.gitignore1
-rw-r--r--tools/testing/selftests/net/.gitignore6
-rw-r--r--tools/testing/selftests/net/Makefile5
-rw-r--r--tools/testing/selftests/net/config2
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh593
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh10
-rw-r--r--tools/testing/selftests/net/forwarding/.gitignore1
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh104
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh88
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_dsfield.sh309
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_ets.sh9
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/skbedit_priority.sh168
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh26
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh32
-rw-r--r--tools/testing/selftests/net/hwtstamp_config.c (renamed from tools/testing/selftests/networking/timestamping/hwtstamp_config.c)0
-rw-r--r--tools/testing/selftests/net/ip_defrag.c8
-rw-r--r--tools/testing/selftests/net/mptcp/.gitignore2
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile7
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c71
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh24
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh357
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh130
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c616
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh122
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c162
-rwxr-xr-xtools/testing/selftests/net/reuseaddr_ports_exhausted.sh35
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c (renamed from tools/testing/selftests/networking/timestamping/rxtimestamp.c)0
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c7
-rw-r--r--tools/testing/selftests/net/timestamping.c (renamed from tools/testing/selftests/networking/timestamping/timestamping.c)0
-rw-r--r--tools/testing/selftests/net/tls.c93
-rw-r--r--tools/testing/selftests/net/txtimestamp.c (renamed from tools/testing/selftests/networking/timestamping/txtimestamp.c)179
-rwxr-xr-xtools/testing/selftests/net/txtimestamp.sh (renamed from tools/testing/selftests/networking/timestamping/txtimestamp.sh)33
-rwxr-xr-xtools/testing/selftests/net/vrf-xfrm-tests.sh436
-rw-r--r--tools/testing/selftests/networking/timestamping/.gitignore4
-rw-r--r--tools/testing/selftests/networking/timestamping/Makefile11
-rw-r--r--tools/testing/selftests/networking/timestamping/config2
-rw-r--r--tools/testing/selftests/nsfs/.gitignore1
-rw-r--r--tools/testing/selftests/nsfs/pidns.c2
-rwxr-xr-xtools/testing/selftests/ntb/ntb_test.sh2
-rw-r--r--tools/testing/selftests/openat2/.gitignore1
-rw-r--r--tools/testing/selftests/pid_namespace/.gitignore1
-rw-r--r--tools/testing/selftests/pid_namespace/Makefile8
-rw-r--r--tools/testing/selftests/pid_namespace/config2
-rw-r--r--tools/testing/selftests/pid_namespace/regression_enomem.c45
-rw-r--r--tools/testing/selftests/pidfd/.gitignore2
-rw-r--r--tools/testing/selftests/pidfd/Makefile3
-rw-r--r--tools/testing/selftests/pidfd/config6
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c473
-rw-r--r--tools/testing/selftests/powerpc/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/alignment/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/settings1
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/dscr/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/dscr/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/dscr/settings1
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-basic.sh5
-rw-r--r--tools/testing/selftests/powerpc/math/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules1
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/Makefile8
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/README45
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/gunz_test.c1028
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c433
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c316
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h56
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/include/crb.h155
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/include/nx.h38
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h95
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/include/nxu.h650
l---------tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h1
-rwxr-xr-xtools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh46
-rw-r--r--tools/testing/selftests/powerpc/pmu/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile8
-rw-r--r--tools/testing/selftests/powerpc/pmu/count_stcx_fail.c161
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/trace.h4
-rw-r--r--tools/testing/selftests/powerpc/pmu/loop.S35
-rw-r--r--tools/testing/selftests/powerpc/primitives/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/security/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/signal/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/signal/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/signal/settings1
-rw-r--r--tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c174
-rw-r--r--tools/testing/selftests/powerpc/signal/sigreturn_vdso.c127
-rw-r--r--tools/testing/selftests/powerpc/stringloops/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/syscalls/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/tm/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile6
-rw-r--r--tools/testing/selftests/powerpc/tm/settings1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c74
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c284
-rw-r--r--tools/testing/selftests/powerpc/vphn/.gitignore1
-rw-r--r--tools/testing/selftests/prctl/.gitignore1
-rw-r--r--tools/testing/selftests/proc/.gitignore3
-rw-r--r--tools/testing/selftests/proc/Makefile2
-rw-r--r--tools/testing/selftests/proc/proc-fsconfig-hidepid.c50
-rw-r--r--tools/testing/selftests/proc/proc-multiple-procfs.c48
-rw-r--r--tools/testing/selftests/pstore/.gitignore1
-rwxr-xr-xtools/testing/selftests/pstore/pstore_tests2
-rw-r--r--tools/testing/selftests/ptp/.gitignore1
-rw-r--r--tools/testing/selftests/ptp/testptp.c6
-rw-r--r--tools/testing/selftests/ptrace/.gitignore1
-rw-r--r--tools/testing/selftests/ptrace/Makefile4
-rw-r--r--tools/testing/selftests/ptrace/vmaccess.c86
-rw-r--r--tools/testing/selftests/rcutorture/.gitignore1
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kcsan-collapse.sh22
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh16
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh26
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh52
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh13
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFLIST3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE0110
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE0111
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE0211
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE1018
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore1
-rw-r--r--tools/testing/selftests/resctrl/Makefile17
-rw-r--r--tools/testing/selftests/resctrl/README53
-rw-r--r--tools/testing/selftests/resctrl/cache.c272
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c250
-rw-r--r--tools/testing/selftests/resctrl/cqm_test.c176
-rw-r--r--tools/testing/selftests/resctrl/fill_buf.c213
-rw-r--r--tools/testing/selftests/resctrl/mba_test.c171
-rw-r--r--tools/testing/selftests/resctrl/mbm_test.c145
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h107
-rw-r--r--tools/testing/selftests/resctrl/resctrl_tests.c202
-rw-r--r--tools/testing/selftests/resctrl/resctrl_val.c744
-rw-r--r--tools/testing/selftests/resctrl/resctrlfs.c722
-rw-r--r--tools/testing/selftests/rseq/.gitignore1
-rw-r--r--tools/testing/selftests/rtc/.gitignore1
-rw-r--r--tools/testing/selftests/safesetid/.gitignore1
-rw-r--r--tools/testing/selftests/seccomp/.gitignore1
-rw-r--r--tools/testing/selftests/seccomp/Makefile17
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c98
-rw-r--r--tools/testing/selftests/sigaltstack/.gitignore1
-rw-r--r--tools/testing/selftests/size/.gitignore1
-rw-r--r--tools/testing/selftests/sparc64/drivers/.gitignore1
-rw-r--r--tools/testing/selftests/splice/.gitignore1
-rw-r--r--tools/testing/selftests/sync/.gitignore1
-rw-r--r--tools/testing/selftests/sysctl/config2
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh57
-rw-r--r--tools/testing/selftests/tc-testing/.gitignore1
-rw-r--r--tools/testing/selftests/tc-testing/config6
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json25
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/basic.json902
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/tests.json44
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json21
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json185
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py5
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc_batch.py6
-rw-r--r--tools/testing/selftests/timens/.gitignore1
-rw-r--r--tools/testing/selftests/timens/clock_nanosleep.c2
-rw-r--r--tools/testing/selftests/timens/exec.c1
-rw-r--r--tools/testing/selftests/timens/procfs.c1
-rw-r--r--tools/testing/selftests/timens/timens.c3
-rw-r--r--tools/testing/selftests/timens/timens.h13
-rw-r--r--tools/testing/selftests/timens/timer.c6
-rw-r--r--tools/testing/selftests/timens/timerfd.c5
-rw-r--r--tools/testing/selftests/timers/.gitignore1
-rw-r--r--tools/testing/selftests/tmpfs/.gitignore1
-rwxr-xr-xtools/testing/selftests/tpm2/test_smoke.sh10
-rwxr-xr-xtools/testing/selftests/tpm2/test_space.sh8
-rw-r--r--tools/testing/selftests/vDSO/.gitignore3
-rw-r--r--tools/testing/selftests/vDSO/Makefile5
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c24
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.h31
-rw-r--r--tools/testing/selftests/vDSO/vdso_standalone_test_x86.c4
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getcpu.c54
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_gettimeofday.c (renamed from tools/testing/selftests/vDSO/vdso_test.c)10
-rw-r--r--tools/testing/selftests/vm/.gitignore6
-rw-r--r--tools/testing/selftests/vm/Makefile83
-rw-r--r--tools/testing/selftests/vm/charge_reserved_hugetlb.sh575
-rw-r--r--tools/testing/selftests/vm/config2
-rw-r--r--tools/testing/selftests/vm/gup_benchmark.c15
-rw-r--r--tools/testing/selftests/vm/hmm-tests.c1359
-rw-r--r--tools/testing/selftests/vm/hugetlb_reparenting_test.sh244
-rw-r--r--tools/testing/selftests/vm/khugepaged.c1035
-rw-r--r--tools/testing/selftests/vm/map_hugetlb.c14
-rw-r--r--tools/testing/selftests/vm/mlock2-tests.c233
-rw-r--r--tools/testing/selftests/vm/mremap_dontunmap.c312
-rw-r--r--tools/testing/selftests/vm/pkey-helpers.h225
-rw-r--r--tools/testing/selftests/vm/pkey-powerpc.h133
-rw-r--r--tools/testing/selftests/vm/pkey-x86.h181
-rw-r--r--tools/testing/selftests/vm/protection_keys.c (renamed from tools/testing/selftests/x86/protection_keys.c)696
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests55
-rwxr-xr-xtools/testing/selftests/vm/test_hmm.sh97
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c225
-rw-r--r--tools/testing/selftests/vm/write_hugetlb_memory.sh23
-rw-r--r--tools/testing/selftests/vm/write_to_hugetlbfs.c240
-rw-r--r--tools/testing/selftests/watchdog/.gitignore1
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh54
-rw-r--r--tools/testing/selftests/wireguard/qemu/.gitignore1
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config1
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config2
-rw-r--r--tools/testing/selftests/x86/.gitignore2
-rw-r--r--tools/testing/selftests/x86/Makefile2
-rw-r--r--tools/testing/selftests/x86/pkey-helpers.h219
-rw-r--r--tools/testing/selftests/x86/ptrace_syscall.c8
-rw-r--r--tools/testing/selftests/x86/test_vdso.c5
-rw-r--r--tools/testing/selftests/x86/vdso_restorer.c15
-rw-r--r--tools/testing/vsock/.gitignore1
-rw-r--r--tools/testing/vsock/vsock_test.c77
-rw-r--r--tools/thermal/tmon/.gitignore1
-rw-r--r--tools/thermal/tmon/tmon.c26
-rw-r--r--tools/usb/.gitignore1
-rw-r--r--tools/usb/usbip/.gitignore1
-rw-r--r--tools/virtio/.gitignore1
-rw-r--r--tools/virtio/Makefile30
-rw-r--r--tools/virtio/asm/barrier.h1
-rw-r--r--tools/virtio/generated/autoconf.h0
-rw-r--r--tools/virtio/linux/compiler.h1
-rw-r--r--tools/vm/.gitignore1
-rw-r--r--tools/vm/Makefile2
-rw-r--r--tools/vm/page_owner_sort.c5
1095 files changed, 65382 insertions, 11870 deletions
diff --git a/tools/accounting/.gitignore b/tools/accounting/.gitignore
index 86485203c4ae..c45fb4ed4309 100644
--- a/tools/accounting/.gitignore
+++ b/tools/accounting/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
getdelays
diff --git a/tools/accounting/getdelays.c b/tools/accounting/getdelays.c
index 8cb504d30384..5ef1c15e88ad 100644
--- a/tools/accounting/getdelays.c
+++ b/tools/accounting/getdelays.c
@@ -136,7 +136,7 @@ static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
msg.g.version = 0x1;
na = (struct nlattr *) GENLMSG_DATA(&msg);
na->nla_type = nla_type;
- na->nla_len = nla_len + 1 + NLA_HDRLEN;
+ na->nla_len = nla_len + NLA_HDRLEN;
memcpy(NLA_DATA(na), nla_data, nla_len);
msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
diff --git a/tools/arch/sh/include/asm/barrier.h b/tools/arch/sh/include/asm/barrier.h
index bde5221af282..7eaea27cdd67 100644
--- a/tools/arch/sh/include/asm/barrier.h
+++ b/tools/arch/sh/include/asm/barrier.h
@@ -22,7 +22,7 @@
* Historically we have only done this type of barrier for the MMUCR, but
* it's also necessary for the CCR, so we make it generic here instead.
*/
-#if defined(__SH4A__) || defined(__SH5__)
+#if defined(__SH4A__)
#define mb() __asm__ __volatile__ ("synco": : :"memory")
#define rmb() mb()
#define wmb() mb()
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index f3327cb56edf..db189945e9b0 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -217,7 +217,7 @@
#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
-#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 or above (Zen) */
#define X86_FEATURE_L1TF_PTEINV ( 7*32+29) /* "" L1TF workaround PTE inversion */
#define X86_FEATURE_IBRS_ENHANCED ( 7*32+30) /* Enhanced IBRS */
#define X86_FEATURE_MSR_IA32_FEAT_CTL ( 7*32+31) /* "" MSR IA32_FEAT_CTL configured */
@@ -285,6 +285,7 @@
#define X86_FEATURE_CQM_MBM_LOCAL (11*32+ 3) /* LLC Local MBM monitoring */
#define X86_FEATURE_FENCE_SWAPGS_USER (11*32+ 4) /* "" LFENCE in user entry SWAPGS path */
#define X86_FEATURE_FENCE_SWAPGS_KERNEL (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
+#define X86_FEATURE_SPLIT_LOCK_DETECT (11*32+ 6) /* #AC for split lock */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */
@@ -299,6 +300,7 @@
#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */
#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* "" Single Thread Indirect Branch Predictors always-on preferred */
+#define X86_FEATURE_AMD_PPIN (13*32+23) /* Protected Processor Inventory Number */
#define X86_FEATURE_AMD_SSBD (13*32+24) /* "" Speculative Store Bypass Disable */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
@@ -367,6 +369,7 @@
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_CORE_CAPABILITIES (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
/*
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index d5e517d1c3dd..ef452b817f44 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -41,6 +41,10 @@
/* Intel MSRs. Some also available on other CPUs */
+#define MSR_TEST_CTRL 0x00000033
+#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT 29
+#define MSR_TEST_CTRL_SPLIT_LOCK_DETECT BIT(MSR_TEST_CTRL_SPLIT_LOCK_DETECT_BIT)
+
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS BIT(0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */
@@ -70,6 +74,11 @@
*/
#define MSR_IA32_UMWAIT_CONTROL_TIME_MASK (~0x03U)
+/* Abbreviated from Intel SDM name IA32_CORE_CAPABILITIES */
+#define MSR_IA32_CORE_CAPS 0x000000cf
+#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT 5
+#define MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT BIT(MSR_IA32_CORE_CAPS_SPLIT_LOCK_DETECT_BIT)
+
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
#define NHM_C1_AUTO_DEMOTE (1UL << 26)
@@ -292,6 +301,9 @@
#define MSR_PP1_ENERGY_STATUS 0x00000641
#define MSR_PP1_POLICY 0x00000642
+#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b
+#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299
+
/* Config TDP MSRs */
#define MSR_CONFIG_TDP_NOMINAL 0x00000648
#define MSR_CONFIG_TDP_LEVEL_1 0x00000649
diff --git a/tools/arch/x86/include/asm/orc_types.h b/tools/arch/x86/include/asm/orc_types.h
index 6e060907c163..d25534940bde 100644
--- a/tools/arch/x86/include/asm/orc_types.h
+++ b/tools/arch/x86/include/asm/orc_types.h
@@ -58,8 +58,7 @@
#define ORC_TYPE_CALL 0
#define ORC_TYPE_REGS 1
#define ORC_TYPE_REGS_IRET 2
-#define UNWIND_HINT_TYPE_SAVE 3
-#define UNWIND_HINT_TYPE_RESTORE 4
+#define UNWIND_HINT_TYPE_RET_OFFSET 3
#ifndef __ASSEMBLY__
/*
diff --git a/tools/arch/x86/include/asm/unistd_64.h b/tools/arch/x86/include/asm/unistd_64.h
index cb52a3a8b8fc..4205ed4158bf 100644
--- a/tools/arch/x86/include/asm/unistd_64.h
+++ b/tools/arch/x86/include/asm/unistd_64.h
@@ -1,4 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NR_userfaultfd
+#define __NR_userfaultfd 282
+#endif
#ifndef __NR_perf_event_open
# define __NR_perf_event_open 298
#endif
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 3f3f780c8c65..43e24903812c 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -400,6 +400,7 @@ struct kvm_sync_regs {
struct kvm_vmx_nested_state_data {
__u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
__u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE];
+ __u64 preemption_timer_deadline;
};
struct kvm_vmx_nested_state_hdr {
diff --git a/tools/arch/x86/include/uapi/asm/unistd.h b/tools/arch/x86/include/uapi/asm/unistd.h
index 196fdd02b8b1..30d7d04d72d6 100644
--- a/tools/arch/x86/include/uapi/asm/unistd.h
+++ b/tools/arch/x86/include/uapi/asm/unistd.h
@@ -3,7 +3,7 @@
#define _UAPI_ASM_X86_UNISTD_H
/* x32 syscall flag bit */
-#define __X32_SYSCALL_BIT 0x40000000UL
+#define __X32_SYSCALL_BIT 0x40000000
#ifndef __KERNEL__
# ifdef __i386__
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index 53adc1762ec0..ec31f5b60323 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -366,7 +366,7 @@ AVXcode: 1
1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
1c: Grp20 (1A),(1C)
1d:
-1e:
+1e: Grp21 (1A)
1f: NOP Ev
# 0x0f 0x20-0x2f
20: MOV Rd,Cd
@@ -803,8 +803,8 @@ f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
f2: ANDN Gy,By,Ey (v)
f3: Grp17 (1A)
-f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
-f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v) | WRUSSD/Q My,Gy (66)
+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v) | WRSSD/Q My,Gy
f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
f8: MOVDIR64B Gv,Mdqq (66) | ENQCMD Gv,Mdqq (F2) | ENQCMDS Gv,Mdqq (F3)
f9: MOVDIRI My,Gy
@@ -970,7 +970,7 @@ GrpTable: Grp7
2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B)
3: LIDT Ms
4: SMSW Mw/Rv
-5: rdpkru (110),(11B) | wrpkru (111),(11B)
+5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B)
6: LMSW Ew
7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
EndTable
@@ -1041,8 +1041,8 @@ GrpTable: Grp15
2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
4: XSAVE | ptwrite Ey (F3),(11B)
-5: XRSTOR | lfence (11B)
-6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B)
+5: XRSTOR | lfence (11B) | INCSSPD/Q Ry (F3),(11B)
+6: XSAVEOPT | clwb (66) | mfence (11B) | TPAUSE Rd (66),(11B) | UMONITOR Rv (F3),(11B) | UMWAIT Rd (F2),(11B) | CLRSSBSY Mq (F3)
7: clflush | clflushopt (66) | sfence (11B)
EndTable
@@ -1077,6 +1077,11 @@ GrpTable: Grp20
0: cldemote Mb
EndTable
+GrpTable: Grp21
+1: RDSSPD/Q Ry (F3),(11B)
+7: ENDBR64 (F3),(010),(11B) | ENDBR32 (F3),(011),(11B)
+EndTable
+
# AMD's Prefetch Group
GrpTable: GrpP
0: PREFETCH
diff --git a/tools/bootconfig/.gitignore b/tools/bootconfig/.gitignore
index e7644dfaa4a7..b77513cae685 100644
--- a/tools/bootconfig/.gitignore
+++ b/tools/bootconfig/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
bootconfig
diff --git a/tools/bootconfig/Makefile b/tools/bootconfig/Makefile
index a6146ac64458..da5975775337 100644
--- a/tools/bootconfig/Makefile
+++ b/tools/bootconfig/Makefile
@@ -1,23 +1,30 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for bootconfig command
+include ../scripts/Makefile.include
bindir ?= /usr/bin
-HEADER = include/linux/bootconfig.h
-CFLAGS = -Wall -g -I./include
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
-PROGS = bootconfig
+LIBSRC = $(srctree)/lib/bootconfig.c $(srctree)/include/linux/bootconfig.h
+CFLAGS = -Wall -g -I$(CURDIR)/include
-all: $(PROGS)
+ALL_TARGETS := bootconfig
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
-bootconfig: ../../lib/bootconfig.c main.c $(HEADER)
+all: $(ALL_PROGRAMS)
+
+$(OUTPUT)bootconfig: main.c $(LIBSRC)
$(CC) $(filter %.c,$^) $(CFLAGS) -o $@
-install: $(PROGS)
- install bootconfig $(DESTDIR)$(bindir)
+test: $(ALL_PROGRAMS) test-bootconfig.sh
+ ./test-bootconfig.sh $(OUTPUT)
-test: bootconfig
- ./test-bootconfig.sh
+install: $(ALL_PROGRAMS)
+ install $(OUTPUT)bootconfig $(DESTDIR)$(bindir)
clean:
- $(RM) -f *.o bootconfig
+ $(RM) -f $(OUTPUT)*.o $(ALL_PROGRAMS)
diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c
index a9b97814d1a9..0efaf45f7367 100644
--- a/tools/bootconfig/main.c
+++ b/tools/bootconfig/main.c
@@ -130,6 +130,7 @@ int load_xbc_from_initrd(int fd, char **buf)
int ret;
u32 size = 0, csum = 0, rcsum;
char magic[BOOTCONFIG_MAGIC_LEN];
+ const char *msg;
ret = fstat(fd, &stat);
if (ret < 0)
@@ -182,10 +183,12 @@ int load_xbc_from_initrd(int fd, char **buf)
return -EINVAL;
}
- ret = xbc_init(*buf);
+ ret = xbc_init(*buf, &msg, NULL);
/* Wrong data */
- if (ret < 0)
+ if (ret < 0) {
+ pr_err("parse error: %s.\n", msg);
return ret;
+ }
return size;
}
@@ -244,11 +247,34 @@ int delete_xbc(const char *path)
return ret;
}
+static void show_xbc_error(const char *data, const char *msg, int pos)
+{
+ int lin = 1, col, i;
+
+ if (pos < 0) {
+ pr_err("Error: %s.\n", msg);
+ return;
+ }
+
+ /* Note that pos starts from 0 but lin and col should start from 1. */
+ col = pos + 1;
+ for (i = 0; i < pos; i++) {
+ if (data[i] == '\n') {
+ lin++;
+ col = pos - i;
+ }
+ }
+ pr_err("Parse Error: %s at %d:%d\n", msg, lin, col);
+
+}
+
int apply_xbc(const char *path, const char *xbc_path)
{
u32 size, csum;
char *buf, *data;
int ret, fd;
+ const char *msg;
+ int pos;
ret = load_xbc_file(xbc_path, &buf);
if (ret < 0) {
@@ -267,11 +293,12 @@ int apply_xbc(const char *path, const char *xbc_path)
*(u32 *)(data + size + 4) = csum;
/* Check the data format */
- ret = xbc_init(buf);
+ ret = xbc_init(buf, &msg, &pos);
if (ret < 0) {
- pr_err("Failed to parse %s: %d\n", xbc_path, ret);
+ show_xbc_error(data, msg, pos);
free(data);
free(buf);
+
return ret;
}
printf("Apply %s to %s\n", xbc_path, path);
@@ -287,6 +314,7 @@ int apply_xbc(const char *path, const char *xbc_path)
ret = delete_xbc(path);
if (ret < 0) {
pr_err("Failed to delete previous boot config: %d\n", ret);
+ free(data);
return ret;
}
@@ -294,24 +322,27 @@ int apply_xbc(const char *path, const char *xbc_path)
fd = open(path, O_RDWR | O_APPEND);
if (fd < 0) {
pr_err("Failed to open %s: %d\n", path, fd);
+ free(data);
return fd;
}
/* TODO: Ensure the @path is initramfs/initrd image */
ret = write(fd, data, size + 8);
if (ret < 0) {
pr_err("Failed to apply a boot config: %d\n", ret);
- return ret;
+ goto out;
}
/* Write a magic word of the bootconfig */
ret = write(fd, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN);
if (ret < 0) {
pr_err("Failed to apply a boot config magic: %d\n", ret);
- return ret;
+ goto out;
}
+ ret = 0;
+out:
close(fd);
free(data);
- return 0;
+ return ret;
}
int usage(void)
diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh
index 1411f4c3454f..eff16b77d5eb 100755
--- a/tools/bootconfig/test-bootconfig.sh
+++ b/tools/bootconfig/test-bootconfig.sh
@@ -3,9 +3,16 @@
echo "Boot config test script"
-BOOTCONF=./bootconfig
-INITRD=`mktemp initrd-XXXX`
-TEMPCONF=`mktemp temp-XXXX.bconf`
+if [ -d "$1" ]; then
+ TESTDIR=$1
+else
+ TESTDIR=.
+fi
+BOOTCONF=${TESTDIR}/bootconfig
+
+INITRD=`mktemp ${TESTDIR}/initrd-XXXX`
+TEMPCONF=`mktemp ${TESTDIR}/temp-XXXX.bconf`
+OUTFILE=`mktemp ${TESTDIR}/tempout-XXXX`
NG=0
cleanup() {
@@ -65,7 +72,6 @@ new_size=$(stat -c %s $INITRD)
xpass test $new_size -eq $initrd_size
echo "No error messge while applying"
-OUTFILE=`mktemp tempout-XXXX`
dd if=/dev/zero of=$INITRD bs=4096 count=1
printf " \0\0\0 \0\0\0" >> $INITRD
$BOOTCONF -a $TEMPCONF $INITRD > $OUTFILE 2>&1
@@ -118,9 +124,16 @@ for i in samples/good-* ; do
xpass $BOOTCONF -a $i $INITRD
done
+
+echo
+echo "=== Summary ==="
+echo "# of Passed: $(expr $NO - $NG - 1)"
+echo "# of Failed: $NG"
+
echo
if [ $NG -eq 0 ]; then
echo "All tests passed"
else
echo "$NG tests failed"
+ exit 1
fi
diff --git a/tools/bpf/.gitignore b/tools/bpf/.gitignore
index 59024197e71d..cf53342175e7 100644
--- a/tools/bpf/.gitignore
+++ b/tools/bpf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
FEATURE-DUMP.bpf
feature
bpf_asm
diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index f897eeeb0b4f..77472e28c8fd 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -64,12 +64,12 @@ $(OUTPUT)%.lex.c: $(srctree)/tools/bpf/%.l
$(QUIET_FLEX)$(LEX) -o $@ $<
$(OUTPUT)%.o: $(srctree)/tools/bpf/%.c
- $(QUIET_CC)$(COMPILE.c) -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -o $@ $<
$(OUTPUT)%.yacc.o: $(OUTPUT)%.yacc.c
- $(QUIET_CC)$(COMPILE.c) -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -o $@ $<
$(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
- $(QUIET_CC)$(COMPILE.c) -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -o $@ $<
PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
diff --git a/tools/bpf/bpf_asm.c b/tools/bpf/bpf_asm.c
index e5f95e3eede3..0063c3c029e7 100644
--- a/tools/bpf/bpf_asm.c
+++ b/tools/bpf/bpf_asm.c
@@ -11,7 +11,7 @@
*
* How to get into it:
*
- * 1) read Documentation/networking/filter.txt
+ * 1) read Documentation/networking/filter.rst
* 2) Run `bpf_asm [-c] <filter-prog file>` to translate into binary
* blob that is loadable with xt_bpf, cls_bpf et al. Note: -c will
* pretty print a C-like construct.
diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c
index 9d3766e653a9..a0ebcdf59c31 100644
--- a/tools/bpf/bpf_dbg.c
+++ b/tools/bpf/bpf_dbg.c
@@ -13,7 +13,7 @@
* for making a verdict when multiple simple BPF programs are combined
* into one in order to prevent parsing same headers multiple times.
*
- * More on how to debug BPF opcodes see Documentation/networking/filter.txt
+ * More on how to debug BPF opcodes see Documentation/networking/filter.rst
* which is the main document on BPF. Mini howto for getting started:
*
* 1) `./bpf_dbg` to enter the shell (shell cmds denoted with '>'):
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index b13926432b84..26cde83e1ca3 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,7 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
*.d
+/_bpftool
/bpftool
bpftool*.8
bpf-helpers.*
FEATURE-DUMP.bpftool
feature
libbpf
+profiler.skel.h
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 39615f8e145b..ce3a724f50c1 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -230,9 +230,14 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
- **bpftool-map**\ (8),
- **bpftool-prog**\ (8),
+ **bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
+ **bpftool-gen**\ (8),
+ **bpftool-iter**\ (8),
+ **bpftool-link**\ (8),
+ **bpftool-map**\ (8),
**bpftool-net**\ (8),
- **bpftool-perf**\ (8)
+ **bpftool-perf**\ (8),
+ **bpftool-prog**\ (8),
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 06a28b07787d..a226aee3574f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -20,7 +20,7 @@ SYNOPSIS
CGROUP COMMANDS
===============
-| **bpftool** **cgroup { show | list }** *CGROUP* [**effective**]
+| **bpftool** **cgroup** { **show** | **list** } *CGROUP* [**effective**]
| **bpftool** **cgroup tree** [*CGROUP_ROOT*] [**effective**]
| **bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
| **bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
@@ -29,8 +29,8 @@ CGROUP COMMANDS
| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
| *ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
| **bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
-| **sendmsg4** | **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** |
-| **getsockopt** | **setsockopt** }
+| **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
+| **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** }
| *ATTACH_FLAGS* := { **multi** | **override** }
DESCRIPTION
@@ -101,7 +101,11 @@ DESCRIPTION
an unconnected udp6 socket (since 5.2);
**sysctl** sysctl access (since 5.2);
**getsockopt** call to getsockopt (since 5.3);
- **setsockopt** call to setsockopt (since 5.3).
+ **setsockopt** call to setsockopt (since 5.3);
+ **getpeername4** call to getpeername(2) for an inet4 socket (since 5.8);
+ **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8);
+ **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8);
+ **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8).
**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
Detach *PROG* from the cgroup *CGROUP* and attach type
@@ -160,9 +164,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-map**\ (8),
+ **bpftool-btf**\ (8),
**bpftool-feature**\ (8),
+ **bpftool-gen**\ (8),
+ **bpftool-iter**\ (8),
+ **bpftool-link**\ (8),
+ **bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
+ **bpftool-prog**\ (8),
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 4d08f35034a2..8609f06e71de 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -19,19 +19,24 @@ SYNOPSIS
FEATURE COMMANDS
================
-| **bpftool** **feature probe** [*COMPONENT*] [**macros** [**prefix** *PREFIX*]]
+| **bpftool** **feature probe** [*COMPONENT*] [**full**] [**unprivileged**] [**macros** [**prefix** *PREFIX*]]
| **bpftool** **feature help**
|
| *COMPONENT* := { **kernel** | **dev** *NAME* }
DESCRIPTION
===========
- **bpftool feature probe** [**kernel**] [**macros** [**prefix** *PREFIX*]]
+ **bpftool feature probe** [**kernel**] [**full**] [**macros** [**prefix** *PREFIX*]]
Probe the running kernel and dump a number of eBPF-related
- parameters, such as availability of the **bpf()** system call,
+ parameters, such as availability of the **bpf**\ () system call,
JIT status, eBPF program types availability, eBPF helper
functions availability, and more.
+ By default, bpftool **does not run probes** for
+ **bpf_probe_write_user**\ () and **bpf_trace_printk**\()
+ helpers which print warnings to kernel logs. To enable them
+ and run all probes, the **full** keyword should be used.
+
If the **macros** keyword (but not the **-j** option) is
passed, a subset of the output is dumped as a list of
**#define** macros that are ready to be included in a C
@@ -44,16 +49,22 @@ DESCRIPTION
Keyword **kernel** can be omitted. If no probe target is
specified, probing the kernel is the default behaviour.
- Note that when probed, some eBPF helpers (e.g.
- **bpf_trace_printk**\ () or **bpf_probe_write_user**\ ()) may
- print warnings to kernel logs.
-
- **bpftool feature probe dev** *NAME* [**macros** [**prefix** *PREFIX*]]
+ When the **unprivileged** keyword is used, bpftool will dump
+ only the features available to a user who does not have the
+ **CAP_SYS_ADMIN** capability set. The features available in
+ that case usually represent a small subset of the parameters
+ supported by the system. Unprivileged users MUST use the
+ **unprivileged** keyword: This is to avoid misdetection if
+ bpftool is inadvertently run as non-root, for example. This
+ keyword is unavailable if bpftool was compiled without
+ libcap.
+
+ **bpftool feature probe dev** *NAME* [**full**] [**macros** [**prefix** *PREFIX*]]
Probe network device for supported eBPF features and dump
results to the console.
- The two keywords **macros** and **prefix** have the same
- role as when probing the kernel.
+ The keywords **full**, **macros** and **prefix** have the
+ same role as when probing the kernel.
**bpftool feature help**
Print short help message.
@@ -82,9 +93,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-map**\ (8),
+ **bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
+ **bpftool-gen**\ (8),
+ **bpftool-iter**\ (8),
+ **bpftool-link**\ (8),
+ **bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
+ **bpftool-prog**\ (8),
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 94d91322895a..df85dbd962c0 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -14,7 +14,7 @@ SYNOPSIS
*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
- *COMMAND* := { **skeleton | **help** }
+ *COMMAND* := { **skeleton** | **help** }
GEN COMMANDS
=============
@@ -36,12 +36,12 @@ DESCRIPTION
etc. Skeleton eliminates the need to lookup mentioned
components by name. Instead, if skeleton instantiation
succeeds, they are populated in skeleton structure as valid
- libbpf types (e.g., struct bpf_map pointer) and can be
+ libbpf types (e.g., **struct bpf_map** pointer) and can be
passed to existing generic libbpf APIs.
In addition to simple and reliable access to maps and
- programs, skeleton provides a storage for BPF links (struct
- bpf_link) for each BPF program within BPF object. When
+ programs, skeleton provides a storage for BPF links (**struct
+ bpf_link**) for each BPF program within BPF object. When
requested, supported BPF programs will be automatically
attached and resulting BPF links stored for further use by
user in pre-allocated fields in skeleton struct. For BPF
@@ -82,14 +82,14 @@ DESCRIPTION
- **example__open** and **example__open_opts**.
These functions are used to instantiate skeleton. It
- corresponds to libbpf's **bpf_object__open()** API.
+ corresponds to libbpf's **bpf_object__open**\ () API.
**_opts** variants accepts extra **bpf_object_open_opts**
options.
- **example__load**.
This function creates maps, loads and verifies BPF
programs, initializes global data maps. It corresponds to
- libppf's **bpf_object__load** API.
+ libppf's **bpf_object__load**\ () API.
- **example__open_and_load** combines **example__open** and
**example__load** invocations in one commonly used
@@ -296,10 +296,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
- **bpftool-map**\ (8),
- **bpftool-prog**\ (8),
+ **bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
+ **bpftool-iter**\ (8),
+ **bpftool-link**\ (8),
+ **bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
+ **bpftool-prog**\ (8),
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
new file mode 100644
index 000000000000..8dce698eab79
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -0,0 +1,81 @@
+============
+bpftool-iter
+============
+-------------------------------------------------------------------------------
+tool to create BPF iterators
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **iter** *COMMAND*
+
+ *COMMANDS* := { **pin** | **help** }
+
+ITER COMMANDS
+===================
+
+| **bpftool** **iter pin** *OBJ* *PATH*
+| **bpftool** **iter help**
+|
+| *OBJ* := /a/file/of/bpf_iter_target.o
+
+DESCRIPTION
+===========
+ **bpftool iter pin** *OBJ* *PATH*
+ A bpf iterator combines a kernel iterating of
+ particular kernel data (e.g., tasks, bpf_maps, etc.)
+ and a bpf program called for each kernel data object
+ (e.g., one task, one bpf_map, etc.). User space can
+ *read* kernel iterator output through *read()* syscall.
+
+ The *pin* command creates a bpf iterator from *OBJ*,
+ and pin it to *PATH*. The *PATH* should be located
+ in *bpffs* mount. It must not contain a dot
+ character ('.'), which is reserved for future extensions
+ of *bpffs*.
+
+ User can then *cat PATH* to see the bpf iterator output.
+
+ **bpftool iter help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -V, --version
+ Print version number (similar to **bpftool version**).
+
+ -d, --debug
+ Print all logs available, even debug-level information. This
+ includes logs from libbpf as well as from the verifier, when
+ attempting to load programs.
+
+EXAMPLES
+========
+**# bpftool iter pin bpf_iter_netlink.o /sys/fs/bpf/my_netlink**
+
+::
+
+ Create a file-based bpf iterator from bpf_iter_netlink.o and pin it
+ to /sys/fs/bpf/my_netlink
+
+SEE ALSO
+========
+ **bpf**\ (2),
+ **bpf-helpers**\ (7),
+ **bpftool**\ (8),
+ **bpftool-btf**\ (8),
+ **bpftool-cgroup**\ (8),
+ **bpftool-feature**\ (8),
+ **bpftool-gen**\ (8),
+ **bpftool-link**\ (8),
+ **bpftool-map**\ (8),
+ **bpftool-net**\ (8),
+ **bpftool-perf**\ (8),
+ **bpftool-prog**\ (8),
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
new file mode 100644
index 000000000000..0e43d7b06c11
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -0,0 +1,121 @@
+================
+bpftool-link
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF links
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **link** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+
+ *COMMANDS* := { **show** | **list** | **pin** | **help** }
+
+LINK COMMANDS
+=============
+
+| **bpftool** **link { show | list }** [*LINK*]
+| **bpftool** **link pin** *LINK* *FILE*
+| **bpftool** **link help**
+|
+| *LINK* := { **id** *LINK_ID* | **pinned** *FILE* }
+
+
+DESCRIPTION
+===========
+ **bpftool link { show | list }** [*LINK*]
+ Show information about active links. If *LINK* is
+ specified show information only about given link,
+ otherwise list all links currently active on the system.
+
+ Output will start with link ID followed by link type and
+ zero or more named attributes, some of which depend on type
+ of link.
+
+ **bpftool link pin** *LINK* *FILE*
+ Pin link *LINK* as *FILE*.
+
+ Note: *FILE* must be located in *bpffs* mount. It must not
+ contain a dot character ('.'), which is reserved for future
+ extensions of *bpffs*.
+
+ **bpftool link help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -V, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+ -f, --bpffs
+ When showing BPF links, show file names of pinned
+ links.
+
+ -n, --nomount
+ Do not automatically attempt to mount any virtual file system
+ (such as tracefs or BPF virtual file system) when necessary.
+
+ -d, --debug
+ Print all logs available, even debug-level information. This
+ includes logs from libbpf.
+
+EXAMPLES
+========
+**# bpftool link show**
+
+::
+
+ 10: cgroup prog 25
+ cgroup_id 614 attach_type egress
+
+**# bpftool --json --pretty link show**
+
+::
+
+ [{
+ "type": "cgroup",
+ "prog_id": 25,
+ "cgroup_id": 614,
+ "attach_type": "egress"
+ }
+ ]
+
+|
+| **# bpftool link pin id 10 /sys/fs/bpf/link**
+| **# ls -l /sys/fs/bpf/**
+
+::
+
+ -rw------- 1 root root 0 Apr 23 21:39 link
+
+
+SEE ALSO
+========
+ **bpf**\ (2),
+ **bpf-helpers**\ (7),
+ **bpftool**\ (8),
+ **bpftool-btf**\ (8),
+ **bpftool-cgroup**\ (8),
+ **bpftool-feature**\ (8),
+ **bpftool-gen**\ (8),
+ **bpftool-iter**\ (8),
+ **bpftool-map**\ (8),
+ **bpftool-net**\ (8),
+ **bpftool-perf**\ (8),
+ **bpftool-prog**\ (8),
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index cdeae8ae90ba..31101643e57c 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -21,7 +21,7 @@ SYNOPSIS
MAP COMMANDS
=============
-| **bpftool** **map { show | list }** [*MAP*]
+| **bpftool** **map** { **show** | **list** } [*MAP*]
| **bpftool** **map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \
| **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
| **bpftool** **map dump** *MAP*
@@ -49,7 +49,7 @@ MAP COMMANDS
| | **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
| | **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
| | **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
-| | **queue** | **stack** }
+| | **queue** | **stack** | **sk_storage** | **struct_ops** }
DESCRIPTION
===========
@@ -66,6 +66,13 @@ DESCRIPTION
Create a new map with given parameters and pin it to *bpffs*
as *FILE*.
+ *FLAGS* should be an integer which is the combination of
+ desired flags, e.g. 1024 for **BPF_F_MMAPABLE** (see bpf.h
+ UAPI header for existing flags).
+
+ Keyword **dev** expects a network interface name, and is used
+ to request hardware offload for the map.
+
**bpftool map dump** *MAP*
Dump all entries in a given *MAP*. In case of **name**,
*MAP* may match several maps which will all be dumped.
@@ -78,7 +85,7 @@ DESCRIPTION
exists; **noexist** update only if entry doesn't exist.
If the **hex** keyword is provided in front of the bytes
- sequence, the bytes are parsed as hexadeximal values, even if
+ sequence, the bytes are parsed as hexadecimal values, even if
no "0x" prefix is added. If the keyword is not provided, then
the bytes are parsed as decimal values, unless a "0x" prefix
(for hexadecimal) or a "0" prefix (for octal) is provided.
@@ -100,10 +107,10 @@ DESCRIPTION
extensions of *bpffs*.
**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
- Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
+ Read events from a **BPF_MAP_TYPE_PERF_EVENT_ARRAY** map.
Install perf rings into a perf event array map and dump
- output of any bpf_perf_event_output() call in the kernel.
+ output of any **bpf_perf_event_output**\ () call in the kernel.
By default read the number of CPUs on the system and
install perf ring for each CPU in the corresponding index
in the array.
@@ -116,24 +123,24 @@ DESCRIPTION
receiving events if it installed its rings earlier.
**bpftool map peek** *MAP*
- Peek next **value** in the queue or stack.
+ Peek next value in the queue or stack.
**bpftool map push** *MAP* **value** *VALUE*
- Push **value** onto the stack.
+ Push *VALUE* onto the stack.
**bpftool map pop** *MAP*
- Pop and print **value** from the stack.
+ Pop and print value from the stack.
**bpftool map enqueue** *MAP* **value** *VALUE*
- Enqueue **value** into the queue.
+ Enqueue *VALUE* into the queue.
**bpftool map dequeue** *MAP*
- Dequeue and print **value** from the queue.
+ Dequeue and print value from the queue.
**bpftool map freeze** *MAP*
Freeze the map as read-only from user space. Entries from a
frozen map can not longer be updated or deleted with the
- **bpf\ ()** system call. This operation is not reversible,
+ **bpf**\ () system call. This operation is not reversible,
and the map remains immutable from user space until its
destruction. However, read and write permissions for BPF
programs to the map remain unchanged.
@@ -269,9 +276,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
- **bpftool-prog**\ (8),
+ **bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
+ **bpftool-gen**\ (8),
+ **bpftool-iter**\ (8),
+ **bpftool-link**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
+ **bpftool-prog**\ (8),
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index 8651b00b81ea..aa7450736179 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -20,7 +20,7 @@ SYNOPSIS
NET COMMANDS
============
-| **bpftool** **net { show | list }** [ **dev** *NAME* ]
+| **bpftool** **net** { **show** | **list** } [ **dev** *NAME* ]
| **bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
| **bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME*
| **bpftool** **net help**
@@ -194,9 +194,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-map**\ (8),
+ **bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
+ **bpftool-gen**\ (8),
+ **bpftool-iter**\ (8),
+ **bpftool-link**\ (8),
+ **bpftool-map**\ (8),
**bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
+ **bpftool-prog**\ (8),
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index e252bd0bc434..9c592b7c6775 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -20,7 +20,7 @@ SYNOPSIS
PERF COMMANDS
=============
-| **bpftool** **perf { show | list }**
+| **bpftool** **perf** { **show** | **list** }
| **bpftool** **perf help**
DESCRIPTION
@@ -85,9 +85,13 @@ SEE ALSO
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
- **bpftool-prog**\ (8),
- **bpftool-map**\ (8),
+ **bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
+ **bpftool-gen**\ (8),
+ **bpftool-iter**\ (8),
+ **bpftool-link**\ (8),
+ **bpftool-map**\ (8),
**bpftool-net**\ (8),
- **bpftool-btf**\ (8)
+ **bpftool-prog**\ (8),
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 64ddf8a4c518..2b254959d488 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -21,15 +21,16 @@ SYNOPSIS
PROG COMMANDS
=============
-| **bpftool** **prog { show | list }** [*PROG*]
+| **bpftool** **prog** { **show** | **list** } [*PROG*]
| **bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}]
| **bpftool** **prog dump jited** *PROG* [{**file** *FILE* | **opcodes** | **linum**}]
| **bpftool** **prog pin** *PROG* *FILE*
-| **bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
+| **bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
| **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
| **bpftool** **prog tracelog**
| **bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*]
+| **bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs*
| **bpftool** **prog help**
|
| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* }
@@ -40,13 +41,18 @@ PROG COMMANDS
| **cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** |
| **lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
| **cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
-| **cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
+| **cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** |
+| **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
| **cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
-| **cgroup/getsockopt** | **cgroup/setsockopt**
+| **cgroup/getsockopt** | **cgroup/setsockopt** |
+| **struct_ops** | **fentry** | **fexit** | **freplace**
| }
| *ATTACH_TYPE* := {
| **msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
| }
+| *METRICs* := {
+| **cycles** | **instructions** | **l1d_loads** | **llc_misses**
+| }
DESCRIPTION
@@ -150,7 +156,7 @@ DESCRIPTION
**bpftool prog tracelog**
Dump the trace pipe of the system to the console (stdout).
Hit <Ctrl+C> to stop printing. BPF programs can write to this
- trace pipe at runtime with the **bpf_trace_printk()** helper.
+ trace pipe at runtime with the **bpf_trace_printk**\ () helper.
This should be used only for debugging purposes. For
streaming data from BPF programs to user space, one can use
perf events (see also **bpftool-map**\ (8)).
@@ -188,6 +194,12 @@ DESCRIPTION
not all of them can take the **ctx_in**/**ctx_out**
arguments. bpftool does not perform checks on program types.
+ **bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs*
+ Profile *METRICs* for bpf program *PROG* for *DURATION*
+ seconds or until user hits <Ctrl+C>. *DURATION* is optional.
+ If *DURATION* is not specified, the profiling will run up to
+ **UINT_MAX** seconds.
+
**bpftool prog help**
Print short help message.
@@ -256,7 +268,7 @@ EXAMPLES
|
| **# bpftool prog dump xlated id 10 file /tmp/t**
-| **# ls -l /tmp/t**
+| **$ ls -l /tmp/t**
::
@@ -310,14 +322,28 @@ EXAMPLES
**# rm /sys/fs/bpf/xdp1**
+|
+| **# bpftool prog profile id 337 duration 10 cycles instructions llc_misses**
+
+::
+
+ 51397 run_cnt
+ 40176203 cycles (83.05%)
+ 42518139 instructions # 1.06 insns per cycle (83.39%)
+ 123 llc_misses # 2.89 LLC misses per million insns (83.15%)
+
SEE ALSO
========
**bpf**\ (2),
**bpf-helpers**\ (7),
**bpftool**\ (8),
- **bpftool-map**\ (8),
+ **bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
+ **bpftool-gen**\ (8),
+ **bpftool-iter**\ (8),
+ **bpftool-link**\ (8),
+ **bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
- **bpftool-btf**\ (8)
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
new file mode 100644
index 000000000000..d93cd1cb8b0f
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
@@ -0,0 +1,117 @@
+==================
+bpftool-struct_ops
+==================
+-------------------------------------------------------------------------------
+tool to register/unregister/introspect BPF struct_ops
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+ **bpftool** [*OPTIONS*] **struct_ops** *COMMAND*
+
+ *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
+
+ *COMMANDS* :=
+ { **show** | **list** | **dump** | **register** | **unregister** | **help** }
+
+STRUCT_OPS COMMANDS
+===================
+
+| **bpftool** **struct_ops { show | list }** [*STRUCT_OPS_MAP*]
+| **bpftool** **struct_ops dump** [*STRUCT_OPS_MAP*]
+| **bpftool** **struct_ops register** *OBJ*
+| **bpftool** **struct_ops unregister** *STRUCT_OPS_MAP*
+| **bpftool** **struct_ops help**
+|
+| *STRUCT_OPS_MAP* := { **id** *STRUCT_OPS_MAP_ID* | **name** *STRUCT_OPS_MAP_NAME* }
+| *OBJ* := /a/file/of/bpf_struct_ops.o
+
+
+DESCRIPTION
+===========
+ **bpftool struct_ops { show | list }** [*STRUCT_OPS_MAP*]
+ Show brief information about the struct_ops in the system.
+ If *STRUCT_OPS_MAP* is specified, it shows information only
+ for the given struct_ops. Otherwise, it lists all struct_ops
+ currently existing in the system.
+
+ Output will start with struct_ops map ID, followed by its map
+ name and its struct_ops's kernel type.
+
+ **bpftool struct_ops dump** [*STRUCT_OPS_MAP*]
+ Dump details information about the struct_ops in the system.
+ If *STRUCT_OPS_MAP* is specified, it dumps information only
+ for the given struct_ops. Otherwise, it dumps all struct_ops
+ currently existing in the system.
+
+ **bpftool struct_ops register** *OBJ*
+ Register bpf struct_ops from *OBJ*. All struct_ops under
+ the ELF section ".struct_ops" will be registered to
+ its kernel subsystem.
+
+ **bpftool struct_ops unregister** *STRUCT_OPS_MAP*
+ Unregister the *STRUCT_OPS_MAP* from the kernel subsystem.
+
+ **bpftool struct_ops help**
+ Print short help message.
+
+OPTIONS
+=======
+ -h, --help
+ Print short generic help message (similar to **bpftool help**).
+
+ -V, --version
+ Print version number (similar to **bpftool version**).
+
+ -j, --json
+ Generate JSON output. For commands that cannot produce JSON, this
+ option has no effect.
+
+ -p, --pretty
+ Generate human-readable JSON output. Implies **-j**.
+
+ -d, --debug
+ Print all logs available, even debug-level information. This
+ includes logs from libbpf as well as from the verifier, when
+ attempting to load programs.
+
+EXAMPLES
+========
+**# bpftool struct_ops show**
+
+::
+
+ 100: dctcp tcp_congestion_ops
+ 105: cubic tcp_congestion_ops
+
+**# bpftool struct_ops unregister id 105**
+
+::
+
+ Unregistered tcp_congestion_ops cubic id 105
+
+**# bpftool struct_ops register bpf_cubic.o**
+
+::
+
+ Registered tcp_congestion_ops cubic id 110
+
+
+SEE ALSO
+========
+ **bpf**\ (2),
+ **bpf-helpers**\ (7),
+ **bpftool**\ (8),
+ **bpftool-btf**\ (8),
+ **bpftool-cgroup**\ (8),
+ **bpftool-feature**\ (8),
+ **bpftool-gen**\ (8),
+ **bpftool-iter**\ (8),
+ **bpftool-link**\ (8),
+ **bpftool-map**\ (8),
+ **bpftool-net**\ (8),
+ **bpftool-perf**\ (8),
+ **bpftool-prog**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 34239fda69ed..420d4d5df8b6 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -75,11 +75,14 @@ SEE ALSO
========
**bpf**\ (2),
**bpf-helpers**\ (7),
- **bpftool-prog**\ (8),
- **bpftool-map**\ (8),
+ **bpftool-btf**\ (8),
**bpftool-cgroup**\ (8),
**bpftool-feature**\ (8),
+ **bpftool-gen**\ (8),
+ **bpftool-iter**\ (8),
+ **bpftool-link**\ (8),
+ **bpftool-map**\ (8),
**bpftool-net**\ (8),
**bpftool-perf**\ (8),
- **bpftool-btf**\ (8),
- **bpftool-gen**\ (8),
+ **bpftool-prog**\ (8),
+ **bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index c4e810335810..9e85f101be85 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -55,14 +55,15 @@ ifneq ($(EXTRA_LDFLAGS),)
LDFLAGS += $(EXTRA_LDFLAGS)
endif
-LIBS = $(LIBBPF) -lelf -lz
-
INSTALL ?= install
RM ?= rm -f
+CLANG ?= clang
FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib
-FEATURE_DISPLAY = libbfd disassembler-four-args zlib
+FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \
+ clang-bpf-global-var
+FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \
+ clang-bpf-global-var
check_feat := 1
NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -88,6 +89,12 @@ ifeq ($(feature-reallocarray), 0)
CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
endif
+LIBS = $(LIBBPF) -lelf -lz
+ifeq ($(feature-libcap), 1)
+CFLAGS += -DUSE_LIBCAP
+LIBS += -lcap
+endif
+
include $(wildcard $(OUTPUT)*.d)
all: $(OUTPUT)bpftool
@@ -110,21 +117,47 @@ SRCS += $(BFD_SRCS)
endif
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
+_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o
+
+ifeq ($(feature-clang-bpf-global-var),1)
+ __OBJS = $(OBJS)
+else
+ __OBJS = $(_OBJS)
+endif
+
+$(OUTPUT)_prog.o: prog.c
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
+
+$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS)
+
+skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
+ $(QUIET_CLANG)$(CLANG) \
+ -I$(srctree)/tools/include/uapi/ \
+ -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
+ -g -O2 -target bpf -c $< -o $@
+
+profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o
+ $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@
+
+$(OUTPUT)prog.o: prog.c profiler.skel.h
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
- $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
$(OUTPUT)feature.o: | zdep
-$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
+$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS)
$(OUTPUT)%.o: %.c
- $(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+ $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
clean: $(LIBBPF)-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
+ $(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o
$(Q)$(RM) -r -- $(OUTPUT)libbpf/
$(call QUIET_CLEAN, core-gen)
$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 754d8395e451..25b25aca1112 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -98,6 +98,12 @@ _bpftool_get_btf_ids()
command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
}
+_bpftool_get_link_ids()
+{
+ COMPREPLY+=( $( compgen -W "$( bpftool -jp link 2>&1 | \
+ command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
+}
+
_bpftool_get_obj_map_names()
{
local obj
@@ -337,6 +343,7 @@ _bpftool()
local PROG_TYPE='id pinned tag name'
local MAP_TYPE='id pinned name'
+ local METRIC_TYPE='cycles instructions l1d_loads llc_misses'
case $command in
show|list)
[[ $prev != "$command" ]] && return 0
@@ -388,7 +395,7 @@ _bpftool()
_bpftool_get_prog_ids
;;
name)
- _bpftool_get_map_names
+ _bpftool_get_prog_names
;;
pinned)
_filedir
@@ -465,11 +472,14 @@ _bpftool()
lwt_seg6local sockops sk_skb sk_msg \
lirc_mode2 cgroup/bind4 cgroup/bind6 \
cgroup/connect4 cgroup/connect6 \
+ cgroup/getpeername4 cgroup/getpeername6 \
+ cgroup/getsockname4 cgroup/getsockname6 \
cgroup/sendmsg4 cgroup/sendmsg6 \
cgroup/recvmsg4 cgroup/recvmsg6 \
cgroup/post_bind4 cgroup/post_bind6 \
cgroup/sysctl cgroup/getsockopt \
- cgroup/setsockopt" -- \
+ cgroup/setsockopt struct_ops \
+ fentry fexit freplace" -- \
"$cur" ) )
return 0
;;
@@ -497,9 +507,51 @@ _bpftool()
tracelog)
return 0
;;
+ profile)
+ case $cword in
+ 3)
+ COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ 4)
+ case $prev in
+ id)
+ _bpftool_get_prog_ids
+ ;;
+ name)
+ _bpftool_get_prog_names
+ ;;
+ pinned)
+ _filedir
+ ;;
+ esac
+ return 0
+ ;;
+ 5)
+ COMPREPLY=( $( compgen -W "$METRIC_TYPE duration" -- "$cur" ) )
+ return 0
+ ;;
+ 6)
+ case $prev in
+ duration)
+ return 0
+ ;;
+ *)
+ COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ esac
+ return 0
+ ;;
+ *)
+ COMPREPLY=( $( compgen -W "$METRIC_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ esac
+ ;;
run)
- if [[ ${#words[@]} -lt 5 ]]; then
- _filedir
+ if [[ ${#words[@]} -eq 4 ]]; then
+ COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
return 0
fi
case $prev in
@@ -507,6 +559,10 @@ _bpftool()
_bpftool_get_prog_ids
return 0
;;
+ name)
+ _bpftool_get_prog_names
+ return 0
+ ;;
data_in|data_out|ctx_in|ctx_out)
_filedir
return 0
@@ -524,7 +580,48 @@ _bpftool()
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'dump help pin attach detach \
- load loadall show list tracelog run' -- "$cur" ) )
+ load loadall show list tracelog run profile' -- "$cur" ) )
+ ;;
+ esac
+ ;;
+ struct_ops)
+ local STRUCT_OPS_TYPE='id name'
+ case $command in
+ show|list|dump|unregister)
+ case $prev in
+ $command)
+ COMPREPLY=( $( compgen -W "$STRUCT_OPS_TYPE" -- "$cur" ) )
+ ;;
+ id)
+ _bpftool_get_map_ids_for_type struct_ops
+ ;;
+ name)
+ _bpftool_get_map_names_for_type struct_ops
+ ;;
+ esac
+ return 0
+ ;;
+ register)
+ _filedir
+ return 0
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'register unregister show list dump help' \
+ -- "$cur" ) )
+ ;;
+ esac
+ ;;
+ iter)
+ case $command in
+ pin)
+ _filedir
+ return 0
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'pin help' \
+ -- "$cur" ) )
;;
esac
;;
@@ -712,11 +809,17 @@ _bpftool()
esac
;;
pin)
- if [[ $prev == "$command" ]]; then
- COMPREPLY=( $( compgen -W "$PROG_TYPE" -- "$cur" ) )
- else
- _filedir
- fi
+ case $prev in
+ $command)
+ COMPREPLY=( $( compgen -W "$MAP_TYPE" -- "$cur" ) )
+ ;;
+ id)
+ _bpftool_get_map_ids
+ ;;
+ name)
+ _bpftool_get_map_names
+ ;;
+ esac
return 0
;;
event_pipe)
@@ -843,7 +946,7 @@ _bpftool()
case $command in
skeleton)
_filedir
- ;;
+ ;;
*)
[[ $prev == $object ]] && \
COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) )
@@ -865,9 +968,10 @@ _bpftool()
;;
attach|detach)
local ATTACH_TYPES='ingress egress sock_create sock_ops \
- device bind4 bind6 post_bind4 post_bind6 connect4 \
- connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \
- getsockopt setsockopt'
+ device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
+ getpeername4 getpeername6 getsockname4 getsockname6 \
+ sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
+ setsockopt'
local ATTACH_FLAGS='multi override'
local PROG_TYPE='id pinned tag name'
case $prev in
@@ -876,9 +980,9 @@ _bpftool()
return 0
;;
ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
- post_bind4|post_bind6|connect4|connect6|sendmsg4|\
- sendmsg6|recvmsg4|recvmsg6|sysctl|getsockopt|\
- setsockopt)
+ post_bind4|post_bind6|connect4|connect6|getpeername4|\
+ getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
+ recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt)
COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
"$cur" ) )
return 0
@@ -943,6 +1047,9 @@ _bpftool()
id)
_bpftool_get_prog_ids
;;
+ name)
+ _bpftool_get_prog_names
+ ;;
pinned)
_filedir
;;
@@ -983,11 +1090,12 @@ _bpftool()
probe)
[[ $prev == "prefix" ]] && return 0
if _bpftool_search_list 'macros'; then
- COMPREPLY+=( $( compgen -W 'prefix' -- "$cur" ) )
+ _bpftool_once_attr 'prefix'
else
COMPREPLY+=( $( compgen -W 'macros' -- "$cur" ) )
fi
_bpftool_one_of_list 'kernel dev'
+ _bpftool_once_attr 'full unprivileged'
return 0
;;
*)
@@ -996,6 +1104,39 @@ _bpftool()
;;
esac
;;
+ link)
+ case $command in
+ show|list|pin)
+ case $prev in
+ id)
+ _bpftool_get_link_ids
+ return 0
+ ;;
+ esac
+ ;;
+ esac
+
+ local LINK_TYPE='id pinned'
+ case $command in
+ show|list)
+ [[ $prev != "$command" ]] && return 0
+ COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
+ return 0
+ ;;
+ pin)
+ if [[ $prev == "$command" ]]; then
+ COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
+ else
+ _filedir
+ fi
+ return 0
+ ;;
+ *)
+ [[ $prev == $object ]] && \
+ COMPREPLY=( $( compgen -W 'help pin show list' -- "$cur" ) )
+ ;;
+ esac
+ ;;
esac
} &&
complete -F _bpftool bpftool
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index b3745ed711ba..faac8189b285 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -15,7 +15,6 @@
#include <linux/hashtable.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <unistd.h>
#include "json_writer.h"
#include "main.h"
@@ -389,6 +388,9 @@ static int dump_btf_c(const struct btf *btf,
if (IS_ERR(d))
return PTR_ERR(d);
+ printf("#ifndef __VMLINUX_H__\n");
+ printf("#define __VMLINUX_H__\n");
+ printf("\n");
printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
printf("#pragma clang attribute push (__attribute__((preserve_access_index)), apply_to = record)\n");
printf("#endif\n\n");
@@ -412,6 +414,8 @@ static int dump_btf_c(const struct btf *btf,
printf("#ifndef BPF_NO_PRESERVE_ACCESS_INDEX\n");
printf("#pragma clang attribute pop\n");
printf("#endif\n");
+ printf("\n");
+ printf("#endif /* __VMLINUX_H__ */\n");
done:
btf_dump__free(d);
@@ -549,7 +553,7 @@ static int do_dump(int argc, char **argv)
btf = btf__parse_elf(*argv, NULL);
if (IS_ERR(btf)) {
- err = PTR_ERR(btf);
+ err = -PTR_ERR(btf);
btf = NULL;
p_err("failed to load BTF from %s: %s",
*argv, strerror(err));
@@ -947,9 +951,9 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s btf { show | list } [id BTF_ID]\n"
- " %s btf dump BTF_SRC [format FORMAT]\n"
- " %s btf help\n"
+ "Usage: %1$s %2$s { show | list } [id BTF_ID]\n"
+ " %1$s %2$s dump BTF_SRC [format FORMAT]\n"
+ " %1$s %2$s help\n"
"\n"
" BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
" FORMAT := { raw | c }\n"
@@ -957,7 +961,7 @@ static int do_help(int argc, char **argv)
" " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, bin_name, bin_name);
+ bin_name, "btf");
return 0;
}
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 01cc52b834fa..ede162f83eea 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -4,11 +4,13 @@
#include <ctype.h>
#include <stdio.h> /* for (FILE *) used by json_writer */
#include <string.h>
+#include <unistd.h>
#include <asm/byteorder.h>
#include <linux/bitops.h>
#include <linux/btf.h>
#include <linux/err.h>
#include <bpf/btf.h>
+#include <bpf/bpf.h>
#include "json_writer.h"
#include "main.h"
@@ -22,13 +24,102 @@
static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
__u8 bit_offset, const void *data);
-static void btf_dumper_ptr(const void *data, json_writer_t *jw,
- bool is_plain_text)
+static int btf_dump_func(const struct btf *btf, char *func_sig,
+ const struct btf_type *func_proto,
+ const struct btf_type *func, int pos, int size);
+
+static int dump_prog_id_as_func_ptr(const struct btf_dumper *d,
+ const struct btf_type *func_proto,
+ __u32 prog_id)
{
- if (is_plain_text)
- jsonw_printf(jw, "%p", *(void **)data);
+ struct bpf_prog_info_linear *prog_info = NULL;
+ const struct btf_type *func_type;
+ const char *prog_name = NULL;
+ struct bpf_func_info *finfo;
+ struct btf *prog_btf = NULL;
+ struct bpf_prog_info *info;
+ int prog_fd, func_sig_len;
+ char prog_str[1024];
+
+ /* Get the ptr's func_proto */
+ func_sig_len = btf_dump_func(d->btf, prog_str, func_proto, NULL, 0,
+ sizeof(prog_str));
+ if (func_sig_len == -1)
+ return -1;
+
+ if (!prog_id)
+ goto print;
+
+ /* Get the bpf_prog's name. Obtain from func_info. */
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd == -1)
+ goto print;
+
+ prog_info = bpf_program__get_prog_info_linear(prog_fd,
+ 1UL << BPF_PROG_INFO_FUNC_INFO);
+ close(prog_fd);
+ if (IS_ERR(prog_info)) {
+ prog_info = NULL;
+ goto print;
+ }
+ info = &prog_info->info;
+
+ if (!info->btf_id || !info->nr_func_info ||
+ btf__get_from_id(info->btf_id, &prog_btf))
+ goto print;
+ finfo = (struct bpf_func_info *)info->func_info;
+ func_type = btf__type_by_id(prog_btf, finfo->type_id);
+ if (!func_type || !btf_is_func(func_type))
+ goto print;
+
+ prog_name = btf__name_by_offset(prog_btf, func_type->name_off);
+
+print:
+ if (!prog_id)
+ snprintf(&prog_str[func_sig_len],
+ sizeof(prog_str) - func_sig_len, " 0");
+ else if (prog_name)
+ snprintf(&prog_str[func_sig_len],
+ sizeof(prog_str) - func_sig_len,
+ " %s/prog_id:%u", prog_name, prog_id);
else
- jsonw_printf(jw, "%lu", *(unsigned long *)data);
+ snprintf(&prog_str[func_sig_len],
+ sizeof(prog_str) - func_sig_len,
+ " <unknown_prog_name>/prog_id:%u", prog_id);
+
+ prog_str[sizeof(prog_str) - 1] = '\0';
+ jsonw_string(d->jw, prog_str);
+ btf__free(prog_btf);
+ free(prog_info);
+ return 0;
+}
+
+static void btf_dumper_ptr(const struct btf_dumper *d,
+ const struct btf_type *t,
+ const void *data)
+{
+ unsigned long value = *(unsigned long *)data;
+ const struct btf_type *ptr_type;
+ __s32 ptr_type_id;
+
+ if (!d->prog_id_as_func_ptr || value > UINT32_MAX)
+ goto print_ptr_value;
+
+ ptr_type_id = btf__resolve_type(d->btf, t->type);
+ if (ptr_type_id < 0)
+ goto print_ptr_value;
+ ptr_type = btf__type_by_id(d->btf, ptr_type_id);
+ if (!ptr_type || !btf_is_func_proto(ptr_type))
+ goto print_ptr_value;
+
+ if (!dump_prog_id_as_func_ptr(d, ptr_type, value))
+ return;
+
+print_ptr_value:
+ if (d->is_plain_text)
+ jsonw_printf(d->jw, "%p", (void *)value);
+ else
+ jsonw_printf(d->jw, "%lu", value);
}
static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
@@ -43,9 +134,78 @@ static int btf_dumper_modifier(const struct btf_dumper *d, __u32 type_id,
return btf_dumper_do_type(d, actual_type_id, bit_offset, data);
}
-static void btf_dumper_enum(const void *data, json_writer_t *jw)
+static int btf_dumper_enum(const struct btf_dumper *d,
+ const struct btf_type *t,
+ const void *data)
+{
+ const struct btf_enum *enums = btf_enum(t);
+ __s64 value;
+ __u16 i;
+
+ switch (t->size) {
+ case 8:
+ value = *(__s64 *)data;
+ break;
+ case 4:
+ value = *(__s32 *)data;
+ break;
+ case 2:
+ value = *(__s16 *)data;
+ break;
+ case 1:
+ value = *(__s8 *)data;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 0; i < btf_vlen(t); i++) {
+ if (value == enums[i].val) {
+ jsonw_string(d->jw,
+ btf__name_by_offset(d->btf,
+ enums[i].name_off));
+ return 0;
+ }
+ }
+
+ jsonw_int(d->jw, value);
+ return 0;
+}
+
+static bool is_str_array(const struct btf *btf, const struct btf_array *arr,
+ const char *s)
{
- jsonw_printf(jw, "%d", *(int *)data);
+ const struct btf_type *elem_type;
+ const char *end_s;
+
+ if (!arr->nelems)
+ return false;
+
+ elem_type = btf__type_by_id(btf, arr->type);
+ /* Not skipping typedef. typedef to char does not count as
+ * a string now.
+ */
+ while (elem_type && btf_is_mod(elem_type))
+ elem_type = btf__type_by_id(btf, elem_type->type);
+
+ if (!elem_type || !btf_is_int(elem_type) || elem_type->size != 1)
+ return false;
+
+ if (btf_int_encoding(elem_type) != BTF_INT_CHAR &&
+ strcmp("char", btf__name_by_offset(btf, elem_type->name_off)))
+ return false;
+
+ end_s = s + arr->nelems;
+ while (s < end_s) {
+ if (!*s)
+ return true;
+ if (*s <= 0x1f || *s >= 0x7f)
+ return false;
+ s++;
+ }
+
+ /* '\0' is not found */
+ return false;
}
static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id,
@@ -57,6 +217,11 @@ static int btf_dumper_array(const struct btf_dumper *d, __u32 type_id,
int ret = 0;
__u32 i;
+ if (is_str_array(d->btf, arr, data)) {
+ jsonw_string(d->jw, data);
+ return 0;
+ }
+
elem_size = btf__resolve_size(d->btf, arr->type);
if (elem_size < 0)
return elem_size;
@@ -106,8 +271,8 @@ static void btf_int128_print(json_writer_t *jw, const void *data,
}
}
-static void btf_int128_shift(__u64 *print_num, u16 left_shift_bits,
- u16 right_shift_bits)
+static void btf_int128_shift(__u64 *print_num, __u16 left_shift_bits,
+ __u16 right_shift_bits)
{
__u64 upper_num, lower_num;
@@ -366,10 +531,9 @@ static int btf_dumper_do_type(const struct btf_dumper *d, __u32 type_id,
case BTF_KIND_ARRAY:
return btf_dumper_array(d, type_id, data);
case BTF_KIND_ENUM:
- btf_dumper_enum(data, d->jw);
- return 0;
+ return btf_dumper_enum(d, t, data);
case BTF_KIND_PTR:
- btf_dumper_ptr(data, d->jw, d->is_plain_text);
+ btf_dumper_ptr(d, t, data);
return 0;
case BTF_KIND_UNKN:
jsonw_printf(d->jw, "(unknown)");
@@ -414,10 +578,6 @@ int btf_dumper_type(const struct btf_dumper *d, __u32 type_id,
return -1; \
} while (0)
-static int btf_dump_func(const struct btf *btf, char *func_sig,
- const struct btf_type *func_proto,
- const struct btf_type *func, int pos, int size);
-
static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
char *func_sig, int pos, int size)
{
@@ -526,8 +686,15 @@ static int btf_dump_func(const struct btf *btf, char *func_sig,
BTF_PRINT_ARG(", ");
if (arg->type) {
BTF_PRINT_TYPE(arg->type);
- BTF_PRINT_ARG("%s",
- btf__name_by_offset(btf, arg->name_off));
+ if (arg->name_off)
+ BTF_PRINT_ARG("%s",
+ btf__name_by_offset(btf, arg->name_off));
+ else if (pos && func_sig[pos - 1] == ' ')
+ /* Remove unnecessary space for
+ * FUNC_PROTO that does not have
+ * arg->name_off
+ */
+ func_sig[--pos] = '\0';
} else {
BTF_PRINT_ARG("...");
}
diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
index 3e21f994f262..1951219a9af7 100644
--- a/tools/bpf/bpftool/cfg.c
+++ b/tools/bpf/bpftool/cfg.c
@@ -157,7 +157,7 @@ static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
return false;
}
-static bool is_jmp_insn(u8 code)
+static bool is_jmp_insn(__u8 code)
{
return BPF_CLASS(code) == BPF_JMP || BPF_CLASS(code) == BPF_JMP32;
}
@@ -176,7 +176,7 @@ static bool func_partition_bb_head(struct func_node *func)
for (; cur <= end; cur++) {
if (is_jmp_insn(cur->code)) {
- u8 opcode = BPF_OP(cur->code);
+ __u8 opcode = BPF_OP(cur->code);
if (opcode == BPF_EXIT || opcode == BPF_CALL)
continue;
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 62c6a1d7cd18..d901cc1b904a 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -25,48 +25,27 @@
" ATTACH_TYPE := { ingress | egress | sock_create |\n" \
" sock_ops | device | bind4 | bind6 |\n" \
" post_bind4 | post_bind6 | connect4 |\n" \
- " connect6 | sendmsg4 | sendmsg6 |\n" \
- " recvmsg4 | recvmsg6 | sysctl |\n" \
- " getsockopt | setsockopt }"
+ " connect6 | getpeername4 | getpeername6 |\n" \
+ " getsockname4 | getsockname6 | sendmsg4 |\n" \
+ " sendmsg6 | recvmsg4 | recvmsg6 |\n" \
+ " sysctl | getsockopt | setsockopt }"
static unsigned int query_flags;
-static const char * const attach_type_strings[] = {
- [BPF_CGROUP_INET_INGRESS] = "ingress",
- [BPF_CGROUP_INET_EGRESS] = "egress",
- [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
- [BPF_CGROUP_SOCK_OPS] = "sock_ops",
- [BPF_CGROUP_DEVICE] = "device",
- [BPF_CGROUP_INET4_BIND] = "bind4",
- [BPF_CGROUP_INET6_BIND] = "bind6",
- [BPF_CGROUP_INET4_CONNECT] = "connect4",
- [BPF_CGROUP_INET6_CONNECT] = "connect6",
- [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
- [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
- [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
- [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
- [BPF_CGROUP_SYSCTL] = "sysctl",
- [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
- [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
- [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
- [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
- [__MAX_BPF_ATTACH_TYPE] = NULL,
-};
-
static enum bpf_attach_type parse_attach_type(const char *str)
{
enum bpf_attach_type type;
for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
- if (attach_type_strings[type] &&
- is_prefix(str, attach_type_strings[type]))
+ if (attach_type_name[type] &&
+ is_prefix(str, attach_type_name[type]))
return type;
}
return __MAX_BPF_ATTACH_TYPE;
}
-static int show_bpf_prog(int id, const char *attach_type_str,
+static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
const char *attach_flags_str,
int level)
{
@@ -86,18 +65,22 @@ static int show_bpf_prog(int id, const char *attach_type_str,
if (json_output) {
jsonw_start_object(json_wtr);
jsonw_uint_field(json_wtr, "id", info.id);
- jsonw_string_field(json_wtr, "attach_type",
- attach_type_str);
+ if (attach_type < ARRAY_SIZE(attach_type_name))
+ jsonw_string_field(json_wtr, "attach_type",
+ attach_type_name[attach_type]);
+ else
+ jsonw_uint_field(json_wtr, "attach_type", attach_type);
jsonw_string_field(json_wtr, "attach_flags",
attach_flags_str);
jsonw_string_field(json_wtr, "name", info.name);
jsonw_end_object(json_wtr);
} else {
- printf("%s%-8u %-15s %-15s %-15s\n", level ? " " : "",
- info.id,
- attach_type_str,
- attach_flags_str,
- info.name);
+ printf("%s%-8u ", level ? " " : "", info.id);
+ if (attach_type < ARRAY_SIZE(attach_type_name))
+ printf("%-15s", attach_type_name[attach_type]);
+ else
+ printf("type %-10u", attach_type);
+ printf(" %-15s %-15s\n", attach_flags_str, info.name);
}
close(prog_fd);
@@ -171,7 +154,7 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
}
for (iter = 0; iter < prog_cnt; iter++)
- show_bpf_prog(prog_ids[iter], attach_type_strings[type],
+ show_bpf_prog(prog_ids[iter], type,
attach_flags_str, level);
return 0;
@@ -508,20 +491,18 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } CGROUP [**effective**]\n"
- " %s %s tree [CGROUP_ROOT] [**effective**]\n"
- " %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
- " %s %s detach CGROUP ATTACH_TYPE PROG\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s { show | list } CGROUP [**effective**]\n"
+ " %1$s %2$s tree [CGROUP_ROOT] [**effective**]\n"
+ " %1$s %2$s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
+ " %1$s %2$s detach CGROUP ATTACH_TYPE PROG\n"
+ " %1$s %2$s help\n"
"\n"
HELP_SPEC_ATTACH_TYPES "\n"
" " HELP_SPEC_ATTACH_FLAGS "\n"
" " HELP_SPEC_PROGRAM "\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2]);
return 0;
}
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index b75b8ec5469c..c47bdc65de8e 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -211,39 +211,14 @@ int do_pin_fd(int fd, const char *name)
return err;
}
-int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32))
+int do_pin_any(int argc, char **argv, int (*get_fd)(int *, char ***))
{
- unsigned int id;
- char *endptr;
int err;
int fd;
- if (argc < 3) {
- p_err("too few arguments, id ID and FILE path is required");
- return -1;
- } else if (argc > 3) {
- p_err("too many arguments");
- return -1;
- }
-
- if (!is_prefix(*argv, "id")) {
- p_err("expected 'id' got %s", *argv);
- return -1;
- }
- NEXT_ARG();
-
- id = strtoul(*argv, &endptr, 0);
- if (*endptr) {
- p_err("can't parse %s as ID", *argv);
- return -1;
- }
- NEXT_ARG();
-
- fd = get_fd_by_id(id);
- if (fd < 0) {
- p_err("can't open object by id (%u): %s", id, strerror(errno));
- return -1;
- }
+ fd = get_fd(&argc, &argv);
+ if (fd < 0)
+ return fd;
err = do_pin_fd(fd, *argv);
@@ -287,6 +262,8 @@ int get_fd_type(int fd)
return BPF_OBJ_MAP;
else if (strstr(buf, "bpf-prog"))
return BPF_OBJ_PROG;
+ else if (strstr(buf, "bpf-link"))
+ return BPF_OBJ_LINK;
return BPF_OBJ_UNKNOWN;
}
@@ -597,3 +574,10 @@ int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what)
return 0;
}
+
+int __printf(2, 0)
+print_all_levels(__maybe_unused enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ return vfprintf(stderr, format, args);
+}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 941873d778d8..768bf77df886 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -6,6 +6,9 @@
#include <string.h>
#include <unistd.h>
#include <net/if.h>
+#ifdef USE_LIBCAP
+#include <sys/capability.h>
+#endif
#include <sys/utsname.h>
#include <sys/vfs.h>
@@ -35,6 +38,11 @@ static const char * const helper_name[] = {
#undef BPF_HELPER_MAKE_ENTRY
+static bool full_mode;
+#ifdef USE_LIBCAP
+static bool run_as_unprivileged;
+#endif
+
/* Miscellaneous utility functions */
static bool check_procfs(void)
@@ -72,13 +80,12 @@ print_bool_feature(const char *feat_name, const char *plain_name,
printf("%s is %savailable\n", plain_name, res ? "" : "NOT ");
}
-static void print_kernel_option(const char *name, const char *value)
+static void print_kernel_option(const char *name, const char *value,
+ const char *define_prefix)
{
char *endptr;
int res;
- /* No support for C-style ouptut */
-
if (json_output) {
if (!value) {
jsonw_null_field(json_wtr, name);
@@ -90,6 +97,12 @@ static void print_kernel_option(const char *name, const char *value)
jsonw_int_field(json_wtr, name, res);
else
jsonw_string_field(json_wtr, name, value);
+ } else if (define_prefix) {
+ if (value)
+ printf("#define %s%s %s\n", define_prefix,
+ name, value);
+ else
+ printf("/* %s%s is not set */\n", define_prefix, name);
} else {
if (value)
printf("%s is set to %s\n", name, value);
@@ -112,18 +125,12 @@ print_start_section(const char *json_title, const char *plain_title,
}
}
-static void
-print_end_then_start_section(const char *json_title, const char *plain_title,
- const char *define_comment,
- const char *define_prefix)
+static void print_end_section(void)
{
if (json_output)
jsonw_end_object(json_wtr);
else
printf("\n");
-
- print_start_section(json_title, plain_title, define_comment,
- define_prefix);
}
/* Probing functions */
@@ -313,77 +320,84 @@ static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
return false;
}
-static void probe_kernel_image_config(void)
+static void probe_kernel_image_config(const char *define_prefix)
{
- static const char * const options[] = {
+ static const struct {
+ const char * const name;
+ bool macro_dump;
+ } options[] = {
/* Enable BPF */
- "CONFIG_BPF",
+ { "CONFIG_BPF", },
/* Enable bpf() syscall */
- "CONFIG_BPF_SYSCALL",
+ { "CONFIG_BPF_SYSCALL", },
/* Does selected architecture support eBPF JIT compiler */
- "CONFIG_HAVE_EBPF_JIT",
+ { "CONFIG_HAVE_EBPF_JIT", },
/* Compile eBPF JIT compiler */
- "CONFIG_BPF_JIT",
+ { "CONFIG_BPF_JIT", },
/* Avoid compiling eBPF interpreter (use JIT only) */
- "CONFIG_BPF_JIT_ALWAYS_ON",
+ { "CONFIG_BPF_JIT_ALWAYS_ON", },
/* cgroups */
- "CONFIG_CGROUPS",
+ { "CONFIG_CGROUPS", },
/* BPF programs attached to cgroups */
- "CONFIG_CGROUP_BPF",
+ { "CONFIG_CGROUP_BPF", },
/* bpf_get_cgroup_classid() helper */
- "CONFIG_CGROUP_NET_CLASSID",
+ { "CONFIG_CGROUP_NET_CLASSID", },
/* bpf_skb_{,ancestor_}cgroup_id() helpers */
- "CONFIG_SOCK_CGROUP_DATA",
+ { "CONFIG_SOCK_CGROUP_DATA", },
/* Tracing: attach BPF to kprobes, tracepoints, etc. */
- "CONFIG_BPF_EVENTS",
+ { "CONFIG_BPF_EVENTS", },
/* Kprobes */
- "CONFIG_KPROBE_EVENTS",
+ { "CONFIG_KPROBE_EVENTS", },
/* Uprobes */
- "CONFIG_UPROBE_EVENTS",
+ { "CONFIG_UPROBE_EVENTS", },
/* Tracepoints */
- "CONFIG_TRACING",
+ { "CONFIG_TRACING", },
/* Syscall tracepoints */
- "CONFIG_FTRACE_SYSCALLS",
+ { "CONFIG_FTRACE_SYSCALLS", },
/* bpf_override_return() helper support for selected arch */
- "CONFIG_FUNCTION_ERROR_INJECTION",
+ { "CONFIG_FUNCTION_ERROR_INJECTION", },
/* bpf_override_return() helper */
- "CONFIG_BPF_KPROBE_OVERRIDE",
+ { "CONFIG_BPF_KPROBE_OVERRIDE", },
/* Network */
- "CONFIG_NET",
+ { "CONFIG_NET", },
/* AF_XDP sockets */
- "CONFIG_XDP_SOCKETS",
+ { "CONFIG_XDP_SOCKETS", },
/* BPF_PROG_TYPE_LWT_* and related helpers */
- "CONFIG_LWTUNNEL_BPF",
+ { "CONFIG_LWTUNNEL_BPF", },
/* BPF_PROG_TYPE_SCHED_ACT, TC (traffic control) actions */
- "CONFIG_NET_ACT_BPF",
+ { "CONFIG_NET_ACT_BPF", },
/* BPF_PROG_TYPE_SCHED_CLS, TC filters */
- "CONFIG_NET_CLS_BPF",
+ { "CONFIG_NET_CLS_BPF", },
/* TC clsact qdisc */
- "CONFIG_NET_CLS_ACT",
+ { "CONFIG_NET_CLS_ACT", },
/* Ingress filtering with TC */
- "CONFIG_NET_SCH_INGRESS",
+ { "CONFIG_NET_SCH_INGRESS", },
/* bpf_skb_get_xfrm_state() helper */
- "CONFIG_XFRM",
+ { "CONFIG_XFRM", },
/* bpf_get_route_realm() helper */
- "CONFIG_IP_ROUTE_CLASSID",
+ { "CONFIG_IP_ROUTE_CLASSID", },
/* BPF_PROG_TYPE_LWT_SEG6_LOCAL and related helpers */
- "CONFIG_IPV6_SEG6_BPF",
+ { "CONFIG_IPV6_SEG6_BPF", },
/* BPF_PROG_TYPE_LIRC_MODE2 and related helpers */
- "CONFIG_BPF_LIRC_MODE2",
+ { "CONFIG_BPF_LIRC_MODE2", },
/* BPF stream parser and BPF socket maps */
- "CONFIG_BPF_STREAM_PARSER",
+ { "CONFIG_BPF_STREAM_PARSER", },
/* xt_bpf module for passing BPF programs to netfilter */
- "CONFIG_NETFILTER_XT_MATCH_BPF",
+ { "CONFIG_NETFILTER_XT_MATCH_BPF", },
/* bpfilter back-end for iptables */
- "CONFIG_BPFILTER",
+ { "CONFIG_BPFILTER", },
/* bpftilter module with "user mode helper" */
- "CONFIG_BPFILTER_UMH",
+ { "CONFIG_BPFILTER_UMH", },
/* test_bpf module for BPF tests */
- "CONFIG_TEST_BPF",
+ { "CONFIG_TEST_BPF", },
+
+ /* Misc configs useful in BPF C programs */
+ /* jiffies <-> sec conversion for bpf_jiffies64() helper */
+ { "CONFIG_HZ", true, }
};
char *values[ARRAY_SIZE(options)] = { };
struct utsname utsn;
@@ -425,7 +439,8 @@ static void probe_kernel_image_config(void)
while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
for (i = 0; i < ARRAY_SIZE(options); i++) {
- if (values[i] || strcmp(buf, options[i]))
+ if ((define_prefix && !options[i].macro_dump) ||
+ values[i] || strcmp(buf, options[i].name))
continue;
values[i] = strdup(value);
@@ -437,7 +452,9 @@ end_parse:
gzclose(file);
for (i = 0; i < ARRAY_SIZE(options); i++) {
- print_kernel_option(options[i], values[i]);
+ if (define_prefix && !options[i].macro_dump)
+ continue;
+ print_kernel_option(options[i].name, values[i], define_prefix);
free(values[i]);
}
}
@@ -477,6 +494,13 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
}
res = bpf_probe_prog_type(prog_type, ifindex);
+#ifdef USE_LIBCAP
+ /* Probe may succeed even if program load fails, for unprivileged users
+ * check that we did not fail because of insufficient permissions
+ */
+ if (run_as_unprivileged && errno == EPERM)
+ res = false;
+#endif
supported_types[prog_type] |= res;
@@ -505,6 +529,10 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
res = bpf_probe_map_type(map_type, ifindex);
+ /* Probe result depends on the success of map creation, no additional
+ * check required for unprivileged users
+ */
+
maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
if (strlen(map_type_name[map_type]) > maxlen) {
p_info("map type name too long");
@@ -520,13 +548,44 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
}
static void
+probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
+ const char *define_prefix, unsigned int id,
+ const char *ptype_name, __u32 ifindex)
+{
+ bool res = false;
+
+ if (supported_type) {
+ res = bpf_probe_helper(id, prog_type, ifindex);
+#ifdef USE_LIBCAP
+ /* Probe may succeed even if program load fails, for
+ * unprivileged users check that we did not fail because of
+ * insufficient permissions
+ */
+ if (run_as_unprivileged && errno == EPERM)
+ res = false;
+#endif
+ }
+
+ if (json_output) {
+ if (res)
+ jsonw_string(json_wtr, helper_name[id]);
+ } else if (define_prefix) {
+ printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n",
+ define_prefix, ptype_name, helper_name[id],
+ res ? "1" : "0");
+ } else {
+ if (res)
+ printf("\n\t- %s", helper_name[id]);
+ }
+}
+
+static void
probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
const char *define_prefix, __u32 ifindex)
{
const char *ptype_name = prog_type_name[prog_type];
char feat_name[128];
unsigned int id;
- bool res;
if (ifindex)
/* Only test helpers for offload-able program types */
@@ -548,21 +607,19 @@ probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
}
for (id = 1; id < ARRAY_SIZE(helper_name); id++) {
- if (!supported_type)
- res = false;
- else
- res = bpf_probe_helper(id, prog_type, ifindex);
-
- if (json_output) {
- if (res)
- jsonw_string(json_wtr, helper_name[id]);
- } else if (define_prefix) {
- printf("#define %sBPF__PROG_TYPE_%s__HELPER_%s %s\n",
- define_prefix, ptype_name, helper_name[id],
- res ? "1" : "0");
- } else {
- if (res)
- printf("\n\t- %s", helper_name[id]);
+ /* Skip helper functions which emit dmesg messages when not in
+ * the full mode.
+ */
+ switch (id) {
+ case BPF_FUNC_trace_printk:
+ case BPF_FUNC_probe_write_user:
+ if (!full_mode)
+ continue;
+ /* fallthrough */
+ default:
+ probe_helper_for_progtype(prog_type, supported_type,
+ define_prefix, id, ptype_name,
+ ifindex);
}
}
@@ -584,23 +641,250 @@ probe_large_insn_limit(const char *define_prefix, __u32 ifindex)
res, define_prefix);
}
-static int do_probe(int argc, char **argv)
+static void
+section_system_config(enum probe_component target, const char *define_prefix)
+{
+ switch (target) {
+ case COMPONENT_KERNEL:
+ case COMPONENT_UNSPEC:
+ print_start_section("system_config",
+ "Scanning system configuration...",
+ "/*** Misc kernel config items ***/",
+ define_prefix);
+ if (!define_prefix) {
+ if (check_procfs()) {
+ probe_unprivileged_disabled();
+ probe_jit_enable();
+ probe_jit_harden();
+ probe_jit_kallsyms();
+ probe_jit_limit();
+ } else {
+ p_info("/* procfs not mounted, skipping related probes */");
+ }
+ }
+ probe_kernel_image_config(define_prefix);
+ print_end_section();
+ break;
+ default:
+ break;
+ }
+}
+
+static bool section_syscall_config(const char *define_prefix)
+{
+ bool res;
+
+ print_start_section("syscall_config",
+ "Scanning system call availability...",
+ "/*** System call availability ***/",
+ define_prefix);
+ res = probe_bpf_syscall(define_prefix);
+ print_end_section();
+
+ return res;
+}
+
+static void
+section_program_types(bool *supported_types, const char *define_prefix,
+ __u32 ifindex)
{
- enum probe_component target = COMPONENT_UNSPEC;
- const char *define_prefix = NULL;
- bool supported_types[128] = {};
- __u32 ifindex = 0;
unsigned int i;
- char *ifname;
- /* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN).
- * Let's approximate, and restrict usage to root user only.
+ print_start_section("program_types",
+ "Scanning eBPF program types...",
+ "/*** eBPF program types ***/",
+ define_prefix);
+
+ for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+ probe_prog_type(i, supported_types, define_prefix, ifindex);
+
+ print_end_section();
+}
+
+static void section_map_types(const char *define_prefix, __u32 ifindex)
+{
+ unsigned int i;
+
+ print_start_section("map_types",
+ "Scanning eBPF map types...",
+ "/*** eBPF map types ***/",
+ define_prefix);
+
+ for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++)
+ probe_map_type(i, define_prefix, ifindex);
+
+ print_end_section();
+}
+
+static void
+section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex)
+{
+ unsigned int i;
+
+ print_start_section("helpers",
+ "Scanning eBPF helper functions...",
+ "/*** eBPF helper functions ***/",
+ define_prefix);
+
+ if (define_prefix)
+ printf("/*\n"
+ " * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n"
+ " * to determine if <helper_name> is available for <prog_type_name>,\n"
+ " * e.g.\n"
+ " * #if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n"
+ " * // do stuff with this helper\n"
+ " * #elif\n"
+ " * // use a workaround\n"
+ " * #endif\n"
+ " */\n"
+ "#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper) \\\n"
+ " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
+ define_prefix, define_prefix, define_prefix,
+ define_prefix);
+ for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+ probe_helpers_for_progtype(i, supported_types[i], define_prefix,
+ ifindex);
+
+ print_end_section();
+}
+
+static void section_misc(const char *define_prefix, __u32 ifindex)
+{
+ print_start_section("misc",
+ "Scanning miscellaneous eBPF features...",
+ "/*** eBPF misc features ***/",
+ define_prefix);
+ probe_large_insn_limit(define_prefix, ifindex);
+ print_end_section();
+}
+
+#ifdef USE_LIBCAP
+#define capability(c) { c, false, #c }
+#define capability_msg(a, i) a[i].set ? "" : a[i].name, a[i].set ? "" : ", "
+#endif
+
+static int handle_perms(void)
+{
+#ifdef USE_LIBCAP
+ struct {
+ cap_value_t cap;
+ bool set;
+ char name[14]; /* strlen("CAP_SYS_ADMIN") */
+ } bpf_caps[] = {
+ capability(CAP_SYS_ADMIN),
+#ifdef CAP_BPF
+ capability(CAP_BPF),
+ capability(CAP_NET_ADMIN),
+ capability(CAP_PERFMON),
+#endif
+ };
+ cap_value_t cap_list[ARRAY_SIZE(bpf_caps)];
+ unsigned int i, nb_bpf_caps = 0;
+ bool cap_sys_admin_only = true;
+ cap_flag_value_t val;
+ int res = -1;
+ cap_t caps;
+
+ caps = cap_get_proc();
+ if (!caps) {
+ p_err("failed to get capabilities for process: %s",
+ strerror(errno));
+ return -1;
+ }
+
+#ifdef CAP_BPF
+ if (CAP_IS_SUPPORTED(CAP_BPF))
+ cap_sys_admin_only = false;
+#endif
+
+ for (i = 0; i < ARRAY_SIZE(bpf_caps); i++) {
+ const char *cap_name = bpf_caps[i].name;
+ cap_value_t cap = bpf_caps[i].cap;
+
+ if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val)) {
+ p_err("bug: failed to retrieve %s status: %s", cap_name,
+ strerror(errno));
+ goto exit_free;
+ }
+
+ if (val == CAP_SET) {
+ bpf_caps[i].set = true;
+ cap_list[nb_bpf_caps++] = cap;
+ }
+
+ if (cap_sys_admin_only)
+ /* System does not know about CAP_BPF, meaning that
+ * CAP_SYS_ADMIN is the only capability required. We
+ * just checked it, break.
+ */
+ break;
+ }
+
+ if ((run_as_unprivileged && !nb_bpf_caps) ||
+ (!run_as_unprivileged && nb_bpf_caps == ARRAY_SIZE(bpf_caps)) ||
+ (!run_as_unprivileged && cap_sys_admin_only && nb_bpf_caps)) {
+ /* We are all good, exit now */
+ res = 0;
+ goto exit_free;
+ }
+
+ if (!run_as_unprivileged) {
+ if (cap_sys_admin_only)
+ p_err("missing %s, required for full feature probing; run as root or use 'unprivileged'",
+ bpf_caps[0].name);
+ else
+ p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'",
+ capability_msg(bpf_caps, 0),
+ capability_msg(bpf_caps, 1),
+ capability_msg(bpf_caps, 2),
+ capability_msg(bpf_caps, 3));
+ goto exit_free;
+ }
+
+ /* if (run_as_unprivileged && nb_bpf_caps > 0), drop capabilities. */
+ if (cap_set_flag(caps, CAP_EFFECTIVE, nb_bpf_caps, cap_list,
+ CAP_CLEAR)) {
+ p_err("bug: failed to clear capabilities: %s", strerror(errno));
+ goto exit_free;
+ }
+
+ if (cap_set_proc(caps)) {
+ p_err("failed to drop capabilities: %s", strerror(errno));
+ goto exit_free;
+ }
+
+ res = 0;
+
+exit_free:
+ if (cap_free(caps) && !res) {
+ p_err("failed to clear storage object for capabilities: %s",
+ strerror(errno));
+ res = -1;
+ }
+
+ return res;
+#else
+ /* Detection assumes user has specific privileges.
+ * We do not use libpcap so let's approximate, and restrict usage to
+ * root user only.
*/
if (geteuid()) {
- p_err("please run this command as root user");
+ p_err("full feature probing requires root privileges");
return -1;
}
+ return 0;
+#endif /* USE_LIBCAP */
+}
+
+static int do_probe(int argc, char **argv)
+{
+ enum probe_component target = COMPONENT_UNSPEC;
+ const char *define_prefix = NULL;
+ bool supported_types[128] = {};
+ __u32 ifindex = 0;
+ char *ifname;
+
set_max_rlimit();
while (argc) {
@@ -629,6 +913,9 @@ static int do_probe(int argc, char **argv)
strerror(errno));
return -1;
}
+ } else if (is_prefix(*argv, "full")) {
+ full_mode = true;
+ NEXT_ARG();
} else if (is_prefix(*argv, "macros") && !define_prefix) {
define_prefix = "";
NEXT_ARG();
@@ -646,6 +933,14 @@ static int do_probe(int argc, char **argv)
if (!REQ_ARGS(1))
return -1;
define_prefix = GET_ARG();
+ } else if (is_prefix(*argv, "unprivileged")) {
+#ifdef USE_LIBCAP
+ run_as_unprivileged = true;
+ NEXT_ARG();
+#else
+ p_err("unprivileged run not supported, recompile bpftool with libcap");
+ return -1;
+#endif
} else {
p_err("expected no more arguments, 'kernel', 'dev', 'macros' or 'prefix', got: '%s'?",
*argv);
@@ -653,102 +948,30 @@ static int do_probe(int argc, char **argv)
}
}
+ /* Full feature detection requires specific privileges.
+ * Let's approximate, and warn if user is not root.
+ */
+ if (handle_perms())
+ return -1;
+
if (json_output) {
define_prefix = NULL;
jsonw_start_object(json_wtr);
}
- switch (target) {
- case COMPONENT_KERNEL:
- case COMPONENT_UNSPEC:
- if (define_prefix)
- break;
-
- print_start_section("system_config",
- "Scanning system configuration...",
- NULL, /* define_comment never used here */
- NULL); /* define_prefix always NULL here */
- if (check_procfs()) {
- probe_unprivileged_disabled();
- probe_jit_enable();
- probe_jit_harden();
- probe_jit_kallsyms();
- probe_jit_limit();
- } else {
- p_info("/* procfs not mounted, skipping related probes */");
- }
- probe_kernel_image_config();
- if (json_output)
- jsonw_end_object(json_wtr);
- else
- printf("\n");
- break;
- default:
- break;
- }
-
- print_start_section("syscall_config",
- "Scanning system call availability...",
- "/*** System call availability ***/",
- define_prefix);
-
- if (!probe_bpf_syscall(define_prefix))
+ section_system_config(target, define_prefix);
+ if (!section_syscall_config(define_prefix))
/* bpf() syscall unavailable, don't probe other BPF features */
goto exit_close_json;
-
- print_end_then_start_section("program_types",
- "Scanning eBPF program types...",
- "/*** eBPF program types ***/",
- define_prefix);
-
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
- probe_prog_type(i, supported_types, define_prefix, ifindex);
-
- print_end_then_start_section("map_types",
- "Scanning eBPF map types...",
- "/*** eBPF map types ***/",
- define_prefix);
-
- for (i = BPF_MAP_TYPE_UNSPEC + 1; i < map_type_name_size; i++)
- probe_map_type(i, define_prefix, ifindex);
-
- print_end_then_start_section("helpers",
- "Scanning eBPF helper functions...",
- "/*** eBPF helper functions ***/",
- define_prefix);
-
- if (define_prefix)
- printf("/*\n"
- " * Use %sHAVE_PROG_TYPE_HELPER(prog_type_name, helper_name)\n"
- " * to determine if <helper_name> is available for <prog_type_name>,\n"
- " * e.g.\n"
- " * #if %sHAVE_PROG_TYPE_HELPER(xdp, bpf_redirect)\n"
- " * // do stuff with this helper\n"
- " * #elif\n"
- " * // use a workaround\n"
- " * #endif\n"
- " */\n"
- "#define %sHAVE_PROG_TYPE_HELPER(prog_type, helper) \\\n"
- " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
- define_prefix, define_prefix, define_prefix,
- define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
- probe_helpers_for_progtype(i, supported_types[i],
- define_prefix, ifindex);
-
- print_end_then_start_section("misc",
- "Scanning miscellaneous eBPF features...",
- "/*** eBPF misc features ***/",
- define_prefix);
- probe_large_insn_limit(define_prefix, ifindex);
+ section_program_types(supported_types, define_prefix, ifindex);
+ section_map_types(define_prefix, ifindex);
+ section_helpers(supported_types, define_prefix, ifindex);
+ section_misc(define_prefix, ifindex);
exit_close_json:
- if (json_output) {
- /* End current "section" of probes */
- jsonw_end_object(json_wtr);
+ if (json_output)
/* End root object */
jsonw_end_object(json_wtr);
- }
return 0;
}
@@ -761,12 +984,12 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s probe [COMPONENT] [macros [prefix PREFIX]]\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n"
+ " %1$s %2$s help\n"
"\n"
" COMPONENT := { kernel | dev NAME }\n"
"",
- bin_name, argv[-2], bin_name, argv[-2]);
+ bin_name, argv[-2]);
return 0;
}
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index f8113b3646f5..a3c4bb86c05a 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -17,7 +17,6 @@
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
-#include <unistd.h>
#include <bpf/btf.h>
#include "bpf/libbpf_internal.h"
@@ -587,12 +586,12 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %1$s gen skeleton FILE\n"
- " %1$s gen help\n"
+ "Usage: %1$s %2$s skeleton FILE\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name);
+ bin_name, "gen");
return 0;
}
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
new file mode 100644
index 000000000000..33240fcc6319
--- /dev/null
+++ b/tools/bpf/bpftool/iter.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (C) 2020 Facebook
+
+#define _GNU_SOURCE
+#include <linux/err.h>
+#include <bpf/libbpf.h>
+
+#include "main.h"
+
+static int do_pin(int argc, char **argv)
+{
+ const char *objfile, *path;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ struct bpf_link *link;
+ int err;
+
+ if (!REQ_ARGS(2))
+ usage();
+
+ objfile = GET_ARG();
+ path = GET_ARG();
+
+ obj = bpf_object__open(objfile);
+ if (IS_ERR(obj)) {
+ p_err("can't open objfile %s", objfile);
+ return -1;
+ }
+
+ err = bpf_object__load(obj);
+ if (err) {
+ p_err("can't load objfile %s", objfile);
+ goto close_obj;
+ }
+
+ prog = bpf_program__next(NULL, obj);
+ if (!prog) {
+ p_err("can't find bpf program in objfile %s", objfile);
+ goto close_obj;
+ }
+
+ link = bpf_program__attach_iter(prog, NULL);
+ if (IS_ERR(link)) {
+ err = PTR_ERR(link);
+ p_err("attach_iter failed for program %s",
+ bpf_program__name(prog));
+ goto close_obj;
+ }
+
+ err = mount_bpffs_for_pin(path);
+ if (err)
+ goto close_link;
+
+ err = bpf_link__pin(link, path);
+ if (err) {
+ p_err("pin_iter failed for program %s to path %s",
+ bpf_program__name(prog), path);
+ goto close_link;
+ }
+
+close_link:
+ bpf_link__destroy(link);
+close_obj:
+ bpf_object__close(obj);
+ return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+ fprintf(stderr,
+ "Usage: %1$s %2$s pin OBJ PATH\n"
+ " %1$s %2$s help\n"
+ "",
+ bin_name, "iter");
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "help", do_help },
+ { "pin", do_pin },
+ { 0 }
+};
+
+int do_iter(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index f7f5885aa3ba..e7e7eee9f172 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -15,7 +15,6 @@
#include <stdio.h>
#include <stdarg.h>
#include <stdint.h>
-#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <unistd.h>
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
new file mode 100644
index 000000000000..fca57ee8fafe
--- /dev/null
+++ b/tools/bpf/bpftool/link.c
@@ -0,0 +1,343 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook */
+
+#include <errno.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+static const char * const link_type_name[] = {
+ [BPF_LINK_TYPE_UNSPEC] = "unspec",
+ [BPF_LINK_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_LINK_TYPE_TRACING] = "tracing",
+ [BPF_LINK_TYPE_CGROUP] = "cgroup",
+ [BPF_LINK_TYPE_ITER] = "iter",
+ [BPF_LINK_TYPE_NETNS] = "netns",
+};
+
+static int link_parse_fd(int *argc, char ***argv)
+{
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return bpf_link_get_fd_by_id(id);
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ return open_obj_pinned_any(path, BPF_OBJ_LINK);
+ }
+
+ p_err("expected 'id' or 'pinned', got: '%s'?", **argv);
+ return -1;
+}
+
+static void
+show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr)
+{
+ jsonw_uint_field(wtr, "id", info->id);
+ if (info->type < ARRAY_SIZE(link_type_name))
+ jsonw_string_field(wtr, "type", link_type_name[info->type]);
+ else
+ jsonw_uint_field(wtr, "type", info->type);
+
+ jsonw_uint_field(json_wtr, "prog_id", info->prog_id);
+}
+
+static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr)
+{
+ if (attach_type < ARRAY_SIZE(attach_type_name))
+ jsonw_string_field(wtr, "attach_type",
+ attach_type_name[attach_type]);
+ else
+ jsonw_uint_field(wtr, "attach_type", attach_type);
+}
+
+static int get_prog_info(int prog_id, struct bpf_prog_info *info)
+{
+ __u32 len = sizeof(*info);
+ int err, prog_fd;
+
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd < 0)
+ return prog_fd;
+
+ memset(info, 0, sizeof(*info));
+ err = bpf_obj_get_info_by_fd(prog_fd, info, &len);
+ if (err)
+ p_err("can't get prog info: %s", strerror(errno));
+ close(prog_fd);
+ return err;
+}
+
+static int show_link_close_json(int fd, struct bpf_link_info *info)
+{
+ struct bpf_prog_info prog_info;
+ int err;
+
+ jsonw_start_object(json_wtr);
+
+ show_link_header_json(info, json_wtr);
+
+ switch (info->type) {
+ case BPF_LINK_TYPE_RAW_TRACEPOINT:
+ jsonw_string_field(json_wtr, "tp_name",
+ (const char *)info->raw_tracepoint.tp_name);
+ break;
+ case BPF_LINK_TYPE_TRACING:
+ err = get_prog_info(info->prog_id, &prog_info);
+ if (err)
+ return err;
+
+ if (prog_info.type < ARRAY_SIZE(prog_type_name))
+ jsonw_string_field(json_wtr, "prog_type",
+ prog_type_name[prog_info.type]);
+ else
+ jsonw_uint_field(json_wtr, "prog_type",
+ prog_info.type);
+
+ show_link_attach_type_json(info->tracing.attach_type,
+ json_wtr);
+ break;
+ case BPF_LINK_TYPE_CGROUP:
+ jsonw_lluint_field(json_wtr, "cgroup_id",
+ info->cgroup.cgroup_id);
+ show_link_attach_type_json(info->cgroup.attach_type, json_wtr);
+ break;
+ case BPF_LINK_TYPE_NETNS:
+ jsonw_uint_field(json_wtr, "netns_ino",
+ info->netns.netns_ino);
+ show_link_attach_type_json(info->netns.attach_type, json_wtr);
+ break;
+ default:
+ break;
+ }
+
+ if (!hash_empty(link_table.table)) {
+ struct pinned_obj *obj;
+
+ jsonw_name(json_wtr, "pinned");
+ jsonw_start_array(json_wtr);
+ hash_for_each_possible(link_table.table, obj, hash, info->id) {
+ if (obj->id == info->id)
+ jsonw_string(json_wtr, obj->path);
+ }
+ jsonw_end_array(json_wtr);
+ }
+ jsonw_end_object(json_wtr);
+
+ return 0;
+}
+
+static void show_link_header_plain(struct bpf_link_info *info)
+{
+ printf("%u: ", info->id);
+ if (info->type < ARRAY_SIZE(link_type_name))
+ printf("%s ", link_type_name[info->type]);
+ else
+ printf("type %u ", info->type);
+
+ printf("prog %u ", info->prog_id);
+}
+
+static void show_link_attach_type_plain(__u32 attach_type)
+{
+ if (attach_type < ARRAY_SIZE(attach_type_name))
+ printf("attach_type %s ", attach_type_name[attach_type]);
+ else
+ printf("attach_type %u ", attach_type);
+}
+
+static int show_link_close_plain(int fd, struct bpf_link_info *info)
+{
+ struct bpf_prog_info prog_info;
+ int err;
+
+ show_link_header_plain(info);
+
+ switch (info->type) {
+ case BPF_LINK_TYPE_RAW_TRACEPOINT:
+ printf("\n\ttp '%s' ",
+ (const char *)info->raw_tracepoint.tp_name);
+ break;
+ case BPF_LINK_TYPE_TRACING:
+ err = get_prog_info(info->prog_id, &prog_info);
+ if (err)
+ return err;
+
+ if (prog_info.type < ARRAY_SIZE(prog_type_name))
+ printf("\n\tprog_type %s ",
+ prog_type_name[prog_info.type]);
+ else
+ printf("\n\tprog_type %u ", prog_info.type);
+
+ show_link_attach_type_plain(info->tracing.attach_type);
+ break;
+ case BPF_LINK_TYPE_CGROUP:
+ printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id);
+ show_link_attach_type_plain(info->cgroup.attach_type);
+ break;
+ case BPF_LINK_TYPE_NETNS:
+ printf("\n\tnetns_ino %u ", info->netns.netns_ino);
+ show_link_attach_type_plain(info->netns.attach_type);
+ break;
+ default:
+ break;
+ }
+
+ if (!hash_empty(link_table.table)) {
+ struct pinned_obj *obj;
+
+ hash_for_each_possible(link_table.table, obj, hash, info->id) {
+ if (obj->id == info->id)
+ printf("\n\tpinned %s", obj->path);
+ }
+ }
+
+ printf("\n");
+
+ return 0;
+}
+
+static int do_show_link(int fd)
+{
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ char raw_tp_name[256];
+ int err;
+
+ memset(&info, 0, sizeof(info));
+again:
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get link info: %s",
+ strerror(errno));
+ close(fd);
+ return err;
+ }
+ if (info.type == BPF_LINK_TYPE_RAW_TRACEPOINT &&
+ !info.raw_tracepoint.tp_name) {
+ info.raw_tracepoint.tp_name = (unsigned long)&raw_tp_name;
+ info.raw_tracepoint.tp_name_len = sizeof(raw_tp_name);
+ goto again;
+ }
+
+ if (json_output)
+ show_link_close_json(fd, &info);
+ else
+ show_link_close_plain(fd, &info);
+
+ close(fd);
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ __u32 id = 0;
+ int err, fd;
+
+ if (show_pinned)
+ build_pinned_obj_table(&link_table, BPF_OBJ_LINK);
+
+ if (argc == 2) {
+ fd = link_parse_fd(&argc, &argv);
+ if (fd < 0)
+ return fd;
+ return do_show_link(fd);
+ }
+
+ if (argc)
+ return BAD_ARG();
+
+ if (json_output)
+ jsonw_start_array(json_wtr);
+ while (true) {
+ err = bpf_link_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ p_err("can't get next link: %s%s", strerror(errno),
+ errno == EINVAL ? " -- kernel too old?" : "");
+ break;
+ }
+
+ fd = bpf_link_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ p_err("can't get link by id (%u): %s",
+ id, strerror(errno));
+ break;
+ }
+
+ err = do_show_link(fd);
+ if (err)
+ break;
+ }
+ if (json_output)
+ jsonw_end_array(json_wtr);
+
+ return errno == ENOENT ? 0 : -1;
+}
+
+static int do_pin(int argc, char **argv)
+{
+ int err;
+
+ err = do_pin_any(argc, argv, link_parse_fd);
+ if (!err && json_output)
+ jsonw_null(json_wtr);
+ return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %1$s %2$s { show | list } [LINK]\n"
+ " %1$s %2$s pin LINK FILE\n"
+ " %1$s %2$s help\n"
+ "\n"
+ " " HELP_SPEC_LINK "\n"
+ " " HELP_SPEC_OPTIONS "\n"
+ "",
+ bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "help", do_help },
+ { "pin", do_pin },
+ { 0 }
+};
+
+int do_link(int argc, char **argv)
+{
+ return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 6d41bbfc6459..46bd716a9d86 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -30,6 +30,7 @@ bool verifier_logs;
bool relaxed_maps;
struct pinned_obj_table prog_table;
struct pinned_obj_table map_table;
+struct pinned_obj_table link_table;
static void __noreturn clean_and_exit(int i)
{
@@ -58,7 +59,7 @@ static int do_help(int argc, char **argv)
" %s batch file FILE\n"
" %s version\n"
"\n"
- " OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen }\n"
+ " OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
bin_name, bin_name, bin_name);
@@ -79,13 +80,6 @@ static int do_version(int argc, char **argv)
return 0;
}
-static int __printf(2, 0)
-print_all_levels(__maybe_unused enum libbpf_print_level level,
- const char *format, va_list args)
-{
- return vfprintf(stderr, format, args);
-}
-
int cmd_select(const struct cmd *cmds, int argc, char **argv,
int (*help)(int argc, char **argv))
{
@@ -222,12 +216,15 @@ static const struct cmd cmds[] = {
{ "batch", do_batch },
{ "prog", do_prog },
{ "map", do_map },
+ { "link", do_link },
{ "cgroup", do_cgroup },
{ "perf", do_perf },
{ "net", do_net },
{ "feature", do_feature },
{ "btf", do_btf },
{ "gen", do_gen },
+ { "struct_ops", do_struct_ops },
+ { "iter", do_iter },
{ "version", do_version },
{ 0 }
};
@@ -370,6 +367,7 @@ int main(int argc, char **argv)
hash_init(prog_table.table);
hash_init(map_table.table);
+ hash_init(link_table.table);
opterr = 0;
while ((opt = getopt_long(argc, argv, "Vhpjfmnd",
@@ -428,6 +426,7 @@ int main(int argc, char **argv)
if (show_pinned) {
delete_pinned_obj_table(&prog_table);
delete_pinned_obj_table(&map_table);
+ delete_pinned_obj_table(&link_table);
}
return ret;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 4e75b58d3989..5cdf0bc049bd 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -14,8 +14,13 @@
#include <linux/hashtable.h>
#include <tools/libc_compat.h>
+#include <bpf/libbpf.h>
+
#include "json_writer.h"
+/* Make sure we do not use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
#define ptr_to_u64(ptr) ((__u64)(unsigned long)(ptr))
#define NEXT_ARG() ({ argc--; argv++; if (argc < 0) usage(); })
@@ -48,6 +53,8 @@
"\t {-m|--mapcompat} | {-n|--nomount} }"
#define HELP_SPEC_MAP \
"MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
+#define HELP_SPEC_LINK \
+ "LINK := { id LINK_ID | pinned FILE }"
static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_UNSPEC] = "unspec",
@@ -76,6 +83,45 @@ static const char * const prog_type_name[] = {
[BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
[BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
[BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
+ [BPF_PROG_TYPE_TRACING] = "tracing",
+ [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_PROG_TYPE_EXT] = "ext",
+};
+
+static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
+ [BPF_CGROUP_INET_INGRESS] = "ingress",
+ [BPF_CGROUP_INET_EGRESS] = "egress",
+ [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
+ [BPF_CGROUP_SOCK_OPS] = "sock_ops",
+ [BPF_CGROUP_DEVICE] = "device",
+ [BPF_CGROUP_INET4_BIND] = "bind4",
+ [BPF_CGROUP_INET6_BIND] = "bind6",
+ [BPF_CGROUP_INET4_CONNECT] = "connect4",
+ [BPF_CGROUP_INET6_CONNECT] = "connect6",
+ [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
+ [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
+ [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
+ [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
+ [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
+ [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
+ [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
+ [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
+ [BPF_CGROUP_SYSCTL] = "sysctl",
+ [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
+ [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
+ [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
+ [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
+
+ [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
+ [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+ [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
+ [BPF_LIRC_MODE2] = "lirc_mode2",
+ [BPF_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_TRACE_RAW_TP] = "raw_tp",
+ [BPF_TRACE_FENTRY] = "fentry",
+ [BPF_TRACE_FEXIT] = "fexit",
+ [BPF_MODIFY_RETURN] = "mod_ret",
+ [BPF_LSM_MAC] = "lsm_mac",
};
extern const char * const map_type_name[];
@@ -85,6 +131,7 @@ enum bpf_obj_type {
BPF_OBJ_UNKNOWN,
BPF_OBJ_PROG,
BPF_OBJ_MAP,
+ BPF_OBJ_LINK,
};
extern const char *bin_name;
@@ -97,6 +144,7 @@ extern bool verifier_logs;
extern bool relaxed_maps;
extern struct pinned_obj_table prog_table;
extern struct pinned_obj_table map_table;
+extern struct pinned_obj_table link_table;
void __printf(1, 2) p_err(const char *fmt, ...);
void __printf(1, 2) p_info(const char *fmt, ...);
@@ -143,11 +191,12 @@ char *get_fdinfo(int fd, const char *key);
int open_obj_pinned(char *path, bool quiet);
int open_obj_pinned_any(char *path, enum bpf_obj_type exp_type);
int mount_bpffs_for_pin(const char *name);
-int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(__u32));
+int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
int do_pin_fd(int fd, const char *name);
int do_prog(int argc, char **arg);
int do_map(int argc, char **arg);
+int do_link(int argc, char **arg);
int do_event_pipe(int argc, char **argv);
int do_cgroup(int argc, char **arg);
int do_perf(int argc, char **arg);
@@ -156,6 +205,8 @@ int do_tracelog(int argc, char **arg);
int do_feature(int argc, char **argv);
int do_btf(int argc, char **argv);
int do_gen(int argc, char **argv);
+int do_struct_ops(int argc, char **argv);
+int do_iter(int argc, char **argv);
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
@@ -200,6 +251,7 @@ struct btf_dumper {
const struct btf *btf;
json_writer_t *jw;
bool is_plain_text;
+ bool prog_id_as_func_ptr;
};
/* btf_dumper_type - print data along with type information
@@ -226,4 +278,7 @@ struct tcmsg;
int do_xdp_dump(struct ifinfomsg *ifinfo, struct nlattr **tb);
int do_filter_dump(struct tcmsg *ifinfo, struct nlattr **tb, const char *kind,
const char *devname, int ifindex);
+
+int print_all_levels(__maybe_unused enum libbpf_print_level level,
+ const char *format, va_list args);
#endif
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index e6c85680b34d..c5fac8068ba1 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1384,7 +1384,7 @@ static int do_pin(int argc, char **argv)
{
int err;
- err = do_pin_any(argc, argv, bpf_map_get_fd_by_id);
+ err = do_pin_any(argc, argv, map_parse_fd);
if (!err && json_output)
jsonw_null(json_wtr);
return err;
@@ -1561,24 +1561,24 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } [MAP]\n"
- " %s %s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
- " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
- " [dev NAME]\n"
- " %s %s dump MAP\n"
- " %s %s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
- " %s %s lookup MAP [key DATA]\n"
- " %s %s getnext MAP [key DATA]\n"
- " %s %s delete MAP key DATA\n"
- " %s %s pin MAP FILE\n"
- " %s %s event_pipe MAP [cpu N index M]\n"
- " %s %s peek MAP\n"
- " %s %s push MAP value VALUE\n"
- " %s %s pop MAP\n"
- " %s %s enqueue MAP value VALUE\n"
- " %s %s dequeue MAP\n"
- " %s %s freeze MAP\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s { show | list } [MAP]\n"
+ " %1$s %2$s create FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
+ " entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
+ " [dev NAME]\n"
+ " %1$s %2$s dump MAP\n"
+ " %1$s %2$s update MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
+ " %1$s %2$s lookup MAP [key DATA]\n"
+ " %1$s %2$s getnext MAP [key DATA]\n"
+ " %1$s %2$s delete MAP key DATA\n"
+ " %1$s %2$s pin MAP FILE\n"
+ " %1$s %2$s event_pipe MAP [cpu N index M]\n"
+ " %1$s %2$s peek MAP\n"
+ " %1$s %2$s push MAP value VALUE\n"
+ " %1$s %2$s pop MAP\n"
+ " %1$s %2$s enqueue MAP value VALUE\n"
+ " %1$s %2$s dequeue MAP\n"
+ " %1$s %2$s freeze MAP\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_MAP "\n"
" DATA := { [hex] BYTES }\n"
@@ -1589,14 +1589,10 @@ static int do_help(int argc, char **argv)
" percpu_array | stack_trace | cgroup_array | lru_hash |\n"
" lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
" devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
- " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n"
+ " cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
+ " queue | stack | sk_storage | struct_ops }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2]);
return 0;
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index d9b29c17fbb8..825f29f93a57 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -39,7 +39,7 @@ struct event_ring_info {
struct perf_event_sample {
struct perf_event_header header;
- u64 time;
+ __u64 time;
__u32 size;
unsigned char data[];
};
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index c5e3895b7c8b..56c3a2bae3ef 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -458,10 +458,10 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } [dev <devname>]\n"
- " %s %s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n"
- " %s %s detach ATTACH_TYPE dev <devname>\n"
- " %s %s help\n"
+ "Usage: %1$s %2$s { show | list } [dev <devname>]\n"
+ " %1$s %2$s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n"
+ " %1$s %2$s detach ATTACH_TYPE dev <devname>\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_PROGRAM "\n"
" ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
@@ -470,8 +470,8 @@ static int do_help(int argc, char **argv)
" For progs attached to cgroups, use \"bpftool cgroup\"\n"
" to dump program attachments. For program types\n"
" sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n"
- " consult iproute2.\n",
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+ " consult iproute2.\n"
+ "",
bin_name, argv[-2]);
return 0;
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
index 3341aa14acda..ad23934819c7 100644
--- a/tools/bpf/bpftool/perf.c
+++ b/tools/bpf/bpftool/perf.c
@@ -231,7 +231,7 @@ static int do_show(int argc, char **argv)
static int do_help(int argc, char **argv)
{
fprintf(stderr,
- "Usage: %s %s { show | list | help }\n"
+ "Usage: %1$s %2$s { show | list | help }\n"
"",
bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index b352ab041160..a5eff83496f2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -4,6 +4,7 @@
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
+#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -11,10 +12,13 @@
#include <time.h>
#include <unistd.h>
#include <net/if.h>
+#include <sys/ioctl.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <sys/syscall.h>
#include <linux/err.h>
+#include <linux/perf_event.h>
#include <linux/sizes.h>
#include <bpf/bpf.h>
@@ -234,7 +238,7 @@ exit_free:
return fd;
}
-static void show_prog_maps(int fd, u32 num_maps)
+static void show_prog_maps(int fd, __u32 num_maps)
{
struct bpf_prog_info info = {};
__u32 len = sizeof(info);
@@ -809,7 +813,7 @@ static int do_pin(int argc, char **argv)
{
int err;
- err = do_pin_any(argc, argv, bpf_prog_get_fd_by_id);
+ err = do_pin_any(argc, argv, prog_parse_fd);
if (!err && json_output)
jsonw_null(json_wtr);
return err;
@@ -1243,6 +1247,25 @@ free_data_in:
return err;
}
+static int
+get_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,
+ enum bpf_attach_type *expected_attach_type)
+{
+ libbpf_print_fn_t print_backup;
+ int ret;
+
+ ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type);
+ if (!ret)
+ return ret;
+
+ /* libbpf_prog_type_by_name() failed, let's re-run with debug level */
+ print_backup = libbpf_set_print(print_all_levels);
+ ret = libbpf_prog_type_by_name(name, prog_type, expected_attach_type);
+ libbpf_set_print(print_backup);
+
+ return ret;
+}
+
static int load_with_options(int argc, char **argv, bool first_prog_only)
{
enum bpf_prog_type common_prog_type = BPF_PROG_TYPE_UNSPEC;
@@ -1292,8 +1315,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
strcat(type, *argv);
strcat(type, "/");
- err = libbpf_prog_type_by_name(type, &common_prog_type,
- &expected_attach_type);
+ err = get_prog_type_by_name(type, &common_prog_type,
+ &expected_attach_type);
free(type);
if (err < 0)
goto err_free_reuse_maps;
@@ -1392,8 +1415,8 @@ static int load_with_options(int argc, char **argv, bool first_prog_only)
if (prog_type == BPF_PROG_TYPE_UNSPEC) {
const char *sec_name = bpf_program__title(pos, false);
- err = libbpf_prog_type_by_name(sec_name, &prog_type,
- &expected_attach_type);
+ err = get_prog_type_by_name(sec_name, &prog_type,
+ &expected_attach_type);
if (err < 0)
goto err_close_obj;
}
@@ -1537,6 +1560,422 @@ static int do_loadall(int argc, char **argv)
return load_with_options(argc, argv, false);
}
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+
+static int do_profile(int argc, char **argv)
+{
+ p_err("bpftool prog profile command is not supported. Please build bpftool with clang >= 10.0.0");
+ return 0;
+}
+
+#else /* BPFTOOL_WITHOUT_SKELETONS */
+
+#include "profiler.skel.h"
+
+struct profile_metric {
+ const char *name;
+ struct bpf_perf_event_value val;
+ struct perf_event_attr attr;
+ bool selected;
+
+ /* calculate ratios like instructions per cycle */
+ const int ratio_metric; /* 0 for N/A, 1 for index 0 (cycles) */
+ const char *ratio_desc;
+ const float ratio_mul;
+} metrics[] = {
+ {
+ .name = "cycles",
+ .attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .exclude_user = 1,
+ },
+ },
+ {
+ .name = "instructions",
+ .attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .exclude_user = 1,
+ },
+ .ratio_metric = 1,
+ .ratio_desc = "insns per cycle",
+ .ratio_mul = 1.0,
+ },
+ {
+ .name = "l1d_loads",
+ .attr = {
+ .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_L1D |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_ACCESS << 16),
+ .exclude_user = 1,
+ },
+ },
+ {
+ .name = "llc_misses",
+ .attr = {
+ .type = PERF_TYPE_HW_CACHE,
+ .config =
+ PERF_COUNT_HW_CACHE_LL |
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+ .exclude_user = 1
+ },
+ .ratio_metric = 2,
+ .ratio_desc = "LLC misses per million insns",
+ .ratio_mul = 1e6,
+ },
+};
+
+static __u64 profile_total_count;
+
+#define MAX_NUM_PROFILE_METRICS 4
+
+static int profile_parse_metrics(int argc, char **argv)
+{
+ unsigned int metric_cnt;
+ int selected_cnt = 0;
+ unsigned int i;
+
+ metric_cnt = sizeof(metrics) / sizeof(struct profile_metric);
+
+ while (argc > 0) {
+ for (i = 0; i < metric_cnt; i++) {
+ if (is_prefix(argv[0], metrics[i].name)) {
+ if (!metrics[i].selected)
+ selected_cnt++;
+ metrics[i].selected = true;
+ break;
+ }
+ }
+ if (i == metric_cnt) {
+ p_err("unknown metric %s", argv[0]);
+ return -1;
+ }
+ NEXT_ARG();
+ }
+ if (selected_cnt > MAX_NUM_PROFILE_METRICS) {
+ p_err("too many (%d) metrics, please specify no more than %d metrics at at time",
+ selected_cnt, MAX_NUM_PROFILE_METRICS);
+ return -1;
+ }
+ return selected_cnt;
+}
+
+static void profile_read_values(struct profiler_bpf *obj)
+{
+ __u32 m, cpu, num_cpu = obj->rodata->num_cpu;
+ int reading_map_fd, count_map_fd;
+ __u64 counts[num_cpu];
+ __u32 key = 0;
+ int err;
+
+ reading_map_fd = bpf_map__fd(obj->maps.accum_readings);
+ count_map_fd = bpf_map__fd(obj->maps.counts);
+ if (reading_map_fd < 0 || count_map_fd < 0) {
+ p_err("failed to get fd for map");
+ return;
+ }
+
+ err = bpf_map_lookup_elem(count_map_fd, &key, counts);
+ if (err) {
+ p_err("failed to read count_map: %s", strerror(errno));
+ return;
+ }
+
+ profile_total_count = 0;
+ for (cpu = 0; cpu < num_cpu; cpu++)
+ profile_total_count += counts[cpu];
+
+ for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+ struct bpf_perf_event_value values[num_cpu];
+
+ if (!metrics[m].selected)
+ continue;
+
+ err = bpf_map_lookup_elem(reading_map_fd, &key, values);
+ if (err) {
+ p_err("failed to read reading_map: %s",
+ strerror(errno));
+ return;
+ }
+ for (cpu = 0; cpu < num_cpu; cpu++) {
+ metrics[m].val.counter += values[cpu].counter;
+ metrics[m].val.enabled += values[cpu].enabled;
+ metrics[m].val.running += values[cpu].running;
+ }
+ key++;
+ }
+}
+
+static void profile_print_readings_json(void)
+{
+ __u32 m;
+
+ jsonw_start_array(json_wtr);
+ for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+ if (!metrics[m].selected)
+ continue;
+ jsonw_start_object(json_wtr);
+ jsonw_string_field(json_wtr, "metric", metrics[m].name);
+ jsonw_lluint_field(json_wtr, "run_cnt", profile_total_count);
+ jsonw_lluint_field(json_wtr, "value", metrics[m].val.counter);
+ jsonw_lluint_field(json_wtr, "enabled", metrics[m].val.enabled);
+ jsonw_lluint_field(json_wtr, "running", metrics[m].val.running);
+
+ jsonw_end_object(json_wtr);
+ }
+ jsonw_end_array(json_wtr);
+}
+
+static void profile_print_readings_plain(void)
+{
+ __u32 m;
+
+ printf("\n%18llu %-20s\n", profile_total_count, "run_cnt");
+ for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+ struct bpf_perf_event_value *val = &metrics[m].val;
+ int r;
+
+ if (!metrics[m].selected)
+ continue;
+ printf("%18llu %-20s", val->counter, metrics[m].name);
+
+ r = metrics[m].ratio_metric - 1;
+ if (r >= 0 && metrics[r].selected &&
+ metrics[r].val.counter > 0) {
+ printf("# %8.2f %-30s",
+ val->counter * metrics[m].ratio_mul /
+ metrics[r].val.counter,
+ metrics[m].ratio_desc);
+ } else {
+ printf("%-41s", "");
+ }
+
+ if (val->enabled > val->running)
+ printf("(%4.2f%%)",
+ val->running * 100.0 / val->enabled);
+ printf("\n");
+ }
+}
+
+static void profile_print_readings(void)
+{
+ if (json_output)
+ profile_print_readings_json();
+ else
+ profile_print_readings_plain();
+}
+
+static char *profile_target_name(int tgt_fd)
+{
+ struct bpf_prog_info_linear *info_linear;
+ struct bpf_func_info *func_info;
+ const struct btf_type *t;
+ char *name = NULL;
+ struct btf *btf;
+
+ info_linear = bpf_program__get_prog_info_linear(
+ tgt_fd, 1UL << BPF_PROG_INFO_FUNC_INFO);
+ if (IS_ERR_OR_NULL(info_linear)) {
+ p_err("failed to get info_linear for prog FD %d", tgt_fd);
+ return NULL;
+ }
+
+ if (info_linear->info.btf_id == 0 ||
+ btf__get_from_id(info_linear->info.btf_id, &btf)) {
+ p_err("prog FD %d doesn't have valid btf", tgt_fd);
+ goto out;
+ }
+
+ func_info = (struct bpf_func_info *)(info_linear->info.func_info);
+ t = btf__type_by_id(btf, func_info[0].type_id);
+ if (!t) {
+ p_err("btf %d doesn't have type %d",
+ info_linear->info.btf_id, func_info[0].type_id);
+ goto out;
+ }
+ name = strdup(btf__name_by_offset(btf, t->name_off));
+out:
+ free(info_linear);
+ return name;
+}
+
+static struct profiler_bpf *profile_obj;
+static int profile_tgt_fd = -1;
+static char *profile_tgt_name;
+static int *profile_perf_events;
+static int profile_perf_event_cnt;
+
+static void profile_close_perf_events(struct profiler_bpf *obj)
+{
+ int i;
+
+ for (i = profile_perf_event_cnt - 1; i >= 0; i--)
+ close(profile_perf_events[i]);
+
+ free(profile_perf_events);
+ profile_perf_event_cnt = 0;
+}
+
+static int profile_open_perf_events(struct profiler_bpf *obj)
+{
+ unsigned int cpu, m;
+ int map_fd, pmu_fd;
+
+ profile_perf_events = calloc(
+ sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric);
+ if (!profile_perf_events) {
+ p_err("failed to allocate memory for perf_event array: %s",
+ strerror(errno));
+ return -1;
+ }
+ map_fd = bpf_map__fd(obj->maps.events);
+ if (map_fd < 0) {
+ p_err("failed to get fd for events map");
+ return -1;
+ }
+
+ for (m = 0; m < ARRAY_SIZE(metrics); m++) {
+ if (!metrics[m].selected)
+ continue;
+ for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) {
+ pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr,
+ -1/*pid*/, cpu, -1/*group_fd*/, 0);
+ if (pmu_fd < 0 ||
+ bpf_map_update_elem(map_fd, &profile_perf_event_cnt,
+ &pmu_fd, BPF_ANY) ||
+ ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
+ p_err("failed to create event %s on cpu %d",
+ metrics[m].name, cpu);
+ return -1;
+ }
+ profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
+ }
+ }
+ return 0;
+}
+
+static void profile_print_and_cleanup(void)
+{
+ profile_close_perf_events(profile_obj);
+ profile_read_values(profile_obj);
+ profile_print_readings();
+ profiler_bpf__destroy(profile_obj);
+
+ close(profile_tgt_fd);
+ free(profile_tgt_name);
+}
+
+static void int_exit(int signo)
+{
+ profile_print_and_cleanup();
+ exit(0);
+}
+
+static int do_profile(int argc, char **argv)
+{
+ int num_metric, num_cpu, err = -1;
+ struct bpf_program *prog;
+ unsigned long duration;
+ char *endptr;
+
+ /* we at least need two args for the prog and one metric */
+ if (!REQ_ARGS(3))
+ return -EINVAL;
+
+ /* parse target fd */
+ profile_tgt_fd = prog_parse_fd(&argc, &argv);
+ if (profile_tgt_fd < 0) {
+ p_err("failed to parse fd");
+ return -1;
+ }
+
+ /* parse profiling optional duration */
+ if (argc > 2 && is_prefix(argv[0], "duration")) {
+ NEXT_ARG();
+ duration = strtoul(*argv, &endptr, 0);
+ if (*endptr)
+ usage();
+ NEXT_ARG();
+ } else {
+ duration = UINT_MAX;
+ }
+
+ num_metric = profile_parse_metrics(argc, argv);
+ if (num_metric <= 0)
+ goto out;
+
+ num_cpu = libbpf_num_possible_cpus();
+ if (num_cpu <= 0) {
+ p_err("failed to identify number of CPUs");
+ goto out;
+ }
+
+ profile_obj = profiler_bpf__open();
+ if (!profile_obj) {
+ p_err("failed to open and/or load BPF object");
+ goto out;
+ }
+
+ profile_obj->rodata->num_cpu = num_cpu;
+ profile_obj->rodata->num_metric = num_metric;
+
+ /* adjust map sizes */
+ bpf_map__resize(profile_obj->maps.events, num_metric * num_cpu);
+ bpf_map__resize(profile_obj->maps.fentry_readings, num_metric);
+ bpf_map__resize(profile_obj->maps.accum_readings, num_metric);
+ bpf_map__resize(profile_obj->maps.counts, 1);
+
+ /* change target name */
+ profile_tgt_name = profile_target_name(profile_tgt_fd);
+ if (!profile_tgt_name)
+ goto out;
+
+ bpf_object__for_each_program(prog, profile_obj->obj) {
+ err = bpf_program__set_attach_target(prog, profile_tgt_fd,
+ profile_tgt_name);
+ if (err) {
+ p_err("failed to set attach target\n");
+ goto out;
+ }
+ }
+
+ set_max_rlimit();
+ err = profiler_bpf__load(profile_obj);
+ if (err) {
+ p_err("failed to load profile_obj");
+ goto out;
+ }
+
+ err = profile_open_perf_events(profile_obj);
+ if (err)
+ goto out;
+
+ err = profiler_bpf__attach(profile_obj);
+ if (err) {
+ p_err("failed to attach profile_obj");
+ goto out;
+ }
+ signal(SIGINT, int_exit);
+
+ sleep(duration);
+ profile_print_and_cleanup();
+ return 0;
+
+out:
+ profile_close_perf_events(profile_obj);
+ if (profile_obj)
+ profiler_bpf__destroy(profile_obj);
+ close(profile_tgt_fd);
+ free(profile_tgt_name);
+ return err;
+}
+
+#endif /* BPFTOOL_WITHOUT_SKELETONS */
+
static int do_help(int argc, char **argv)
{
if (json_output) {
@@ -1545,23 +1984,24 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %s %s { show | list } [PROG]\n"
- " %s %s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n"
- " %s %s dump jited PROG [{ file FILE | opcodes | linum }]\n"
- " %s %s pin PROG FILE\n"
- " %s %s { load | loadall } OBJ PATH \\\n"
+ "Usage: %1$s %2$s { show | list } [PROG]\n"
+ " %1$s %2$s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n"
+ " %1$s %2$s dump jited PROG [{ file FILE | opcodes | linum }]\n"
+ " %1$s %2$s pin PROG FILE\n"
+ " %1$s %2$s { load | loadall } OBJ PATH \\\n"
" [type TYPE] [dev NAME] \\\n"
" [map { idx IDX | name NAME } MAP]\\\n"
" [pinmaps MAP_DIR]\n"
- " %s %s attach PROG ATTACH_TYPE [MAP]\n"
- " %s %s detach PROG ATTACH_TYPE [MAP]\n"
- " %s %s run PROG \\\n"
+ " %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n"
+ " %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n"
+ " %1$s %2$s run PROG \\\n"
" data_in FILE \\\n"
" [data_out FILE [data_size_out L]] \\\n"
" [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n"
" [repeat N]\n"
- " %s %s tracelog\n"
- " %s %s help\n"
+ " %1$s %2$s profile PROG [duration DURATION] METRICs\n"
+ " %1$s %2$s tracelog\n"
+ " %1$s %2$s help\n"
"\n"
" " HELP_SPEC_MAP "\n"
" " HELP_SPEC_PROGRAM "\n"
@@ -1572,16 +2012,16 @@ static int do_help(int argc, char **argv)
" sk_reuseport | flow_dissector | cgroup/sysctl |\n"
" cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
" cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
- " cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n"
- " cgroup/recvmsg6 | cgroup/getsockopt |\n"
- " cgroup/setsockopt }\n"
+ " cgroup/getpeername4 | cgroup/getpeername6 |\n"
+ " cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
+ " cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
+ " cgroup/getsockopt | cgroup/setsockopt |\n"
+ " struct_ops | fentry | fexit | freplace }\n"
" ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
" flow_dissector }\n"
+ " METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
" " HELP_SPEC_OPTIONS "\n"
"",
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
- bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
bin_name, argv[-2]);
return 0;
@@ -1599,6 +2039,7 @@ static const struct cmd cmds[] = {
{ "detach", do_detach },
{ "tracelog", do_tracelog },
{ "run", do_run },
+ { "profile", do_profile },
{ 0 }
};
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
new file mode 100644
index 000000000000..20034c12f7c5
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include "profiler.h"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* map of perf event fds, num_cpu * num_metric entries */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+/* readings at fentry */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+} fentry_readings SEC(".maps");
+
+/* accumulated readings */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(struct bpf_perf_event_value));
+} accum_readings SEC(".maps");
+
+/* sample counts, one per cpu */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(key_size, sizeof(u32));
+ __uint(value_size, sizeof(u64));
+} counts SEC(".maps");
+
+const volatile __u32 num_cpu = 1;
+const volatile __u32 num_metric = 1;
+#define MAX_NUM_MATRICS 4
+
+SEC("fentry/XXX")
+int BPF_PROG(fentry_XXX)
+{
+ struct bpf_perf_event_value *ptrs[MAX_NUM_MATRICS];
+ u32 key = bpf_get_smp_processor_id();
+ u32 i;
+
+ /* look up before reading, to reduce error */
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+ u32 flag = i;
+
+ ptrs[i] = bpf_map_lookup_elem(&fentry_readings, &flag);
+ if (!ptrs[i])
+ return 0;
+ }
+
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+ struct bpf_perf_event_value reading;
+ int err;
+
+ err = bpf_perf_event_read_value(&events, key, &reading,
+ sizeof(reading));
+ if (err)
+ return 0;
+ *(ptrs[i]) = reading;
+ key += num_cpu;
+ }
+
+ return 0;
+}
+
+static inline void
+fexit_update_maps(u32 id, struct bpf_perf_event_value *after)
+{
+ struct bpf_perf_event_value *before, diff, *accum;
+
+ before = bpf_map_lookup_elem(&fentry_readings, &id);
+ /* only account samples with a valid fentry_reading */
+ if (before && before->counter) {
+ struct bpf_perf_event_value *accum;
+
+ diff.counter = after->counter - before->counter;
+ diff.enabled = after->enabled - before->enabled;
+ diff.running = after->running - before->running;
+
+ accum = bpf_map_lookup_elem(&accum_readings, &id);
+ if (accum) {
+ accum->counter += diff.counter;
+ accum->enabled += diff.enabled;
+ accum->running += diff.running;
+ }
+ }
+}
+
+SEC("fexit/XXX")
+int BPF_PROG(fexit_XXX)
+{
+ struct bpf_perf_event_value readings[MAX_NUM_MATRICS];
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 i, one = 1, zero = 0;
+ int err;
+ u64 *count;
+
+ /* read all events before updating the maps, to reduce error */
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++) {
+ err = bpf_perf_event_read_value(&events, cpu + i * num_cpu,
+ readings + i, sizeof(*readings));
+ if (err)
+ return 0;
+ }
+ count = bpf_map_lookup_elem(&counts, &zero);
+ if (count) {
+ *count += 1;
+ for (i = 0; i < num_metric && i < MAX_NUM_MATRICS; i++)
+ fexit_update_maps(i, &readings[i]);
+ }
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h
new file mode 100644
index 000000000000..1f767e9510f7
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/profiler.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __PROFILER_H
+#define __PROFILER_H
+
+/* useful typedefs from vmlinux.h */
+
+typedef signed char __s8;
+typedef unsigned char __u8;
+typedef short int __s16;
+typedef short unsigned int __u16;
+typedef int __s32;
+typedef unsigned int __u32;
+typedef long long int __s64;
+typedef long long unsigned int __u64;
+
+typedef __s8 s8;
+typedef __u8 u8;
+typedef __s16 s16;
+typedef __u16 u16;
+typedef __s32 s32;
+typedef __u32 u32;
+typedef __s64 s64;
+typedef __u64 u64;
+
+enum {
+ false = 0,
+ true = 1,
+};
+
+#ifdef __CHECKER__
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
+#endif
+
+typedef __u16 __bitwise__ __le16;
+typedef __u16 __bitwise__ __be16;
+typedef __u32 __bitwise__ __le32;
+typedef __u32 __bitwise__ __be32;
+typedef __u64 __bitwise__ __le64;
+typedef __u64 __bitwise__ __be64;
+
+typedef __u16 __bitwise__ __sum16;
+typedef __u32 __bitwise__ __wsum;
+
+#endif /* __PROFILER_H */
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
new file mode 100644
index 000000000000..b58b91f62ffb
--- /dev/null
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -0,0 +1,603 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook */
+
+#include <errno.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <linux/err.h>
+
+#include <bpf/bpf.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+#define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_"
+
+static const struct btf_type *map_info_type;
+static __u32 map_info_alloc_len;
+static struct btf *btf_vmlinux;
+static __s32 map_info_type_id;
+
+struct res {
+ unsigned int nr_maps;
+ unsigned int nr_errs;
+};
+
+static const struct btf *get_btf_vmlinux(void)
+{
+ if (btf_vmlinux)
+ return btf_vmlinux;
+
+ btf_vmlinux = libbpf_find_kernel_btf();
+ if (IS_ERR(btf_vmlinux))
+ p_err("struct_ops requires kernel CONFIG_DEBUG_INFO_BTF=y");
+
+ return btf_vmlinux;
+}
+
+static const char *get_kern_struct_ops_name(const struct bpf_map_info *info)
+{
+ const struct btf *kern_btf;
+ const struct btf_type *t;
+ const char *st_ops_name;
+
+ kern_btf = get_btf_vmlinux();
+ if (IS_ERR(kern_btf))
+ return "<btf_vmlinux_not_found>";
+
+ t = btf__type_by_id(kern_btf, info->btf_vmlinux_value_type_id);
+ st_ops_name = btf__name_by_offset(kern_btf, t->name_off);
+ st_ops_name += strlen(STRUCT_OPS_VALUE_PREFIX);
+
+ return st_ops_name;
+}
+
+static __s32 get_map_info_type_id(void)
+{
+ const struct btf *kern_btf;
+
+ if (map_info_type_id)
+ return map_info_type_id;
+
+ kern_btf = get_btf_vmlinux();
+ if (IS_ERR(kern_btf)) {
+ map_info_type_id = PTR_ERR(kern_btf);
+ return map_info_type_id;
+ }
+
+ map_info_type_id = btf__find_by_name_kind(kern_btf, "bpf_map_info",
+ BTF_KIND_STRUCT);
+ if (map_info_type_id < 0) {
+ p_err("can't find bpf_map_info from btf_vmlinux");
+ return map_info_type_id;
+ }
+ map_info_type = btf__type_by_id(kern_btf, map_info_type_id);
+
+ /* Ensure map_info_alloc() has at least what the bpftool needs */
+ map_info_alloc_len = map_info_type->size;
+ if (map_info_alloc_len < sizeof(struct bpf_map_info))
+ map_info_alloc_len = sizeof(struct bpf_map_info);
+
+ return map_info_type_id;
+}
+
+/* If the subcmd needs to print out the bpf_map_info,
+ * it should always call map_info_alloc to allocate
+ * a bpf_map_info object instead of allocating it
+ * on the stack.
+ *
+ * map_info_alloc() will take the running kernel's btf
+ * into account. i.e. it will consider the
+ * sizeof(struct bpf_map_info) of the running kernel.
+ *
+ * It will enable the "struct_ops" cmd to print the latest
+ * "struct bpf_map_info".
+ *
+ * [ Recall that "struct_ops" requires the kernel's btf to
+ * be available ]
+ */
+static struct bpf_map_info *map_info_alloc(__u32 *alloc_len)
+{
+ struct bpf_map_info *info;
+
+ if (get_map_info_type_id() < 0)
+ return NULL;
+
+ info = calloc(1, map_info_alloc_len);
+ if (!info)
+ p_err("mem alloc failed");
+ else
+ *alloc_len = map_info_alloc_len;
+
+ return info;
+}
+
+/* It iterates all struct_ops maps of the system.
+ * It returns the fd in "*res_fd" and map_info in "*info".
+ * In the very first iteration, info->id should be 0.
+ * An optional map "*name" filter can be specified.
+ * The filter can be made more flexible in the future.
+ * e.g. filter by kernel-struct-ops-name, regex-name, glob-name, ...etc.
+ *
+ * Return value:
+ * 1: A struct_ops map found. It is returned in "*res_fd" and "*info".
+ * The caller can continue to call get_next in the future.
+ * 0: No struct_ops map is returned.
+ * All struct_ops map has been found.
+ * -1: Error and the caller should abort the iteration.
+ */
+static int get_next_struct_ops_map(const char *name, int *res_fd,
+ struct bpf_map_info *info, __u32 info_len)
+{
+ __u32 id = info->id;
+ int err, fd;
+
+ while (true) {
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ return 0;
+ p_err("can't get next map: %s", strerror(errno));
+ return -1;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ p_err("can't get map by id (%u): %s",
+ id, strerror(errno));
+ return -1;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, info, &info_len);
+ if (err) {
+ p_err("can't get map info: %s", strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ if (info->type == BPF_MAP_TYPE_STRUCT_OPS &&
+ (!name || !strcmp(name, info->name))) {
+ *res_fd = fd;
+ return 1;
+ }
+ close(fd);
+ }
+}
+
+static int cmd_retval(const struct res *res, bool must_have_one_map)
+{
+ if (res->nr_errs || (!res->nr_maps && must_have_one_map))
+ return -1;
+
+ return 0;
+}
+
+/* "data" is the work_func private storage */
+typedef int (*work_func)(int fd, const struct bpf_map_info *info, void *data,
+ struct json_writer *wtr);
+
+/* Find all struct_ops map in the system.
+ * Filter out by "name" (if specified).
+ * Then call "func(fd, info, data, wtr)" on each struct_ops map found.
+ */
+static struct res do_search(const char *name, work_func func, void *data,
+ struct json_writer *wtr)
+{
+ struct bpf_map_info *info;
+ struct res res = {};
+ __u32 info_len;
+ int fd, err;
+
+ info = map_info_alloc(&info_len);
+ if (!info) {
+ res.nr_errs++;
+ return res;
+ }
+
+ if (wtr)
+ jsonw_start_array(wtr);
+ while ((err = get_next_struct_ops_map(name, &fd, info, info_len)) == 1) {
+ res.nr_maps++;
+ err = func(fd, info, data, wtr);
+ if (err)
+ res.nr_errs++;
+ close(fd);
+ }
+ if (wtr)
+ jsonw_end_array(wtr);
+
+ if (err)
+ res.nr_errs++;
+
+ if (!wtr && name && !res.nr_errs && !res.nr_maps)
+ /* It is not printing empty [].
+ * Thus, needs to specifically say nothing found
+ * for "name" here.
+ */
+ p_err("no struct_ops found for %s", name);
+ else if (!wtr && json_output && !res.nr_errs)
+ /* The "func()" above is not writing any json (i.e. !wtr
+ * test here).
+ *
+ * However, "-j" is enabled and there is no errs here,
+ * so call json_null() as the current convention of
+ * other cmds.
+ */
+ jsonw_null(json_wtr);
+
+ free(info);
+ return res;
+}
+
+static struct res do_one_id(const char *id_str, work_func func, void *data,
+ struct json_writer *wtr)
+{
+ struct bpf_map_info *info;
+ struct res res = {};
+ unsigned long id;
+ __u32 info_len;
+ char *endptr;
+ int fd;
+
+ id = strtoul(id_str, &endptr, 0);
+ if (*endptr || !id || id > UINT32_MAX) {
+ p_err("invalid id %s", id_str);
+ res.nr_errs++;
+ return res;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd == -1) {
+ p_err("can't get map by id (%lu): %s", id, strerror(errno));
+ res.nr_errs++;
+ return res;
+ }
+
+ info = map_info_alloc(&info_len);
+ if (!info) {
+ res.nr_errs++;
+ goto done;
+ }
+
+ if (bpf_obj_get_info_by_fd(fd, info, &info_len)) {
+ p_err("can't get map info: %s", strerror(errno));
+ res.nr_errs++;
+ goto done;
+ }
+
+ if (info->type != BPF_MAP_TYPE_STRUCT_OPS) {
+ p_err("%s id %u is not a struct_ops map", info->name, info->id);
+ res.nr_errs++;
+ goto done;
+ }
+
+ res.nr_maps++;
+
+ if (func(fd, info, data, wtr))
+ res.nr_errs++;
+ else if (!wtr && json_output)
+ /* The "func()" above is not writing any json (i.e. !wtr
+ * test here).
+ *
+ * However, "-j" is enabled and there is no errs here,
+ * so call json_null() as the current convention of
+ * other cmds.
+ */
+ jsonw_null(json_wtr);
+
+done:
+ free(info);
+ close(fd);
+
+ return res;
+}
+
+static struct res do_work_on_struct_ops(const char *search_type,
+ const char *search_term,
+ work_func func, void *data,
+ struct json_writer *wtr)
+{
+ if (search_type) {
+ if (is_prefix(search_type, "id"))
+ return do_one_id(search_term, func, data, wtr);
+ else if (!is_prefix(search_type, "name"))
+ usage();
+ }
+
+ return do_search(search_term, func, data, wtr);
+}
+
+static int __do_show(int fd, const struct bpf_map_info *info, void *data,
+ struct json_writer *wtr)
+{
+ if (wtr) {
+ jsonw_start_object(wtr);
+ jsonw_uint_field(wtr, "id", info->id);
+ jsonw_string_field(wtr, "name", info->name);
+ jsonw_string_field(wtr, "kernel_struct_ops",
+ get_kern_struct_ops_name(info));
+ jsonw_end_object(wtr);
+ } else {
+ printf("%u: %-15s %-32s\n", info->id, info->name,
+ get_kern_struct_ops_name(info));
+ }
+
+ return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+ const char *search_type = NULL, *search_term = NULL;
+ struct res res;
+
+ if (argc && argc != 2)
+ usage();
+
+ if (argc == 2) {
+ search_type = GET_ARG();
+ search_term = GET_ARG();
+ }
+
+ res = do_work_on_struct_ops(search_type, search_term, __do_show,
+ NULL, json_wtr);
+
+ return cmd_retval(&res, !!search_term);
+}
+
+static int __do_dump(int fd, const struct bpf_map_info *info, void *data,
+ struct json_writer *wtr)
+{
+ struct btf_dumper *d = (struct btf_dumper *)data;
+ const struct btf_type *struct_ops_type;
+ const struct btf *kern_btf = d->btf;
+ const char *struct_ops_name;
+ int zero = 0;
+ void *value;
+
+ /* note: d->jw == wtr */
+
+ kern_btf = d->btf;
+
+ /* The kernel supporting BPF_MAP_TYPE_STRUCT_OPS must have
+ * btf_vmlinux_value_type_id.
+ */
+ struct_ops_type = btf__type_by_id(kern_btf,
+ info->btf_vmlinux_value_type_id);
+ struct_ops_name = btf__name_by_offset(kern_btf,
+ struct_ops_type->name_off);
+ value = calloc(1, info->value_size);
+ if (!value) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+
+ if (bpf_map_lookup_elem(fd, &zero, value)) {
+ p_err("can't lookup struct_ops map %s id %u",
+ info->name, info->id);
+ free(value);
+ return -1;
+ }
+
+ jsonw_start_object(wtr);
+ jsonw_name(wtr, "bpf_map_info");
+ btf_dumper_type(d, map_info_type_id, (void *)info);
+ jsonw_end_object(wtr);
+
+ jsonw_start_object(wtr);
+ jsonw_name(wtr, struct_ops_name);
+ btf_dumper_type(d, info->btf_vmlinux_value_type_id, value);
+ jsonw_end_object(wtr);
+
+ free(value);
+
+ return 0;
+}
+
+static int do_dump(int argc, char **argv)
+{
+ const char *search_type = NULL, *search_term = NULL;
+ json_writer_t *wtr = json_wtr;
+ const struct btf *kern_btf;
+ struct btf_dumper d = {};
+ struct res res;
+
+ if (argc && argc != 2)
+ usage();
+
+ if (argc == 2) {
+ search_type = GET_ARG();
+ search_term = GET_ARG();
+ }
+
+ kern_btf = get_btf_vmlinux();
+ if (IS_ERR(kern_btf))
+ return -1;
+
+ if (!json_output) {
+ wtr = jsonw_new(stdout);
+ if (!wtr) {
+ p_err("can't create json writer");
+ return -1;
+ }
+ jsonw_pretty(wtr, true);
+ }
+
+ d.btf = kern_btf;
+ d.jw = wtr;
+ d.is_plain_text = !json_output;
+ d.prog_id_as_func_ptr = true;
+
+ res = do_work_on_struct_ops(search_type, search_term, __do_dump, &d,
+ wtr);
+
+ if (!json_output)
+ jsonw_destroy(&wtr);
+
+ return cmd_retval(&res, !!search_term);
+}
+
+static int __do_unregister(int fd, const struct bpf_map_info *info, void *data,
+ struct json_writer *wtr)
+{
+ int zero = 0;
+
+ if (bpf_map_delete_elem(fd, &zero)) {
+ p_err("can't unload %s %s id %u: %s",
+ get_kern_struct_ops_name(info), info->name,
+ info->id, strerror(errno));
+ return -1;
+ }
+
+ p_info("Unregistered %s %s id %u",
+ get_kern_struct_ops_name(info), info->name,
+ info->id);
+
+ return 0;
+}
+
+static int do_unregister(int argc, char **argv)
+{
+ const char *search_type, *search_term;
+ struct res res;
+
+ if (argc != 2)
+ usage();
+
+ search_type = GET_ARG();
+ search_term = GET_ARG();
+
+ res = do_work_on_struct_ops(search_type, search_term,
+ __do_unregister, NULL, NULL);
+
+ return cmd_retval(&res, true);
+}
+
+static int do_register(int argc, char **argv)
+{
+ struct bpf_object_load_attr load_attr = {};
+ const struct bpf_map_def *def;
+ struct bpf_map_info info = {};
+ __u32 info_len = sizeof(info);
+ int nr_errs = 0, nr_maps = 0;
+ struct bpf_object *obj;
+ struct bpf_link *link;
+ struct bpf_map *map;
+ const char *file;
+
+ if (argc != 1)
+ usage();
+
+ file = GET_ARG();
+
+ obj = bpf_object__open(file);
+ if (IS_ERR_OR_NULL(obj))
+ return -1;
+
+ set_max_rlimit();
+
+ load_attr.obj = obj;
+ if (verifier_logs)
+ /* log_level1 + log_level2 + stats, but not stable UAPI */
+ load_attr.log_level = 1 + 2 + 4;
+
+ if (bpf_object__load_xattr(&load_attr)) {
+ bpf_object__close(obj);
+ return -1;
+ }
+
+ bpf_object__for_each_map(map, obj) {
+ def = bpf_map__def(map);
+ if (def->type != BPF_MAP_TYPE_STRUCT_OPS)
+ continue;
+
+ link = bpf_map__attach_struct_ops(map);
+ if (IS_ERR(link)) {
+ p_err("can't register struct_ops %s: %s",
+ bpf_map__name(map),
+ strerror(-PTR_ERR(link)));
+ nr_errs++;
+ continue;
+ }
+ nr_maps++;
+
+ bpf_link__disconnect(link);
+ bpf_link__destroy(link);
+
+ if (!bpf_obj_get_info_by_fd(bpf_map__fd(map), &info,
+ &info_len))
+ p_info("Registered %s %s id %u",
+ get_kern_struct_ops_name(&info),
+ bpf_map__name(map),
+ info.id);
+ else
+ /* Not p_err. The struct_ops was attached
+ * successfully.
+ */
+ p_info("Registered %s but can't find id: %s",
+ bpf_map__name(map), strerror(errno));
+ }
+
+ bpf_object__close(obj);
+
+ if (nr_errs)
+ return -1;
+
+ if (!nr_maps) {
+ p_err("no struct_ops found in %s", file);
+ return -1;
+ }
+
+ if (json_output)
+ jsonw_null(json_wtr);
+
+ return 0;
+}
+
+static int do_help(int argc, char **argv)
+{
+ if (json_output) {
+ jsonw_null(json_wtr);
+ return 0;
+ }
+
+ fprintf(stderr,
+ "Usage: %1$s %2$s { show | list } [STRUCT_OPS_MAP]\n"
+ " %1$s %2$s dump [STRUCT_OPS_MAP]\n"
+ " %1$s %2$s register OBJ\n"
+ " %1$s %2$s unregister STRUCT_OPS_MAP\n"
+ " %1$s %2$s help\n"
+ "\n"
+ " OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
+ " STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n"
+ "",
+ bin_name, argv[-2]);
+
+ return 0;
+}
+
+static const struct cmd cmds[] = {
+ { "show", do_show },
+ { "list", do_show },
+ { "register", do_register },
+ { "unregister", do_unregister },
+ { "dump", do_dump },
+ { "help", do_help },
+ { 0 }
+};
+
+int do_struct_ops(int argc, char **argv)
+{
+ int err;
+
+ err = cmd_select(cmds, argc, argv, do_help);
+
+ if (!IS_ERR(btf_vmlinux))
+ btf__free(btf_vmlinux);
+
+ return err;
+}
diff --git a/tools/bpf/runqslower/.gitignore b/tools/bpf/runqslower/.gitignore
index 90a456a2a72f..ffdb70230c8b 100644
--- a/tools/bpf/runqslower/.gitignore
+++ b/tools/bpf/runqslower/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
/.output
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 39edd68afa8e..fb1337d69868 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -8,7 +8,8 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL)
LIBBPF_SRC := $(abspath ../../lib/bpf)
BPFOBJ := $(OUTPUT)/libbpf.a
BPF_INCLUDE := $(OUTPUT)
-INCLUDES := -I$(BPF_INCLUDE) -I$(OUTPUT) -I$(abspath ../../lib)
+INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib) \
+ -I$(abspath ../../include/uapi)
CFLAGS := -g -Wall
# Try to detect best kernel BTF source
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
index 48a39f72fadf..1f18a409f044 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -5,9 +5,7 @@
#include "runqslower.h"
#define TASK_RUNNING 0
-
-#define BPF_F_INDEX_MASK 0xffffffffULL
-#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+#define BPF_F_CURRENT_CPU 0xffffffffULL
const volatile __u64 min_us = 0;
const volatile pid_t targ_pid = 0;
diff --git a/tools/build/.gitignore b/tools/build/.gitignore
index a776371a3502..98ae1f509592 100644
--- a/tools/build/.gitignore
+++ b/tools/build/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
fixdep
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 574c2e0b9d20..cb152370fdef 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -40,7 +40,6 @@ FEATURE_TESTS_BASIC := \
glibc \
gtk2 \
gtk2-infobar \
- libaudit \
libbfd \
libcap \
libelf \
@@ -72,7 +71,8 @@ FEATURE_TESTS_BASIC := \
setns \
libaio \
libzstd \
- disassembler-four-args
+ disassembler-four-args \
+ file-handle
# FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list
# of all feature tests
@@ -97,7 +97,8 @@ FEATURE_TESTS_EXTRA := \
llvm \
llvm-version \
clang \
- libbpf
+ libbpf \
+ libpfm4
FEATURE_TESTS ?= $(FEATURE_TESTS_BASIC)
@@ -110,7 +111,6 @@ FEATURE_DISPLAY ?= \
dwarf_getlocations \
glibc \
gtk2 \
- libaudit \
libbfd \
libcap \
libelf \
diff --git a/tools/build/feature/.gitignore b/tools/build/feature/.gitignore
index 09b335b98842..15fcd34acdb9 100644
--- a/tools/build/feature/.gitignore
+++ b/tools/build/feature/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
*.d
*.bin
*.output
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index 7ac0d8088565..b1f0321180f5 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -67,7 +67,10 @@ FILES= \
test-llvm.bin \
test-llvm-version.bin \
test-libaio.bin \
- test-libzstd.bin
+ test-libzstd.bin \
+ test-clang-bpf-global-var.bin \
+ test-file-handle.bin \
+ test-libpfm4.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
@@ -75,6 +78,7 @@ CC ?= $(CROSS_COMPILE)gcc
CXX ?= $(CROSS_COMPILE)g++
PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
LLVM_CONFIG ?= llvm-config
+CLANG ?= clang
all: $(FILES)
@@ -87,7 +91,7 @@ __BUILDXX = $(CXX) $(CXXFLAGS) -MD -Wall -Werror -o $@ $(patsubst %.bin,%.cpp,$(
###############################
$(OUTPUT)test-all.bin:
- $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
+ $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
$(OUTPUT)test-hello.bin:
$(BUILD)
@@ -321,6 +325,16 @@ $(OUTPUT)test-libaio.bin:
$(OUTPUT)test-libzstd.bin:
$(BUILD) -lzstd
+$(OUTPUT)test-clang-bpf-global-var.bin:
+ $(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \
+ grep BTF_KIND_VAR
+
+$(OUTPUT)test-file-handle.bin:
+ $(BUILD)
+
+$(OUTPUT)test-libpfm4.bin:
+ $(BUILD) -lpfm
+
###############################
clean:
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 88145e8cde1a..5479e543b194 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -74,10 +74,6 @@
# include "test-libunwind.c"
#undef main
-#define main main_test_libaudit
-# include "test-libaudit.c"
-#undef main
-
#define main main_test_libslang
# include "test-libslang.c"
#undef main
@@ -208,7 +204,6 @@ int main(int argc, char *argv[])
main_test_libelf_gelf_getnote();
main_test_libelf_getshdrstrndx();
main_test_libunwind();
- main_test_libaudit();
main_test_libslang();
main_test_gtk2(argc, argv);
main_test_gtk2_infobar(argc, argv);
diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c
new file mode 100644
index 000000000000..221f1481d52e
--- /dev/null
+++ b/tools/build/feature/test-clang-bpf-global-var.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+volatile int global_value_for_test = 1;
diff --git a/tools/build/feature/test-file-handle.c b/tools/build/feature/test-file-handle.c
new file mode 100644
index 000000000000..4d3b03b27a0b
--- /dev/null
+++ b/tools/build/feature/test-file-handle.c
@@ -0,0 +1,17 @@
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <inttypes.h>
+
+int main(void)
+{
+ struct {
+ struct file_handle fh;
+ uint64_t cgroup_id;
+ } handle;
+ int mount_id;
+
+ name_to_handle_at(AT_FDCWD, "/", &handle.fh, &mount_id, 0);
+ return 0;
+}
diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index 2b0e02c38870..1547bc2c0950 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -4,9 +4,9 @@
/*
* Check OpenCSD library version is sufficient to provide required features
*/
-#define OCSD_MIN_VER ((0 << 16) | (11 << 8) | (0))
+#define OCSD_MIN_VER ((0 << 16) | (14 << 8) | (0))
#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
-#error "OpenCSD >= 0.11.0 is required"
+#error "OpenCSD >= 0.14.0 is required"
#endif
int main(void)
diff --git a/tools/build/feature/test-libpfm4.c b/tools/build/feature/test-libpfm4.c
new file mode 100644
index 000000000000..af49b259459e
--- /dev/null
+++ b/tools/build/feature/test-libpfm4.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/types.h>
+#include <perfmon/pfmlib.h>
+
+int main(void)
+{
+ pfm_initialize();
+ return 0;
+}
diff --git a/tools/build/feature/test-sync-compare-and-swap.c b/tools/build/feature/test-sync-compare-and-swap.c
index 1e38d1930a97..3bc6b0768a53 100644
--- a/tools/build/feature/test-sync-compare-and-swap.c
+++ b/tools/build/feature/test-sync-compare-and-swap.c
@@ -7,7 +7,7 @@ int main(int argc, char *argv[])
{
uint64_t old, new = argc;
- argv = argv;
+ (void)argv;
do {
old = __sync_val_compare_and_swap(&x, 0, 0);
} while (!__sync_bool_compare_and_swap(&x, old, new));
diff --git a/tools/cgroup/.gitignore b/tools/cgroup/.gitignore
index 633cd9b874f9..46a82775f2ca 100644
--- a/tools/cgroup/.gitignore
+++ b/tools/cgroup/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
cgroup_event_listener
diff --git a/tools/cgroup/iocost_monitor.py b/tools/cgroup/iocost_monitor.py
index 7427a5ee761b..3c21de88af9e 100644
--- a/tools/cgroup/iocost_monitor.py
+++ b/tools/cgroup/iocost_monitor.py
@@ -28,7 +28,8 @@ parser.add_argument('devname', metavar='DEV',
parser.add_argument('--cgroup', action='append', metavar='REGEX',
help='Regex for target cgroups, ')
parser.add_argument('--interval', '-i', metavar='SECONDS', type=float, default=1,
- help='Monitoring interval in seconds')
+ help='Monitoring interval in seconds (0 exits immediately '
+ 'after checking requirements)')
parser.add_argument('--json', action='store_true',
help='Output in json')
args = parser.parse_args()
@@ -112,14 +113,14 @@ class IocStat:
def dict(self, now):
return { 'device' : devname,
- 'timestamp' : str(now),
- 'enabled' : str(int(self.enabled)),
- 'running' : str(int(self.running)),
- 'period_ms' : str(self.period_ms),
- 'period_at' : str(self.period_at),
- 'period_vtime_at' : str(self.vperiod_at),
- 'busy_level' : str(self.busy_level),
- 'vrate_pct' : str(self.vrate_pct), }
+ 'timestamp' : now,
+ 'enabled' : self.enabled,
+ 'running' : self.running,
+ 'period_ms' : self.period_ms,
+ 'period_at' : self.period_at,
+ 'period_vtime_at' : self.vperiod_at,
+ 'busy_level' : self.busy_level,
+ 'vrate_pct' : self.vrate_pct, }
def table_preamble_str(self):
state = ('RUN' if self.running else 'IDLE') if self.enabled else 'OFF'
@@ -159,7 +160,12 @@ class IocgStat:
else:
self.inflight_pct = 0
- self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
+ # vdebt used to be an atomic64_t and is now u64, support both
+ try:
+ self.debt_ms = iocg.abs_vdebt.counter.value_() / VTIME_PER_USEC / 1000
+ except:
+ self.debt_ms = iocg.abs_vdebt.value_() / VTIME_PER_USEC / 1000
+
self.use_delay = blkg.use_delay.counter.value_()
self.delay_ms = blkg.delay_nsec.counter.value_() / 1_000_000
@@ -174,19 +180,19 @@ class IocgStat:
def dict(self, now, path):
out = { 'cgroup' : path,
- 'timestamp' : str(now),
- 'is_active' : str(int(self.is_active)),
- 'weight' : str(self.weight),
- 'weight_active' : str(self.active),
- 'weight_inuse' : str(self.inuse),
- 'hweight_active_pct' : str(self.hwa_pct),
- 'hweight_inuse_pct' : str(self.hwi_pct),
- 'inflight_pct' : str(self.inflight_pct),
- 'debt_ms' : str(self.debt_ms),
- 'use_delay' : str(self.use_delay),
- 'delay_ms' : str(self.delay_ms),
- 'usage_pct' : str(self.usage),
- 'address' : str(hex(self.address)) }
+ 'timestamp' : now,
+ 'is_active' : self.is_active,
+ 'weight' : self.weight,
+ 'weight_active' : self.active,
+ 'weight_inuse' : self.inuse,
+ 'hweight_active_pct' : self.hwa_pct,
+ 'hweight_inuse_pct' : self.hwi_pct,
+ 'inflight_pct' : self.inflight_pct,
+ 'debt_ms' : self.debt_ms,
+ 'use_delay' : self.use_delay,
+ 'delay_ms' : self.delay_ms,
+ 'usage_pct' : self.usage,
+ 'address' : self.address }
for i in range(len(self.usages)):
out[f'usage_pct_{i}'] = str(self.usages[i])
return out
@@ -243,6 +249,9 @@ for i, ptr in radix_tree_for_each(blkcg_root.blkg_tree.address_of_()):
if ioc is None:
err(f'Could not find ioc for {devname}');
+if interval == 0:
+ sys.exit(0)
+
# Keep printing
while True:
now = time.time()
diff --git a/tools/edid/1024x768.S b/tools/edid/1024x768.S
new file mode 100644
index 000000000000..4aed3f9ab88a
--- /dev/null
+++ b/tools/edid/1024x768.S
@@ -0,0 +1,43 @@
+/*
+ 1024x768.S: EDID data set for standard 1024x768 60 Hz monitor
+
+ Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 65000 /* kHz */
+#define XPIX 1024
+#define YPIX 768
+#define XY_RATIO XY_RATIO_4_3
+#define XBLANK 320
+#define YBLANK 38
+#define XOFFSET 8
+#define XPULSE 144
+#define YOFFSET 3
+#define YPULSE 6
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux XGA"
+#define ESTABLISHED_TIMING2_BITS 0x08 /* Bit 3 -> 1024x768 @60 Hz */
+#define HSYNC_POL 0
+#define VSYNC_POL 0
+
+#include "edid.S"
diff --git a/tools/edid/1280x1024.S b/tools/edid/1280x1024.S
new file mode 100644
index 000000000000..b26dd424cad7
--- /dev/null
+++ b/tools/edid/1280x1024.S
@@ -0,0 +1,43 @@
+/*
+ 1280x1024.S: EDID data set for standard 1280x1024 60 Hz monitor
+
+ Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 108000 /* kHz */
+#define XPIX 1280
+#define YPIX 1024
+#define XY_RATIO XY_RATIO_5_4
+#define XBLANK 408
+#define YBLANK 42
+#define XOFFSET 48
+#define XPULSE 112
+#define YOFFSET 1
+#define YPULSE 3
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux SXGA"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+
+#include "edid.S"
diff --git a/tools/edid/1600x1200.S b/tools/edid/1600x1200.S
new file mode 100644
index 000000000000..0d091b282768
--- /dev/null
+++ b/tools/edid/1600x1200.S
@@ -0,0 +1,43 @@
+/*
+ 1600x1200.S: EDID data set for standard 1600x1200 60 Hz monitor
+
+ Copyright (C) 2013 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 162000 /* kHz */
+#define XPIX 1600
+#define YPIX 1200
+#define XY_RATIO XY_RATIO_4_3
+#define XBLANK 560
+#define YBLANK 50
+#define XOFFSET 64
+#define XPULSE 192
+#define YOFFSET 1
+#define YPULSE 3
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux UXGA"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+
+#include "edid.S"
diff --git a/tools/edid/1680x1050.S b/tools/edid/1680x1050.S
new file mode 100644
index 000000000000..7dfed9a33eab
--- /dev/null
+++ b/tools/edid/1680x1050.S
@@ -0,0 +1,43 @@
+/*
+ 1680x1050.S: EDID data set for standard 1680x1050 60 Hz monitor
+
+ Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 146250 /* kHz */
+#define XPIX 1680
+#define YPIX 1050
+#define XY_RATIO XY_RATIO_16_10
+#define XBLANK 560
+#define YBLANK 39
+#define XOFFSET 104
+#define XPULSE 176
+#define YOFFSET 3
+#define YPULSE 6
+#define DPI 96
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux WSXGA"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+
+#include "edid.S"
diff --git a/tools/edid/1920x1080.S b/tools/edid/1920x1080.S
new file mode 100644
index 000000000000..d6ffbba28e95
--- /dev/null
+++ b/tools/edid/1920x1080.S
@@ -0,0 +1,43 @@
+/*
+ 1920x1080.S: EDID data set for standard 1920x1080 60 Hz monitor
+
+ Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 148500 /* kHz */
+#define XPIX 1920
+#define YPIX 1080
+#define XY_RATIO XY_RATIO_16_9
+#define XBLANK 280
+#define YBLANK 45
+#define XOFFSET 88
+#define XPULSE 44
+#define YOFFSET 4
+#define YPULSE 5
+#define DPI 96
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux FHD"
+/* No ESTABLISHED_TIMINGx_BITS */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+
+#include "edid.S"
diff --git a/tools/edid/800x600.S b/tools/edid/800x600.S
new file mode 100644
index 000000000000..a5616588de08
--- /dev/null
+++ b/tools/edid/800x600.S
@@ -0,0 +1,40 @@
+/*
+ 800x600.S: EDID data set for standard 800x600 60 Hz monitor
+
+ Copyright (C) 2011 Carsten Emde <C.Emde@osadl.org>
+ Copyright (C) 2014 Linaro Limited
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+*/
+
+/* EDID */
+#define VERSION 1
+#define REVISION 3
+
+/* Display */
+#define CLOCK 40000 /* kHz */
+#define XPIX 800
+#define YPIX 600
+#define XY_RATIO XY_RATIO_4_3
+#define XBLANK 256
+#define YBLANK 28
+#define XOFFSET 40
+#define XPULSE 128
+#define YOFFSET 1
+#define YPULSE 4
+#define DPI 72
+#define VFREQ 60 /* Hz */
+#define TIMING_NAME "Linux SVGA"
+#define ESTABLISHED_TIMING1_BITS 0x01 /* Bit 0: 800x600 @ 60Hz */
+#define HSYNC_POL 1
+#define VSYNC_POL 1
+
+#include "edid.S"
diff --git a/tools/edid/Makefile b/tools/edid/Makefile
new file mode 100644
index 000000000000..85a927dfab02
--- /dev/null
+++ b/tools/edid/Makefile
@@ -0,0 +1,37 @@
+
+SOURCES := $(wildcard [0-9]*x[0-9]*.S)
+
+BIN := $(patsubst %.S, %.bin, $(SOURCES))
+
+IHEX := $(patsubst %.S, %.bin.ihex, $(SOURCES))
+
+CODE := $(patsubst %.S, %.c, $(SOURCES))
+
+all: $(BIN) $(IHEX) $(CODE)
+
+clean:
+ @rm -f *.o *.bin.ihex *.bin *.c
+
+%.o: %.S
+ @cc -c $^
+
+%.bin.nocrc: %.o
+ @objcopy -Obinary $^ $@
+
+%.crc: %.bin.nocrc
+ @list=$$(for i in `seq 1 127`; do head -c$$i $^ | tail -c1 \
+ | hexdump -v -e '/1 "%02X+"'; done); \
+ echo "ibase=16;100-($${list%?})%100" | bc >$@
+
+%.p: %.crc %.S
+ @cc -c -DCRC="$$(cat $*.crc)" -o $@ $*.S
+
+%.bin: %.p
+ @objcopy -Obinary $^ $@
+
+%.bin.ihex: %.p
+ @objcopy -Oihex $^ $@
+ @dos2unix $@ 2>/dev/null
+
+%.c: %.bin
+ @echo "{" >$@; hexdump -f hex $^ >>$@; echo "};" >>$@
diff --git a/tools/edid/edid.S b/tools/edid/edid.S
new file mode 100644
index 000000000000..c3d13815526d
--- /dev/null
+++ b/tools/edid/edid.S
@@ -0,0 +1,274 @@
+/*
+ edid.S: EDID data template
+
+ Copyright (C) 2012 Carsten Emde <C.Emde@osadl.org>
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+
+/* Manufacturer */
+#define MFG_LNX1 'L'
+#define MFG_LNX2 'N'
+#define MFG_LNX3 'X'
+#define SERIAL 0
+#define YEAR 2012
+#define WEEK 5
+
+/* EDID 1.3 standard definitions */
+#define XY_RATIO_16_10 0b00
+#define XY_RATIO_4_3 0b01
+#define XY_RATIO_5_4 0b10
+#define XY_RATIO_16_9 0b11
+
+/* Provide defaults for the timing bits */
+#ifndef ESTABLISHED_TIMING1_BITS
+#define ESTABLISHED_TIMING1_BITS 0x00
+#endif
+#ifndef ESTABLISHED_TIMING2_BITS
+#define ESTABLISHED_TIMING2_BITS 0x00
+#endif
+#ifndef ESTABLISHED_TIMING3_BITS
+#define ESTABLISHED_TIMING3_BITS 0x00
+#endif
+
+#define mfgname2id(v1,v2,v3) \
+ ((((v1-'@')&0x1f)<<10)+(((v2-'@')&0x1f)<<5)+((v3-'@')&0x1f))
+#define swap16(v1) ((v1>>8)+((v1&0xff)<<8))
+#define lsbs2(v1,v2) (((v1&0x0f)<<4)+(v2&0x0f))
+#define msbs2(v1,v2) ((((v1>>8)&0x0f)<<4)+((v2>>8)&0x0f))
+#define msbs4(v1,v2,v3,v4) \
+ ((((v1>>8)&0x03)<<6)+(((v2>>8)&0x03)<<4)+\
+ (((v3>>4)&0x03)<<2)+((v4>>4)&0x03))
+#define pixdpi2mm(pix,dpi) ((pix*25)/dpi)
+#define xsize pixdpi2mm(XPIX,DPI)
+#define ysize pixdpi2mm(YPIX,DPI)
+
+ .data
+
+/* Fixed header pattern */
+header: .byte 0x00,0xff,0xff,0xff,0xff,0xff,0xff,0x00
+
+mfg_id: .hword swap16(mfgname2id(MFG_LNX1, MFG_LNX2, MFG_LNX3))
+
+prod_code: .hword 0
+
+/* Serial number. 32 bits, little endian. */
+serial_number: .long SERIAL
+
+/* Week of manufacture */
+week: .byte WEEK
+
+/* Year of manufacture, less 1990. (1990-2245)
+ If week=255, it is the model year instead */
+year: .byte YEAR-1990
+
+version: .byte VERSION /* EDID version, usually 1 (for 1.3) */
+revision: .byte REVISION /* EDID revision, usually 3 (for 1.3) */
+
+/* If Bit 7=1 Digital input. If set, the following bit definitions apply:
+ Bits 6-1 Reserved, must be 0
+ Bit 0 Signal is compatible with VESA DFP 1.x TMDS CRGB,
+ 1 pixel per clock, up to 8 bits per color, MSB aligned,
+ If Bit 7=0 Analog input. If clear, the following bit definitions apply:
+ Bits 6-5 Video white and sync levels, relative to blank
+ 00=+0.7/-0.3 V; 01=+0.714/-0.286 V;
+ 10=+1.0/-0.4 V; 11=+0.7/0 V
+ Bit 4 Blank-to-black setup (pedestal) expected
+ Bit 3 Separate sync supported
+ Bit 2 Composite sync (on HSync) supported
+ Bit 1 Sync on green supported
+ Bit 0 VSync pulse must be serrated when somposite or
+ sync-on-green is used. */
+video_parms: .byte 0x6d
+
+/* Maximum horizontal image size, in centimetres
+ (max 292 cm/115 in at 16:9 aspect ratio) */
+max_hor_size: .byte xsize/10
+
+/* Maximum vertical image size, in centimetres.
+ If either byte is 0, undefined (e.g. projector) */
+max_vert_size: .byte ysize/10
+
+/* Display gamma, minus 1, times 100 (range 1.00-3.5 */
+gamma: .byte 120
+
+/* Bit 7 DPMS standby supported
+ Bit 6 DPMS suspend supported
+ Bit 5 DPMS active-off supported
+ Bits 4-3 Display type: 00=monochrome; 01=RGB colour;
+ 10=non-RGB multicolour; 11=undefined
+ Bit 2 Standard sRGB colour space. Bytes 25-34 must contain
+ sRGB standard values.
+ Bit 1 Preferred timing mode specified in descriptor block 1.
+ Bit 0 GTF supported with default parameter values. */
+dsp_features: .byte 0xea
+
+/* Chromaticity coordinates. */
+/* Red and green least-significant bits
+ Bits 7-6 Red x value least-significant 2 bits
+ Bits 5-4 Red y value least-significant 2 bits
+ Bits 3-2 Green x value lst-significant 2 bits
+ Bits 1-0 Green y value least-significant 2 bits */
+red_green_lsb: .byte 0x5e
+
+/* Blue and white least-significant 2 bits */
+blue_white_lsb: .byte 0xc0
+
+/* Red x value most significant 8 bits.
+ 0-255 encodes 0-0.996 (255/256); 0-0.999 (1023/1024) with lsbits */
+red_x_msb: .byte 0xa4
+
+/* Red y value most significant 8 bits */
+red_y_msb: .byte 0x59
+
+/* Green x and y value most significant 8 bits */
+green_x_y_msb: .byte 0x4a,0x98
+
+/* Blue x and y value most significant 8 bits */
+blue_x_y_msb: .byte 0x25,0x20
+
+/* Default white point x and y value most significant 8 bits */
+white_x_y_msb: .byte 0x50,0x54
+
+/* Established timings */
+/* Bit 7 720x400 @ 70 Hz
+ Bit 6 720x400 @ 88 Hz
+ Bit 5 640x480 @ 60 Hz
+ Bit 4 640x480 @ 67 Hz
+ Bit 3 640x480 @ 72 Hz
+ Bit 2 640x480 @ 75 Hz
+ Bit 1 800x600 @ 56 Hz
+ Bit 0 800x600 @ 60 Hz */
+estbl_timing1: .byte ESTABLISHED_TIMING1_BITS
+
+/* Bit 7 800x600 @ 72 Hz
+ Bit 6 800x600 @ 75 Hz
+ Bit 5 832x624 @ 75 Hz
+ Bit 4 1024x768 @ 87 Hz, interlaced (1024x768)
+ Bit 3 1024x768 @ 60 Hz
+ Bit 2 1024x768 @ 72 Hz
+ Bit 1 1024x768 @ 75 Hz
+ Bit 0 1280x1024 @ 75 Hz */
+estbl_timing2: .byte ESTABLISHED_TIMING2_BITS
+
+/* Bit 7 1152x870 @ 75 Hz (Apple Macintosh II)
+ Bits 6-0 Other manufacturer-specific display mod */
+estbl_timing3: .byte ESTABLISHED_TIMING3_BITS
+
+/* Standard timing */
+/* X resolution, less 31, divided by 8 (256-2288 pixels) */
+std_xres: .byte (XPIX/8)-31
+/* Y resolution, X:Y pixel ratio
+ Bits 7-6 X:Y pixel ratio: 00=16:10; 01=4:3; 10=5:4; 11=16:9.
+ Bits 5-0 Vertical frequency, less 60 (60-123 Hz) */
+std_vres: .byte (XY_RATIO<<6)+VFREQ-60
+ .fill 7,2,0x0101 /* Unused */
+
+descriptor1:
+/* Pixel clock in 10 kHz units. (0.-655.35 MHz, little-endian) */
+clock: .hword CLOCK/10
+
+/* Horizontal active pixels 8 lsbits (0-4095) */
+x_act_lsb: .byte XPIX&0xff
+/* Horizontal blanking pixels 8 lsbits (0-4095)
+ End of active to start of next active. */
+x_blk_lsb: .byte XBLANK&0xff
+/* Bits 7-4 Horizontal active pixels 4 msbits
+ Bits 3-0 Horizontal blanking pixels 4 msbits */
+x_msbs: .byte msbs2(XPIX,XBLANK)
+
+/* Vertical active lines 8 lsbits (0-4095) */
+y_act_lsb: .byte YPIX&0xff
+/* Vertical blanking lines 8 lsbits (0-4095) */
+y_blk_lsb: .byte YBLANK&0xff
+/* Bits 7-4 Vertical active lines 4 msbits
+ Bits 3-0 Vertical blanking lines 4 msbits */
+y_msbs: .byte msbs2(YPIX,YBLANK)
+
+/* Horizontal sync offset pixels 8 lsbits (0-1023) From blanking start */
+x_snc_off_lsb: .byte XOFFSET&0xff
+/* Horizontal sync pulse width pixels 8 lsbits (0-1023) */
+x_snc_pls_lsb: .byte XPULSE&0xff
+/* Bits 7-4 Vertical sync offset lines 4 lsbits (0-63)
+ Bits 3-0 Vertical sync pulse width lines 4 lsbits (0-63) */
+y_snc_lsb: .byte lsbs2(YOFFSET, YPULSE)
+/* Bits 7-6 Horizontal sync offset pixels 2 msbits
+ Bits 5-4 Horizontal sync pulse width pixels 2 msbits
+ Bits 3-2 Vertical sync offset lines 2 msbits
+ Bits 1-0 Vertical sync pulse width lines 2 msbits */
+xy_snc_msbs: .byte msbs4(XOFFSET,XPULSE,YOFFSET,YPULSE)
+
+/* Horizontal display size, mm, 8 lsbits (0-4095 mm, 161 in) */
+x_dsp_size: .byte xsize&0xff
+
+/* Vertical display size, mm, 8 lsbits (0-4095 mm, 161 in) */
+y_dsp_size: .byte ysize&0xff
+
+/* Bits 7-4 Horizontal display size, mm, 4 msbits
+ Bits 3-0 Vertical display size, mm, 4 msbits */
+dsp_size_mbsb: .byte msbs2(xsize,ysize)
+
+/* Horizontal border pixels (each side; total is twice this) */
+x_border: .byte 0
+/* Vertical border lines (each side; total is twice this) */
+y_border: .byte 0
+
+/* Bit 7 Interlaced
+ Bits 6-5 Stereo mode: 00=No stereo; other values depend on bit 0:
+ Bit 0=0: 01=Field sequential, sync=1 during right; 10=similar,
+ sync=1 during left; 11=4-way interleaved stereo
+ Bit 0=1 2-way interleaved stereo: 01=Right image on even lines;
+ 10=Left image on even lines; 11=side-by-side
+ Bits 4-3 Sync type: 00=Analog composite; 01=Bipolar analog composite;
+ 10=Digital composite (on HSync); 11=Digital separate
+ Bit 2 If digital separate: Vertical sync polarity (1=positive)
+ Other types: VSync serrated (HSync during VSync)
+ Bit 1 If analog sync: Sync on all 3 RGB lines (else green only)
+ Digital: HSync polarity (1=positive)
+ Bit 0 2-way line-interleaved stereo, if bits 4-3 are not 00. */
+features: .byte 0x18+(VSYNC_POL<<2)+(HSYNC_POL<<1)
+
+descriptor2: .byte 0,0 /* Not a detailed timing descriptor */
+ .byte 0 /* Must be zero */
+ .byte 0xff /* Descriptor is monitor serial number (text) */
+ .byte 0 /* Must be zero */
+start1: .ascii "Linux #0"
+end1: .byte 0x0a /* End marker */
+ .fill 12-(end1-start1), 1, 0x20 /* Padded spaces */
+descriptor3: .byte 0,0 /* Not a detailed timing descriptor */
+ .byte 0 /* Must be zero */
+ .byte 0xfd /* Descriptor is monitor range limits */
+ .byte 0 /* Must be zero */
+start2: .byte VFREQ-1 /* Minimum vertical field rate (1-255 Hz) */
+ .byte VFREQ+1 /* Maximum vertical field rate (1-255 Hz) */
+ .byte (CLOCK/(XPIX+XBLANK))-1 /* Minimum horizontal line rate
+ (1-255 kHz) */
+ .byte (CLOCK/(XPIX+XBLANK))+1 /* Maximum horizontal line rate
+ (1-255 kHz) */
+ .byte (CLOCK/10000)+1 /* Maximum pixel clock rate, rounded up
+ to 10 MHz multiple (10-2550 MHz) */
+ .byte 0 /* No extended timing information type */
+end2: .byte 0x0a /* End marker */
+ .fill 12-(end2-start2), 1, 0x20 /* Padded spaces */
+descriptor4: .byte 0,0 /* Not a detailed timing descriptor */
+ .byte 0 /* Must be zero */
+ .byte 0xfc /* Descriptor is text */
+ .byte 0 /* Must be zero */
+start3: .ascii TIMING_NAME
+end3: .byte 0x0a /* End marker */
+ .fill 12-(end3-start3), 1, 0x20 /* Padded spaces */
+extensions: .byte 0 /* Number of extensions to follow */
+checksum: .byte CRC /* Sum of all bytes must be 0 */
diff --git a/tools/edid/hex b/tools/edid/hex
new file mode 100644
index 000000000000..8873ebb618af
--- /dev/null
+++ b/tools/edid/hex
@@ -0,0 +1 @@
+"\t" 8/1 "0x%02x, " "\n"
diff --git a/tools/gpio/.gitignore b/tools/gpio/.gitignore
index a94c0e83b209..a00d604027a2 100644
--- a/tools/gpio/.gitignore
+++ b/tools/gpio/.gitignore
@@ -1,4 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
gpio-event-mon
gpio-hammer
+gpio-watch
lsgpio
include/linux/gpio.h
diff --git a/tools/gpio/Build b/tools/gpio/Build
index 4141f35837db..67c7b7f6a717 100644
--- a/tools/gpio/Build
+++ b/tools/gpio/Build
@@ -2,3 +2,4 @@ gpio-utils-y += gpio-utils.o
lsgpio-y += lsgpio.o gpio-utils.o
gpio-hammer-y += gpio-hammer.o gpio-utils.o
gpio-event-mon-y += gpio-event-mon.o gpio-utils.o
+gpio-watch-y += gpio-watch.o
diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile
index 6080de58861f..440434027557 100644
--- a/tools/gpio/Makefile
+++ b/tools/gpio/Makefile
@@ -18,7 +18,7 @@ MAKEFLAGS += -r
override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
-ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon
+ALL_TARGETS := lsgpio gpio-hammer gpio-event-mon gpio-watch
ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
all: $(ALL_PROGRAMS)
@@ -35,7 +35,7 @@ $(OUTPUT)include/linux/gpio.h: ../../include/uapi/linux/gpio.h
prepare: $(OUTPUT)include/linux/gpio.h
-GPIO_UTILS_IN := $(output)gpio-utils-in.o
+GPIO_UTILS_IN := $(OUTPUT)gpio-utils-in.o
$(GPIO_UTILS_IN): prepare FORCE
$(Q)$(MAKE) $(build)=gpio-utils
@@ -66,6 +66,15 @@ $(GPIO_EVENT_MON_IN): prepare FORCE $(OUTPUT)gpio-utils-in.o
$(OUTPUT)gpio-event-mon: $(GPIO_EVENT_MON_IN)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+#
+# gpio-watch
+#
+GPIO_WATCH_IN := $(OUTPUT)gpio-watch-in.o
+$(GPIO_WATCH_IN): prepare FORCE
+ $(Q)$(MAKE) $(build)=gpio-watch
+$(OUTPUT)gpio-watch: $(GPIO_WATCH_IN)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
clean:
rm -f $(ALL_PROGRAMS)
rm -f $(OUTPUT)include/linux/gpio.h
diff --git a/tools/gpio/gpio-hammer.c b/tools/gpio/gpio-hammer.c
index 0e0060a6eb34..9fd926e8cb52 100644
--- a/tools/gpio/gpio-hammer.c
+++ b/tools/gpio/gpio-hammer.c
@@ -77,7 +77,7 @@ int hammer_device(const char *device_name, unsigned int *lines, int nlines,
fprintf(stdout, "[%c] ", swirr[j]);
j++;
- if (j == sizeof(swirr)-1)
+ if (j == sizeof(swirr) - 1)
j = 0;
fprintf(stdout, "[");
@@ -135,7 +135,14 @@ int main(int argc, char **argv)
device_name = optarg;
break;
case 'o':
- lines[i] = strtoul(optarg, NULL, 10);
+ /*
+ * Avoid overflow. Do not immediately error, we want to
+ * be able to accurately report on the amount of times
+ * '-o' was given to give an accurate error message
+ */
+ if (i < GPIOHANDLES_MAX)
+ lines[i] = strtoul(optarg, NULL, 10);
+
i++;
break;
case '?':
@@ -143,6 +150,14 @@ int main(int argc, char **argv)
return -1;
}
}
+
+ if (i >= GPIOHANDLES_MAX) {
+ fprintf(stderr,
+ "Only %d occurrences of '-o' are allowed, %d were found\n",
+ GPIOHANDLES_MAX, i + 1);
+ return -1;
+ }
+
nlines = i;
if (!device_name || !nlines) {
diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c
index 53470de6a502..06003789e7c7 100644
--- a/tools/gpio/gpio-utils.c
+++ b/tools/gpio/gpio-utils.c
@@ -17,7 +17,7 @@
#include <linux/gpio.h>
#include "gpio-utils.h"
-#define COMSUMER "gpio-utils"
+#define CONSUMER "gpio-utils"
/**
* doc: Operation of gpio
@@ -209,7 +209,7 @@ int gpiotools_gets(const char *device_name, unsigned int *lines,
ret = gpiotools_request_linehandle(device_name, lines, nlines,
GPIOHANDLE_REQUEST_INPUT, data,
- COMSUMER);
+ CONSUMER);
if (ret < 0)
return ret;
@@ -259,7 +259,7 @@ int gpiotools_sets(const char *device_name, unsigned int *lines,
ret = gpiotools_request_linehandle(device_name, lines, nlines,
GPIOHANDLE_REQUEST_OUTPUT, data,
- COMSUMER);
+ CONSUMER);
if (ret < 0)
return ret;
diff --git a/tools/gpio/gpio-watch.c b/tools/gpio/gpio-watch.c
new file mode 100644
index 000000000000..5cea24fddfa7
--- /dev/null
+++ b/tools/gpio/gpio-watch.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * gpio-watch - monitor unrequested lines for property changes using the
+ * character device
+ *
+ * Copyright (C) 2019 BayLibre SAS
+ * Author: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+ */
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/gpio.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+int main(int argc, char **argv)
+{
+ struct gpioline_info_changed chg;
+ struct gpioline_info req;
+ struct pollfd pfd;
+ int fd, i, j, ret;
+ char *event, *end;
+ ssize_t rd;
+
+ if (argc < 3)
+ goto err_usage;
+
+ fd = open(argv[1], O_RDWR | O_CLOEXEC);
+ if (fd < 0) {
+ perror("unable to open gpiochip");
+ return EXIT_FAILURE;
+ }
+
+ for (i = 0, j = 2; i < argc - 2; i++, j++) {
+ memset(&req, 0, sizeof(req));
+
+ req.line_offset = strtoul(argv[j], &end, 0);
+ if (*end != '\0')
+ goto err_usage;
+
+ ret = ioctl(fd, GPIO_GET_LINEINFO_WATCH_IOCTL, &req);
+ if (ret) {
+ perror("unable to set up line watch");
+ return EXIT_FAILURE;
+ }
+ }
+
+ pfd.fd = fd;
+ pfd.events = POLLIN | POLLPRI;
+
+ for (;;) {
+ ret = poll(&pfd, 1, 5000);
+ if (ret < 0) {
+ perror("error polling the linechanged fd");
+ return EXIT_FAILURE;
+ } else if (ret > 0) {
+ memset(&chg, 0, sizeof(chg));
+ rd = read(pfd.fd, &chg, sizeof(chg));
+ if (rd < 0 || rd != sizeof(chg)) {
+ if (rd != sizeof(chg))
+ errno = EIO;
+
+ perror("error reading line change event");
+ return EXIT_FAILURE;
+ }
+
+ switch (chg.event_type) {
+ case GPIOLINE_CHANGED_REQUESTED:
+ event = "requested";
+ break;
+ case GPIOLINE_CHANGED_RELEASED:
+ event = "released";
+ break;
+ case GPIOLINE_CHANGED_CONFIG:
+ event = "config changed";
+ break;
+ default:
+ fprintf(stderr,
+ "invalid event type received from the kernel\n");
+ return EXIT_FAILURE;
+ }
+
+ printf("line %u: %s at %llu\n",
+ chg.info.line_offset, event, chg.timestamp);
+ }
+ }
+
+ return 0;
+
+err_usage:
+ printf("%s: <gpiochip> <line0> <line1> ...\n", argv[0]);
+ return EXIT_FAILURE;
+}
diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
index e1430f504c13..8a71ad36f83b 100644
--- a/tools/gpio/lsgpio.c
+++ b/tools/gpio/lsgpio.c
@@ -49,6 +49,18 @@ struct gpio_flag flagnames[] = {
.name = "open-source",
.mask = GPIOLINE_FLAG_OPEN_SOURCE,
},
+ {
+ .name = "pull-up",
+ .mask = GPIOLINE_FLAG_BIAS_PULL_UP,
+ },
+ {
+ .name = "pull-down",
+ .mask = GPIOLINE_FLAG_BIAS_PULL_DOWN,
+ },
+ {
+ .name = "bias-disabled",
+ .mask = GPIOLINE_FLAG_BIAS_DISABLE,
+ },
};
void print_flags(unsigned long flags)
diff --git a/tools/iio/.gitignore b/tools/iio/.gitignore
index 3758202618bd..5bd6f4df98b7 100644
--- a/tools/iio/.gitignore
+++ b/tools/iio/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
iio_event_monitor
iio_generic_buffer
lsiio
diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h
index 669d69441a62..4671fbf28842 100644
--- a/tools/include/linux/bits.h
+++ b/tools/include/linux/bits.h
@@ -3,9 +3,9 @@
#define __LINUX_BITS_H
#include <linux/const.h>
+#include <vdso/bits.h>
#include <asm/bitsperlong.h>
-#define BIT(nr) (UL(1) << (nr))
#define BIT_ULL(nr) (ULL(1) << (nr))
#define BIT_MASK(nr) (UL(1) << ((nr) % BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / BITS_PER_LONG)
@@ -18,12 +18,30 @@
* position @h. For example
* GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000.
*/
-#define GENMASK(h, l) \
+#if !defined(__ASSEMBLY__) && \
+ (!defined(CONFIG_CC_IS_GCC) || CONFIG_GCC_VERSION >= 49000)
+#include <linux/build_bug.h>
+#define GENMASK_INPUT_CHECK(h, l) \
+ (BUILD_BUG_ON_ZERO(__builtin_choose_expr( \
+ __builtin_constant_p((l) > (h)), (l) > (h), 0)))
+#else
+/*
+ * BUILD_BUG_ON_ZERO is not available in h files included from asm files,
+ * disable the input check if that is the case.
+ */
+#define GENMASK_INPUT_CHECK(h, l) 0
+#endif
+
+#define __GENMASK(h, l) \
(((~UL(0)) - (UL(1) << (l)) + 1) & \
(~UL(0) >> (BITS_PER_LONG - 1 - (h))))
+#define GENMASK(h, l) \
+ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
-#define GENMASK_ULL(h, l) \
+#define __GENMASK_ULL(h, l) \
(((~ULL(0)) - (ULL(1) << (l)) + 1) & \
(~ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h))))
+#define GENMASK_ULL(h, l) \
+ (GENMASK_INPUT_CHECK(h, l) + __GENMASK_ULL(h, l))
#endif /* __LINUX_BITS_H */
diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h
new file mode 100644
index 000000000000..cc7070c7439b
--- /dev/null
+++ b/tools/include/linux/build_bug.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_BUILD_BUG_H
+#define _LINUX_BUILD_BUG_H
+
+#include <linux/compiler.h>
+
+#ifdef __CHECKER__
+#define BUILD_BUG_ON_ZERO(e) (0)
+#else /* __CHECKER__ */
+/*
+ * Force a compilation error if condition is true, but also produce a
+ * result (of value 0 and type int), so the expression can be used
+ * e.g. in a structure initializer (or where-ever else comma expressions
+ * aren't permitted).
+ */
+#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
+#endif /* __CHECKER__ */
+
+/* Force a compilation error if a constant expression is not a power of 2 */
+#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \
+ BUILD_BUG_ON(((n) & ((n) - 1)) != 0)
+#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \
+ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0))
+
+/*
+ * BUILD_BUG_ON_INVALID() permits the compiler to check the validity of the
+ * expression but avoids the generation of any code, even if that expression
+ * has side-effects.
+ */
+#define BUILD_BUG_ON_INVALID(e) ((void)(sizeof((__force long)(e))))
+
+/**
+ * BUILD_BUG_ON_MSG - break compile if a condition is true & emit supplied
+ * error message.
+ * @condition: the condition which the compiler should know is false.
+ *
+ * See BUILD_BUG_ON for description.
+ */
+#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
+
+/**
+ * BUILD_BUG_ON - break compile if a condition is true.
+ * @condition: the condition which the compiler should know is false.
+ *
+ * If you have some code which relies on certain constants being equal, or
+ * some other compile-time-evaluated condition, you should use BUILD_BUG_ON to
+ * detect if someone changes it.
+ */
+#define BUILD_BUG_ON(condition) \
+ BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
+
+/**
+ * BUILD_BUG - break compile if used.
+ *
+ * If you have some code that you expect the compiler to eliminate at
+ * build time, you should use BUILD_BUG to detect if it is
+ * unexpectedly used.
+ */
+#define BUILD_BUG() BUILD_BUG_ON_MSG(1, "BUILD_BUG failed")
+
+/**
+ * static_assert - check integer constant expression at build time
+ *
+ * static_assert() is a wrapper for the C11 _Static_assert, with a
+ * little macro magic to make the message optional (defaulting to the
+ * stringification of the tested expression).
+ *
+ * Contrary to BUILD_BUG_ON(), static_assert() can be used at global
+ * scope, but requires the expression to be an integer constant
+ * expression (i.e., it is not enough that __builtin_constant_p() is
+ * true for expr).
+ *
+ * Also note that BUILD_BUG_ON() fails the build if the condition is
+ * true, while static_assert() fails the build if the expression is
+ * false.
+ */
+#ifndef static_assert
+#define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
+#define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
+#endif // static_assert
+
+#endif /* _LINUX_BUILD_BUG_H */
diff --git a/tools/include/linux/compiler-gcc.h b/tools/include/linux/compiler-gcc.h
index 95c072b70d0e..b9d4322e1e65 100644
--- a/tools/include/linux/compiler-gcc.h
+++ b/tools/include/linux/compiler-gcc.h
@@ -27,6 +27,18 @@
#define __pure __attribute__((pure))
#endif
#define noinline __attribute__((noinline))
+#ifdef __has_attribute
+#if __has_attribute(disable_tail_calls)
+#define __no_tail_call __attribute__((disable_tail_calls))
+#endif
+#endif
+#ifndef __no_tail_call
+#if GCC_VERSION > 40201
+#define __no_tail_call __attribute__((optimize("no-optimize-sibling-calls")))
+#else
+#define __no_tail_call
+#endif
+#endif
#ifndef __packed
#define __packed __attribute__((packed))
#endif
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 1827c2f973f9..9f9002734e19 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -10,6 +10,32 @@
# define __compiletime_error(message)
#endif
+#ifdef __OPTIMIZE__
+# define __compiletime_assert(condition, msg, prefix, suffix) \
+ do { \
+ extern void prefix ## suffix(void) __compiletime_error(msg); \
+ if (!(condition)) \
+ prefix ## suffix(); \
+ } while (0)
+#else
+# define __compiletime_assert(condition, msg, prefix, suffix) do { } while (0)
+#endif
+
+#define _compiletime_assert(condition, msg, prefix, suffix) \
+ __compiletime_assert(condition, msg, prefix, suffix)
+
+/**
+ * compiletime_assert - break build and emit msg if condition is false
+ * @condition: a compile-time constant condition to check
+ * @msg: a message to emit if condition is false
+ *
+ * In tradition of POSIX assert, this macro will break the build if the
+ * supplied condition is *false*, emitting the supplied error message if the
+ * compiler has support to do so.
+ */
+#define compiletime_assert(condition, msg) \
+ _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
+
/* Optimization barrier */
/* The "volatile" is due to gcc bugs */
#define barrier() __asm__ __volatile__("": : :"memory")
@@ -21,6 +47,9 @@
#ifndef noinline
#define noinline
#endif
+#ifndef __no_tail_call
+#define __no_tail_call
+#endif
/* Are two types/vars the same type (ignoring qualifiers)? */
#ifndef __same_type
diff --git a/tools/include/linux/const.h b/tools/include/linux/const.h
index 7b55a55f5911..81b8aae5a855 100644
--- a/tools/include/linux/const.h
+++ b/tools/include/linux/const.h
@@ -1,9 +1,6 @@
#ifndef _LINUX_CONST_H
#define _LINUX_CONST_H
-#include <uapi/linux/const.h>
-
-#define UL(x) (_UL(x))
-#define ULL(x) (_ULL(x))
+#include <vdso/const.h>
#endif /* _LINUX_CONST_H */
diff --git a/tools/include/linux/irqflags.h b/tools/include/linux/irqflags.h
index e734da3e5b33..67e01bbadbfe 100644
--- a/tools/include/linux/irqflags.h
+++ b/tools/include/linux/irqflags.h
@@ -2,12 +2,12 @@
#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
-# define trace_hardirq_context(p) 0
-# define trace_softirq_context(p) 0
-# define trace_hardirqs_enabled(p) 0
-# define trace_softirqs_enabled(p) 0
-# define trace_hardirq_enter() do { } while (0)
-# define trace_hardirq_exit() do { } while (0)
+# define lockdep_hardirq_context(p) 0
+# define lockdep_softirq_context(p) 0
+# define lockdep_hardirqs_enabled(p) 0
+# define lockdep_softirqs_enabled(p) 0
+# define lockdep_hardirq_enter() do { } while (0)
+# define lockdep_hardirq_exit() do { } while (0)
# define lockdep_softirq_enter() do { } while (0)
# define lockdep_softirq_exit() do { } while (0)
# define INIT_TRACE_IRQFLAGS
diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h
index 89ca6fe257cc..efb6c3f5f2a9 100644
--- a/tools/include/linux/kallsyms.h
+++ b/tools/include/linux/kallsyms.h
@@ -20,7 +20,7 @@ static inline const char *kallsyms_lookup(unsigned long addr,
#include <execinfo.h>
#include <stdlib.h>
-static inline void print_ip_sym(unsigned long ip)
+static inline void print_ip_sym(const char *loglvl, unsigned long ip)
{
char **name;
diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h
index cba226948a0c..a7e54a08fb54 100644
--- a/tools/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -5,6 +5,7 @@
#include <stdarg.h>
#include <stddef.h>
#include <assert.h>
+#include <linux/build_bug.h>
#include <linux/compiler.h>
#include <endian.h>
#include <byteswap.h>
@@ -35,9 +36,6 @@
(type *)((char *)__mptr - offsetof(type, member)); })
#endif
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); }))
-
#ifndef max
#define max(x, y) ({ \
typeof(x) _max1 = (x); \
diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h
index e03b1ea23e0e..30dd21f976c3 100644
--- a/tools/include/linux/rbtree.h
+++ b/tools/include/linux/rbtree.h
@@ -11,7 +11,7 @@
I know it's not the cleaner way, but in C (not in C++) to get
performances and genericity...
- See Documentation/rbtree.txt for documentation and samples.
+ See Documentation/core-api/rbtree.rst for documentation and samples.
*/
#ifndef __TOOLS_LINUX_PERF_RBTREE_H
diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h
index 381aa948610d..570bb9794421 100644
--- a/tools/include/linux/rbtree_augmented.h
+++ b/tools/include/linux/rbtree_augmented.h
@@ -23,7 +23,7 @@
* rb_insert_augmented() and rb_erase_augmented() are intended to be public.
* The rest are implementation details you are not expected to depend on.
*
- * See Documentation/rbtree.txt for documentation and samples.
+ * See Documentation/core-api/rbtree.rst for documentation and samples.
*/
struct rb_augment_callbacks {
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h
index 868bf7996c0f..808b48a93330 100644
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -948,6 +948,8 @@ extern "C" {
#define DRM_IOCTL_SYNCOBJ_TRANSFER DRM_IOWR(0xCC, struct drm_syncobj_transfer)
#define DRM_IOCTL_SYNCOBJ_TIMELINE_SIGNAL DRM_IOWR(0xCD, struct drm_syncobj_timeline_array)
+#define DRM_IOCTL_MODE_GETFB2 DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
+
/**
* Device specific ioctls should only be in their respective headers
* The device specific ioctl range is from 0x40 to 0x9f.
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h
index 829c0a48577f..2813e579b480 100644
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -1619,6 +1619,27 @@ struct drm_i915_gem_context_param {
* By default, new contexts allow persistence.
*/
#define I915_CONTEXT_PARAM_PERSISTENCE 0xb
+
+/*
+ * I915_CONTEXT_PARAM_RINGSIZE:
+ *
+ * Sets the size of the CS ringbuffer to use for logical ring contexts. This
+ * applies a limit of how many batches can be queued to HW before the caller
+ * is blocked due to lack of space for more commands.
+ *
+ * Only reliably possible to be set prior to first use, i.e. during
+ * construction. At any later point, the current execution must be flushed as
+ * the ring can only be changed while the context is idle. Note, the ringsize
+ * can be specified as a constructor property, see
+ * I915_CONTEXT_CREATE_EXT_SETPARAM, but can also be set later if required.
+ *
+ * Only applies to the current set of engine and lost when those engines
+ * are replaced by a new mapping (see I915_CONTEXT_PARAM_ENGINES).
+ *
+ * Must be between 4 - 512 KiB, in intervals of page size [4 KiB].
+ * Default is 16 KiB.
+ */
+#define I915_CONTEXT_PARAM_RINGSIZE 0xc
/* Must be kept compact -- no holes and well documented */
__u64 value;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 22f235260a3a..c65b374a5090 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -111,6 +111,12 @@ enum bpf_cmd {
BPF_MAP_LOOKUP_AND_DELETE_BATCH,
BPF_MAP_UPDATE_BATCH,
BPF_MAP_DELETE_BATCH,
+ BPF_LINK_CREATE,
+ BPF_LINK_UPDATE,
+ BPF_LINK_GET_FD_BY_ID,
+ BPF_LINK_GET_NEXT_ID,
+ BPF_ENABLE_STATS,
+ BPF_ITER_CREATE,
};
enum bpf_map_type {
@@ -141,6 +147,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_SK_STORAGE,
BPF_MAP_TYPE_DEVMAP_HASH,
BPF_MAP_TYPE_STRUCT_OPS,
+ BPF_MAP_TYPE_RINGBUF,
};
/* Note that tracing related programs such as
@@ -181,6 +188,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_TRACING,
BPF_PROG_TYPE_STRUCT_OPS,
BPF_PROG_TYPE_EXT,
+ BPF_PROG_TYPE_LSM,
};
enum bpf_attach_type {
@@ -210,11 +218,30 @@ enum bpf_attach_type {
BPF_TRACE_RAW_TP,
BPF_TRACE_FENTRY,
BPF_TRACE_FEXIT,
+ BPF_MODIFY_RETURN,
+ BPF_LSM_MAC,
+ BPF_TRACE_ITER,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ BPF_XDP_DEVMAP,
__MAX_BPF_ATTACH_TYPE
};
#define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
+enum bpf_link_type {
+ BPF_LINK_TYPE_UNSPEC = 0,
+ BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
+ BPF_LINK_TYPE_TRACING = 2,
+ BPF_LINK_TYPE_CGROUP = 3,
+ BPF_LINK_TYPE_ITER = 4,
+ BPF_LINK_TYPE_NETNS = 5,
+
+ MAX_BPF_LINK_TYPE,
+};
+
/* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
*
* NONE(default): No further bpf programs allowed in the subtree.
@@ -325,44 +352,46 @@ enum bpf_attach_type {
#define BPF_PSEUDO_CALL 1
/* flags for BPF_MAP_UPDATE_ELEM command */
-#define BPF_ANY 0 /* create new element or update existing */
-#define BPF_NOEXIST 1 /* create new element if it didn't exist */
-#define BPF_EXIST 2 /* update existing element */
-#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */
+enum {
+ BPF_ANY = 0, /* create new element or update existing */
+ BPF_NOEXIST = 1, /* create new element if it didn't exist */
+ BPF_EXIST = 2, /* update existing element */
+ BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */
+};
/* flags for BPF_MAP_CREATE command */
-#define BPF_F_NO_PREALLOC (1U << 0)
+enum {
+ BPF_F_NO_PREALLOC = (1U << 0),
/* Instead of having one common LRU list in the
* BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list
* which can scale and perform better.
* Note, the LRU nodes (including free nodes) cannot be moved
* across different LRU lists.
*/
-#define BPF_F_NO_COMMON_LRU (1U << 1)
+ BPF_F_NO_COMMON_LRU = (1U << 1),
/* Specify numa node during map creation */
-#define BPF_F_NUMA_NODE (1U << 2)
-
-#define BPF_OBJ_NAME_LEN 16U
+ BPF_F_NUMA_NODE = (1U << 2),
/* Flags for accessing BPF object from syscall side. */
-#define BPF_F_RDONLY (1U << 3)
-#define BPF_F_WRONLY (1U << 4)
+ BPF_F_RDONLY = (1U << 3),
+ BPF_F_WRONLY = (1U << 4),
/* Flag for stack_map, store build_id+offset instead of pointer */
-#define BPF_F_STACK_BUILD_ID (1U << 5)
+ BPF_F_STACK_BUILD_ID = (1U << 5),
/* Zero-initialize hash function seed. This should only be used for testing. */
-#define BPF_F_ZERO_SEED (1U << 6)
+ BPF_F_ZERO_SEED = (1U << 6),
/* Flags for accessing BPF object from program side. */
-#define BPF_F_RDONLY_PROG (1U << 7)
-#define BPF_F_WRONLY_PROG (1U << 8)
+ BPF_F_RDONLY_PROG = (1U << 7),
+ BPF_F_WRONLY_PROG = (1U << 8),
/* Clone map from listener for newly accepted socket */
-#define BPF_F_CLONE (1U << 9)
+ BPF_F_CLONE = (1U << 9),
/* Enable memory-mapping BPF map */
-#define BPF_F_MMAPABLE (1U << 10)
+ BPF_F_MMAPABLE = (1U << 10),
+};
/* Flags for BPF_PROG_QUERY. */
@@ -372,6 +401,12 @@ enum bpf_attach_type {
*/
#define BPF_F_QUERY_EFFECTIVE (1U << 0)
+/* type for BPF_ENABLE_STATS */
+enum bpf_stats_type {
+ /* enabled run_time_ns and run_cnt */
+ BPF_STATS_RUN_TIME = 0,
+};
+
enum bpf_stack_build_id_status {
/* user space need an empty entry to identify end of a trace */
BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -391,6 +426,8 @@ struct bpf_stack_build_id {
};
};
+#define BPF_OBJ_NAME_LEN 16U
+
union bpf_attr {
struct { /* anonymous struct used by BPF_MAP_CREATE command */
__u32 map_type; /* one of enum bpf_map_type */
@@ -514,6 +551,7 @@ union bpf_attr {
__u32 prog_id;
__u32 map_id;
__u32 btf_id;
+ __u32 link_id;
};
__u32 next_id;
__u32 open_flags;
@@ -534,7 +572,7 @@ union bpf_attr {
__u32 prog_cnt;
} query;
- struct {
+ struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
__u64 name;
__u32 prog_fd;
} raw_tracepoint;
@@ -562,6 +600,33 @@ union bpf_attr {
__u64 probe_offset; /* output: probe_offset */
__u64 probe_addr; /* output: probe_addr */
} task_fd_query;
+
+ struct { /* struct used by BPF_LINK_CREATE command */
+ __u32 prog_fd; /* eBPF program to attach */
+ __u32 target_fd; /* object to attach to */
+ __u32 attach_type; /* attach type */
+ __u32 flags; /* extra flags */
+ } link_create;
+
+ struct { /* struct used by BPF_LINK_UPDATE command */
+ __u32 link_fd; /* link fd */
+ /* new program fd to update link with */
+ __u32 new_prog_fd;
+ __u32 flags; /* extra flags */
+ /* expected link's program fd; is specified only if
+ * BPF_F_REPLACE flag is set in flags */
+ __u32 old_prog_fd;
+ } link_update;
+
+ struct { /* struct used by BPF_ENABLE_STATS command */
+ __u32 type;
+ } enable_stats;
+
+ struct { /* struct used by BPF_ITER_CREATE command */
+ __u32 link_fd;
+ __u32 flags;
+ } iter_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
@@ -617,14 +682,16 @@ union bpf_attr {
* For tracing programs, safely attempt to read *size* bytes from
* kernel space address *unsafe_ptr* and store the data in *dst*.
*
- * Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
- * instead.
+ * Generally, use **bpf_probe_read_user**\ () or
+ * **bpf_probe_read_kernel**\ () instead.
* Return
* 0 on success, or a negative error in case of failure.
*
* u64 bpf_ktime_get_ns(void)
* Description
* Return the time elapsed since system boot, in nanoseconds.
+ * Does not include time the system was suspended.
+ * See: **clock_gettime**\ (**CLOCK_MONOTONIC**)
* Return
* Current *ktime*.
*
@@ -1483,11 +1550,11 @@ union bpf_attr {
* int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address
- * *unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
* more details.
*
- * Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
- * instead.
+ * Generally, use **bpf_probe_read_user_str**\ () or
+ * **bpf_probe_read_kernel_str**\ () instead.
* Return
* On success, the strictly positive length of the string,
* including the trailing NUL character. On error, a negative
@@ -1515,7 +1582,7 @@ union bpf_attr {
*
* u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
* Description
- * Equivalent to bpf_get_socket_cookie() helper that accepts
+ * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
* *skb*, but gets socket from **struct bpf_sock_ops** context.
* Return
* A 8-byte long non-decreasing number.
@@ -1535,7 +1602,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
* Description
* Emulate a call to **setsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
@@ -1543,6 +1610,12 @@ union bpf_attr {
* must be specified, see **setsockopt(2)** for more information.
* The option value of length *optlen* is pointed by *optval*.
*
+ * *bpf_socket* should be one of the following:
+ *
+ * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
+ * and **BPF_CGROUP_INET6_CONNECT**.
+ *
* This helper actually implements a subset of **setsockopt()**.
* It supports the following *level*\ s:
*
@@ -1562,6 +1635,13 @@ union bpf_attr {
* Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*.
*
+ * By default, the helper will reset any offloaded checksum
+ * indicator of the skb to CHECKSUM_NONE. This can be avoided
+ * by the following flag:
+ *
+ * * **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded
+ * checksum data of the skb to CHECKSUM_NONE.
+ *
* There are two supported modes at this time:
*
* * **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
@@ -1607,15 +1687,15 @@ union bpf_attr {
*
* The lower two bits of *flags* are used as the return code if
* the map lookup fails. This is so that the return value can be
- * one of the XDP program return codes up to XDP_TX, as chosen by
- * the caller. Any higher bits in the *flags* argument must be
+ * one of the XDP program return codes up to **XDP_TX**, as chosen
+ * by the caller. Any higher bits in the *flags* argument must be
* unset.
*
- * See also bpf_redirect(), which only supports redirecting to an
- * ifindex, but doesn't require a map to do so.
+ * See also **bpf_redirect**\ (), which only supports redirecting
+ * to an ifindex, but doesn't require a map to do so.
* Return
* **XDP_REDIRECT** on success, or the value of the two lower bits
- * of the **flags* argument on error.
+ * of the *flags* argument on error.
*
* int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
@@ -1720,7 +1800,7 @@ union bpf_attr {
* the time running for event since last normalization. The
* enabled and running times are accumulated since the perf event
* open. To achieve scaling factor between two invocations of an
- * eBPF program, users can can use CPU id as the key (which is
+ * eBPF program, users can use CPU id as the key (which is
* typical for perf array usage model) to remember the previous
* value and do the calculation inside the eBPF program.
* Return
@@ -1737,7 +1817,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
* Description
* Emulate a call to **getsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
@@ -1746,6 +1826,12 @@ union bpf_attr {
* The retrieved value is stored in the structure pointed by
* *opval* and of length *optlen*.
*
+ * *bpf_socket* should be one of the following:
+ *
+ * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
+ * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
+ * and **BPF_CGROUP_INET6_CONNECT**.
+ *
* This helper actually implements a subset of **getsockopt()**.
* It supports the following *level*\ s:
*
@@ -1763,7 +1849,7 @@ union bpf_attr {
* The first argument is the context *regs* on which the kprobe
* works.
*
- * This helper works by setting setting the PC (program counter)
+ * This helper works by setting the PC (program counter)
* to an override function which is run in place of the original
* probed function. This means the probed function is not run at
* all. The replacement function just returns with the required
@@ -1932,18 +2018,19 @@ union bpf_attr {
*
* This helper works for IPv4 and IPv6, TCP and UDP sockets. The
* domain (*addr*\ **->sa_family**) must be **AF_INET** (or
- * **AF_INET6**). Looking for a free port to bind to can be
- * expensive, therefore binding to port is not permitted by the
- * helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
- * must be set to zero.
+ * **AF_INET6**). It's advised to pass zero port (**sin_port**
+ * or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like
+ * behavior and lets the kernel efficiently pick up an unused
+ * port as long as 4-tuple is unique. Passing non-zero port might
+ * lead to degraded performance.
* Return
* 0 on success, or a negative error in case of failure.
*
* int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
- * only possible to shrink the packet as of this writing,
- * therefore *delta* must be a negative integer.
+ * possible to both shrink and grow the packet tail.
+ * Shrink done via *delta* being a negative integer.
*
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
@@ -2229,7 +2316,7 @@ union bpf_attr {
* **bpf_rc_keydown**\ () again with the same values, or calling
* **bpf_rc_repeat**\ ().
*
- * Some protocols include a toggle bit, in case the button was
+ * Some protocols include a toggle bit, in case the button was
* released and pressed again between consecutive scancodes.
*
* The *ctx* should point to the lirc sample as passed into
@@ -2575,7 +2662,6 @@ union bpf_attr {
*
* *th* points to the start of the TCP header, while *th_len*
* contains **sizeof**\ (**struct tcphdr**).
- *
* Return
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise.
@@ -2758,7 +2844,6 @@ union bpf_attr {
*
* *th* points to the start of the TCP header, while *th_len*
* contains the length of the TCP header.
- *
* Return
* On success, lower 32 bits hold the generated SYN cookie in
* followed by 16 bits which hold the MSS value for that cookie,
@@ -2841,7 +2926,7 @@ union bpf_attr {
* // size, after checking its boundaries.
* }
*
- * In comparison, using **bpf_probe_read_user()** helper here
+ * In comparison, using **bpf_probe_read_user**\ () helper here
* instead to read the string would require to estimate the length
* at compile time, and would often result in copying more memory
* than necessary.
@@ -2859,14 +2944,14 @@ union bpf_attr {
* int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
- * to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
* Return
- * On success, the strictly positive length of the string, including
+ * On success, the strictly positive length of the string, including
* the trailing NUL character. On error, a negative value.
*
* int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
* Description
- * Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
+ * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
* *rcv_nxt* is the ack_seq to be sent out.
* Return
* 0 on success, or a negative error in case of failure.
@@ -2890,6 +2975,283 @@ union bpf_attr {
* Obtain the 64bit jiffies
* Return
* The 64 bit jiffies
+ *
+ * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
+ * Description
+ * For an eBPF program attached to a perf event, retrieve the
+ * branch records (**struct perf_branch_entry**) associated to *ctx*
+ * and store it in the buffer pointed by *buf* up to size
+ * *size* bytes.
+ * Return
+ * On success, number of bytes written to *buf*. On error, a
+ * negative value.
+ *
+ * The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
+ * instead return the number of bytes required to store all the
+ * branch entries. If this flag is set, *buf* may be NULL.
+ *
+ * **-EINVAL** if arguments invalid or **size** not a multiple
+ * of **sizeof**\ (**struct perf_branch_entry**\ ).
+ *
+ * **-ENOENT** if architecture does not support branch records.
+ *
+ * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
+ * Description
+ * Returns 0 on success, values for *pid* and *tgid* as seen from the current
+ * *namespace* will be returned in *nsdata*.
+ * Return
+ * 0 on success, or one of the following in case of failure:
+ *
+ * **-EINVAL** if dev and inum supplied don't match dev_t and inode number
+ * with nsfs of current task, or if dev conversion to dev_t lost high bits.
+ *
+ * **-ENOENT** if pidns does not exists for the current task.
+ *
+ * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * Description
+ * Write raw *data* blob into a special BPF perf event held by
+ * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
+ * event must have the following attributes: **PERF_SAMPLE_RAW**
+ * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and
+ * **PERF_COUNT_SW_BPF_OUTPUT** as **config**.
+ *
+ * The *flags* are used to indicate the index in *map* for which
+ * the value must be put, masked with **BPF_F_INDEX_MASK**.
+ * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU**
+ * to indicate that the index of the current CPU core should be
+ * used.
+ *
+ * The value to write, of *size*, is passed through eBPF stack and
+ * pointed by *data*.
+ *
+ * *ctx* is a pointer to in-kernel struct xdp_buff.
+ *
+ * This helper is similar to **bpf_perf_eventoutput**\ () but
+ * restricted to raw_tracepoint bpf programs.
+ * Return
+ * 0 on success, or a negative error in case of failure.
+ *
+ * u64 bpf_get_netns_cookie(void *ctx)
+ * Description
+ * Retrieve the cookie (generated by the kernel) of the network
+ * namespace the input *ctx* is associated with. The network
+ * namespace cookie remains stable for its lifetime and provides
+ * a global identifier that can be assumed unique. If *ctx* is
+ * NULL, then the helper returns the cookie for the initial
+ * network namespace. The cookie itself is very similar to that
+ * of **bpf_get_socket_cookie**\ () helper, but for network
+ * namespaces instead of sockets.
+ * Return
+ * A 8-byte long opaque number.
+ *
+ * u64 bpf_get_current_ancestor_cgroup_id(int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of the cgroup associated
+ * with the current task at the *ancestor_level*. The root cgroup
+ * is at *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with the current task, then return value will be the
+ * same as that of **bpf_get_current_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with the current task.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_get_current_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * Description
+ * Assign the *sk* to the *skb*. When combined with appropriate
+ * routing configuration to receive the packet towards the socket,
+ * will cause *skb* to be delivered to the specified socket.
+ * Subsequent redirection of *skb* via **bpf_redirect**\ (),
+ * **bpf_clone_redirect**\ () or other methods outside of BPF may
+ * interfere with successful delivery to the socket.
+ *
+ * This operation is only valid from TC ingress path.
+ *
+ * The *flags* argument must be zero.
+ * Return
+ * 0 on success, or a negative error in case of failure:
+ *
+ * **-EINVAL** if specified *flags* are not supported.
+ *
+ * **-ENOENT** if the socket is unavailable for assignment.
+ *
+ * **-ENETUNREACH** if the socket is unreachable (wrong netns).
+ *
+ * **-EOPNOTSUPP** if the operation is not supported, for example
+ * a call from outside of TC ingress.
+ *
+ * **-ESOCKTNOSUPPORT** if the socket type is not supported
+ * (reuseport).
+ *
+ * u64 bpf_ktime_get_boot_ns(void)
+ * Description
+ * Return the time elapsed since system boot, in nanoseconds.
+ * Does include the time the system was suspended.
+ * See: **clock_gettime**\ (**CLOCK_BOOTTIME**)
+ * Return
+ * Current *ktime*.
+ *
+ * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ * Description
+ * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
+ * out the format string.
+ * The *m* represents the seq_file. The *fmt* and *fmt_size* are for
+ * the format string itself. The *data* and *data_len* are format string
+ * arguments. The *data* are a **u64** array and corresponding format string
+ * values are stored in the array. For strings and pointers where pointees
+ * are accessed, only the pointer values are stored in the *data* array.
+ * The *data_len* is the size of *data* in bytes.
+ *
+ * Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
+ * Reading kernel memory may fail due to either invalid address or
+ * valid address but requiring a major memory fault. If reading kernel memory
+ * fails, the string for **%s** will be an empty string, and the ip
+ * address for **%p{i,I}{4,6}** will be 0. Not returning error to
+ * bpf program is consistent with what **bpf_trace_printk**\ () does for now.
+ * Return
+ * 0 on success, or a negative error in case of failure:
+ *
+ * **-EBUSY** if per-CPU memory copy buffer is busy, can try again
+ * by returning 1 from bpf program.
+ *
+ * **-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported.
+ *
+ * **-E2BIG** if *fmt* contains too many format specifiers.
+ *
+ * **-EOVERFLOW** if an overflow happened: The same object will be tried again.
+ *
+ * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * Description
+ * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
+ * The *m* represents the seq_file. The *data* and *len* represent the
+ * data to write in bytes.
+ * Return
+ * 0 on success, or a negative error in case of failure:
+ *
+ * **-EOVERFLOW** if an overflow happened: The same object will be tried again.
+ *
+ * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ * Description
+ * Return the cgroup v2 id of the socket *sk*.
+ *
+ * *sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ * returned from **bpf_sk_lookup_xxx**\ (),
+ * **bpf_sk_fullsock**\ (), etc. The format of returned id is
+ * same as in **bpf_skb_cgroup_id**\ ().
+ *
+ * This helper is available only if the kernel was compiled with
+ * the **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ * Description
+ * Return id of cgroup v2 that is ancestor of cgroup associated
+ * with the *sk* at the *ancestor_level*. The root cgroup is at
+ * *ancestor_level* zero and each step down the hierarchy
+ * increments the level. If *ancestor_level* == level of cgroup
+ * associated with *sk*, then return value will be same as that
+ * of **bpf_sk_cgroup_id**\ ().
+ *
+ * The helper is useful to implement policies based on cgroups
+ * that are upper in hierarchy than immediate cgroup associated
+ * with *sk*.
+ *
+ * The format of returned id and helper limitations are same as in
+ * **bpf_sk_cgroup_id**\ ().
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
+ *
+ * void *bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
+ * Description
+ * Copy *size* bytes from *data* into a ring buffer *ringbuf*.
+ * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * new data availability is sent.
+ * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * new data availability is sent unconditionally.
+ * Return
+ * 0, on success;
+ * < 0, on error.
+ *
+ * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
+ * Description
+ * Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * Return
+ * Valid pointer with *size* bytes of memory available; NULL,
+ * otherwise.
+ *
+ * void bpf_ringbuf_submit(void *data, u64 flags)
+ * Description
+ * Submit reserved ring buffer sample, pointed to by *data*.
+ * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * new data availability is sent.
+ * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * new data availability is sent unconditionally.
+ * Return
+ * Nothing. Always succeeds.
+ *
+ * void bpf_ringbuf_discard(void *data, u64 flags)
+ * Description
+ * Discard reserved ring buffer sample, pointed to by *data*.
+ * If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * new data availability is sent.
+ * IF BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * new data availability is sent unconditionally.
+ * Return
+ * Nothing. Always succeeds.
+ *
+ * u64 bpf_ringbuf_query(void *ringbuf, u64 flags)
+ * Description
+ * Query various characteristics of provided ring buffer. What
+ * exactly is queries is determined by *flags*:
+ * - BPF_RB_AVAIL_DATA - amount of data not yet consumed;
+ * - BPF_RB_RING_SIZE - the size of ring buffer;
+ * - BPF_RB_CONS_POS - consumer position (can wrap around);
+ * - BPF_RB_PROD_POS - producer(s) position (can wrap around);
+ * Data returned is just a momentary snapshots of actual values
+ * and could be inaccurate, so this facility should be used to
+ * power heuristics and for reporting, not to make 100% correct
+ * calculation.
+ * Return
+ * Requested value, or 0, if flags are not recognized.
+ *
+ * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ * Description
+ * Change the skbs checksum level by one layer up or down, or
+ * reset it entirely to none in order to have the stack perform
+ * checksum validation. The level is applicable to the following
+ * protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
+ * | ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
+ * through **bpf_skb_adjust_room**\ () helper with passing in
+ * **BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
+ * to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
+ * the UDP header is removed. Similarly, an encap of the latter
+ * into the former could be accompanied by a helper call to
+ * **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
+ * skb is still intended to be processed in higher layers of the
+ * stack instead of just egressing at tc.
+ *
+ * There are three supported level settings at this time:
+ *
+ * * **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
+ * with CHECKSUM_UNNECESSARY.
+ * * **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
+ * with CHECKSUM_UNNECESSARY.
+ * * **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
+ * sets CHECKSUM_NONE to force checksum validation by the stack.
+ * * **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
+ * skb->csum_level.
+ * Return
+ * 0 on success, or a negative error in case of failure. In the
+ * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
+ * is returned or the error code -EACCES in case the skb is not
+ * subject to CHECKSUM_UNNECESSARY.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3010,7 +3372,24 @@ union bpf_attr {
FN(probe_read_kernel_str), \
FN(tcp_send_ack), \
FN(send_signal_thread), \
- FN(jiffies64),
+ FN(jiffies64), \
+ FN(read_branch_records), \
+ FN(get_ns_current_pid_tgid), \
+ FN(xdp_output), \
+ FN(get_netns_cookie), \
+ FN(get_current_ancestor_cgroup_id), \
+ FN(sk_assign), \
+ FN(ktime_get_boot_ns), \
+ FN(seq_printf), \
+ FN(seq_write), \
+ FN(sk_cgroup_id), \
+ FN(sk_ancestor_cgroup_id), \
+ FN(ringbuf_output), \
+ FN(ringbuf_reserve), \
+ FN(ringbuf_submit), \
+ FN(ringbuf_discard), \
+ FN(ringbuf_query), \
+ FN(csum_level),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -3025,69 +3404,132 @@ enum bpf_func_id {
/* All flags used by eBPF helper functions, placed here. */
/* BPF_FUNC_skb_store_bytes flags. */
-#define BPF_F_RECOMPUTE_CSUM (1ULL << 0)
-#define BPF_F_INVALIDATE_HASH (1ULL << 1)
+enum {
+ BPF_F_RECOMPUTE_CSUM = (1ULL << 0),
+ BPF_F_INVALIDATE_HASH = (1ULL << 1),
+};
/* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags.
* First 4 bits are for passing the header field size.
*/
-#define BPF_F_HDR_FIELD_MASK 0xfULL
+enum {
+ BPF_F_HDR_FIELD_MASK = 0xfULL,
+};
/* BPF_FUNC_l4_csum_replace flags. */
-#define BPF_F_PSEUDO_HDR (1ULL << 4)
-#define BPF_F_MARK_MANGLED_0 (1ULL << 5)
-#define BPF_F_MARK_ENFORCE (1ULL << 6)
+enum {
+ BPF_F_PSEUDO_HDR = (1ULL << 4),
+ BPF_F_MARK_MANGLED_0 = (1ULL << 5),
+ BPF_F_MARK_ENFORCE = (1ULL << 6),
+};
/* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
-#define BPF_F_INGRESS (1ULL << 0)
+enum {
+ BPF_F_INGRESS = (1ULL << 0),
+};
/* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */
-#define BPF_F_TUNINFO_IPV6 (1ULL << 0)
+enum {
+ BPF_F_TUNINFO_IPV6 = (1ULL << 0),
+};
/* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */
-#define BPF_F_SKIP_FIELD_MASK 0xffULL
-#define BPF_F_USER_STACK (1ULL << 8)
+enum {
+ BPF_F_SKIP_FIELD_MASK = 0xffULL,
+ BPF_F_USER_STACK = (1ULL << 8),
/* flags used by BPF_FUNC_get_stackid only. */
-#define BPF_F_FAST_STACK_CMP (1ULL << 9)
-#define BPF_F_REUSE_STACKID (1ULL << 10)
+ BPF_F_FAST_STACK_CMP = (1ULL << 9),
+ BPF_F_REUSE_STACKID = (1ULL << 10),
/* flags used by BPF_FUNC_get_stack only. */
-#define BPF_F_USER_BUILD_ID (1ULL << 11)
+ BPF_F_USER_BUILD_ID = (1ULL << 11),
+};
/* BPF_FUNC_skb_set_tunnel_key flags. */
-#define BPF_F_ZERO_CSUM_TX (1ULL << 1)
-#define BPF_F_DONT_FRAGMENT (1ULL << 2)
-#define BPF_F_SEQ_NUMBER (1ULL << 3)
+enum {
+ BPF_F_ZERO_CSUM_TX = (1ULL << 1),
+ BPF_F_DONT_FRAGMENT = (1ULL << 2),
+ BPF_F_SEQ_NUMBER = (1ULL << 3),
+};
/* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
* BPF_FUNC_perf_event_read_value flags.
*/
-#define BPF_F_INDEX_MASK 0xffffffffULL
-#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK
+enum {
+ BPF_F_INDEX_MASK = 0xffffffffULL,
+ BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK,
/* BPF_FUNC_perf_event_output for sk_buff input context. */
-#define BPF_F_CTXLEN_MASK (0xfffffULL << 32)
+ BPF_F_CTXLEN_MASK = (0xfffffULL << 32),
+};
/* Current network namespace */
-#define BPF_F_CURRENT_NETNS (-1L)
+enum {
+ BPF_F_CURRENT_NETNS = (-1L),
+};
+
+/* BPF_FUNC_csum_level level values. */
+enum {
+ BPF_CSUM_LEVEL_QUERY,
+ BPF_CSUM_LEVEL_INC,
+ BPF_CSUM_LEVEL_DEC,
+ BPF_CSUM_LEVEL_RESET,
+};
/* BPF_FUNC_skb_adjust_room flags. */
-#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+enum {
+ BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0),
+ BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1),
+ BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2),
+ BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
+ BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
+};
-#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
-#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
+enum {
+ BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff,
+ BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56,
+};
-#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
-#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
-#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
-#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \
BPF_ADJ_ROOM_ENCAP_L2_MASK) \
<< BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
/* BPF_FUNC_sysctl_get_name flags. */
-#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0)
+enum {
+ BPF_F_SYSCTL_BASE_NAME = (1ULL << 0),
+};
/* BPF_FUNC_sk_storage_get flags */
-#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0)
+enum {
+ BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0),
+};
+
+/* BPF_FUNC_read_branch_records flags. */
+enum {
+ BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0),
+};
+
+/* BPF_FUNC_bpf_ringbuf_commit, BPF_FUNC_bpf_ringbuf_discard, and
+ * BPF_FUNC_bpf_ringbuf_output flags.
+ */
+enum {
+ BPF_RB_NO_WAKEUP = (1ULL << 0),
+ BPF_RB_FORCE_WAKEUP = (1ULL << 1),
+};
+
+/* BPF_FUNC_bpf_ringbuf_query flags */
+enum {
+ BPF_RB_AVAIL_DATA = 0,
+ BPF_RB_RING_SIZE = 1,
+ BPF_RB_CONS_POS = 2,
+ BPF_RB_PROD_POS = 3,
+};
+
+/* BPF ring buffer constants */
+enum {
+ BPF_RINGBUF_BUSY_BIT = (1U << 31),
+ BPF_RINGBUF_DISCARD_BIT = (1U << 30),
+ BPF_RINGBUF_HDR_SZ = 8,
+};
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
@@ -3153,6 +3595,7 @@ struct __sk_buff {
__u32 wire_len;
__u32 gso_segs;
__bpf_md_ptr(struct bpf_sock *, sk);
+ __u32 gso_size;
};
struct bpf_tunnel_key {
@@ -3220,6 +3663,7 @@ struct bpf_sock {
__u32 dst_ip4;
__u32 dst_ip6[4];
__u32 state;
+ __s32 rx_queue_mapping;
};
struct bpf_tcp_sock {
@@ -3313,6 +3757,8 @@ struct xdp_md {
/* Below access go through struct xdp_rxq_info */
__u32 ingress_ifindex; /* rxq->dev->ifindex */
__u32 rx_queue_index; /* rxq->queue_index */
+
+ __u32 egress_ifindex; /* txq->dev->ifindex */
};
enum sk_action {
@@ -3335,6 +3781,8 @@ struct sk_msg_md {
__u32 remote_port; /* Stored in network byte order */
__u32 local_port; /* stored in host byte order */
__u32 size; /* Total size of sk_msg */
+
+ __bpf_md_ptr(struct bpf_sock *, sk); /* current socket */
};
struct sk_reuseport_md {
@@ -3425,6 +3873,29 @@ struct bpf_btf_info {
__u32 id;
} __attribute__((aligned(8)));
+struct bpf_link_info {
+ __u32 type;
+ __u32 id;
+ __u32 prog_id;
+ union {
+ struct {
+ __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
+ __u32 tp_name_len; /* in/out: tp_name buffer len */
+ } raw_tracepoint;
+ struct {
+ __u32 attach_type;
+ } tracing;
+ struct {
+ __u64 cgroup_id;
+ __u32 attach_type;
+ } cgroup;
+ struct {
+ __u32 netns_ino;
+ __u32 attach_type;
+ } netns;
+ };
+} __attribute__((aligned(8)));
+
/* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
* by user and intended to be used by socket (e.g. to bind to, depends on
* attach attach type).
@@ -3437,7 +3908,7 @@ struct bpf_sock_addr {
__u32 user_ip6[4]; /* Allows 1,2,4,8-byte read and 4,8-byte write.
* Stored in network byte order.
*/
- __u32 user_port; /* Allows 4-byte read and write.
+ __u32 user_port; /* Allows 1,2,4-byte read and 4-byte write.
* Stored in network byte order
*/
__u32 family; /* Allows 4-byte read, but no write */
@@ -3505,13 +3976,14 @@ struct bpf_sock_ops {
};
/* Definitions for bpf_sock_ops_cb_flags */
-#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0)
-#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1)
-#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2)
-#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3)
-#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently
- * supported cb flags
- */
+enum {
+ BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0),
+ BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1),
+ BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2),
+ BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3),
+/* Mask of all currently supported cb flags */
+ BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF,
+};
/* List of known BPF sock_ops operators.
* New entries can only be added at the end
@@ -3590,8 +4062,10 @@ enum {
BPF_TCP_MAX_STATES /* Leave at the end! */
};
-#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */
-#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */
+enum {
+ TCP_BPF_IW = 1001, /* Set TCP initial congestion window */
+ TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */
+};
struct bpf_perf_event_value {
__u64 counter;
@@ -3599,12 +4073,16 @@ struct bpf_perf_event_value {
__u64 running;
};
-#define BPF_DEVCG_ACC_MKNOD (1ULL << 0)
-#define BPF_DEVCG_ACC_READ (1ULL << 1)
-#define BPF_DEVCG_ACC_WRITE (1ULL << 2)
+enum {
+ BPF_DEVCG_ACC_MKNOD = (1ULL << 0),
+ BPF_DEVCG_ACC_READ = (1ULL << 1),
+ BPF_DEVCG_ACC_WRITE = (1ULL << 2),
+};
-#define BPF_DEVCG_DEV_BLOCK (1ULL << 0)
-#define BPF_DEVCG_DEV_CHAR (1ULL << 1)
+enum {
+ BPF_DEVCG_DEV_BLOCK = (1ULL << 0),
+ BPF_DEVCG_DEV_CHAR = (1ULL << 1),
+};
struct bpf_cgroup_dev_ctx {
/* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */
@@ -3620,8 +4098,10 @@ struct bpf_raw_tracepoint_args {
/* DIRECT: Skip the FIB rules and go to FIB table associated with device
* OUTPUT: Do lookup from egress perspective; default is ingress
*/
-#define BPF_FIB_LOOKUP_DIRECT (1U << 0)
-#define BPF_FIB_LOOKUP_OUTPUT (1U << 1)
+enum {
+ BPF_FIB_LOOKUP_DIRECT = (1U << 0),
+ BPF_FIB_LOOKUP_OUTPUT = (1U << 1),
+};
enum {
BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */
@@ -3693,9 +4173,11 @@ enum bpf_task_fd_type {
BPF_FD_TYPE_URETPROBE, /* filename + offset */
};
-#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0)
-#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1)
-#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2)
+enum {
+ BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0),
+ BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1),
+ BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2),
+};
struct bpf_flow_keys {
__u16 nhoff;
@@ -3761,4 +4243,8 @@ struct bpf_sockopt {
__s32 retval;
};
+struct bpf_pidns_info {
+ __u32 pid;
+ __u32 tgid;
+};
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h
index 0d8a6f47711c..a10e3cdc2839 100644
--- a/tools/include/uapi/linux/fscrypt.h
+++ b/tools/include/uapi/linux/fscrypt.h
@@ -163,6 +163,7 @@ struct fscrypt_get_key_status_arg {
#define FS_IOC_REMOVE_ENCRYPTION_KEY _IOWR('f', 24, struct fscrypt_remove_key_arg)
#define FS_IOC_REMOVE_ENCRYPTION_KEY_ALL_USERS _IOWR('f', 25, struct fscrypt_remove_key_arg)
#define FS_IOC_GET_ENCRYPTION_KEY_STATUS _IOWR('f', 26, struct fscrypt_get_key_status_arg)
+#define FS_IOC_GET_ENCRYPTION_NONCE _IOR('f', 27, __u8[16])
/**********************************************************************/
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 024af2d1d0af..cafedbbfefbe 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -343,6 +343,7 @@ enum {
IFLA_BRPORT_NEIGH_SUPPRESS,
IFLA_BRPORT_ISOLATED,
IFLA_BRPORT_BACKUP_PORT,
+ IFLA_BRPORT_MRP_RING_OPEN,
__IFLA_BRPORT_MAX
};
#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
@@ -463,6 +464,7 @@ enum {
IFLA_MACSEC_REPLAY_PROTECT,
IFLA_MACSEC_VALIDATION,
IFLA_MACSEC_PAD,
+ IFLA_MACSEC_OFFLOAD,
__IFLA_MACSEC_MAX,
};
@@ -489,6 +491,7 @@ enum macsec_validation_type {
enum macsec_offload {
MACSEC_OFFLOAD_OFF = 0,
MACSEC_OFFLOAD_PHY = 1,
+ MACSEC_OFFLOAD_MAC = 2,
__MACSEC_OFFLOAD_END,
MACSEC_OFFLOAD_MAX = __MACSEC_OFFLOAD_END - 1,
};
@@ -960,11 +963,12 @@ enum {
#define XDP_FLAGS_SKB_MODE (1U << 1)
#define XDP_FLAGS_DRV_MODE (1U << 2)
#define XDP_FLAGS_HW_MODE (1U << 3)
+#define XDP_FLAGS_REPLACE (1U << 4)
#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \
XDP_FLAGS_DRV_MODE | \
XDP_FLAGS_HW_MODE)
#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \
- XDP_FLAGS_MODES)
+ XDP_FLAGS_MODES | XDP_FLAGS_REPLACE)
/* These are stored into IFLA_XDP_ATTACHED on dump. */
enum {
@@ -984,6 +988,7 @@ enum {
IFLA_XDP_DRV_PROG_ID,
IFLA_XDP_SKB_PROG_ID,
IFLA_XDP_HW_PROG_ID,
+ IFLA_XDP_EXPECTED_FD,
__IFLA_XDP_MAX,
};
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 4b95f9a31a2f..fdd632c833b4 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -116,7 +116,7 @@ struct kvm_irq_level {
* ACPI gsi notion of irq.
* For IA-64 (APIC model) IOAPIC0: irq 0-23; IOAPIC1: irq 24-47..
* For X86 (standard AT mode) PIC0/1: irq 0-15. IOAPIC0: 0-23..
- * For ARM: See Documentation/virt/kvm/api.txt
+ * For ARM: See Documentation/virt/kvm/api.rst
*/
union {
__u32 irq;
@@ -474,12 +474,17 @@ struct kvm_s390_mem_op {
__u32 size; /* amount of bytes */
__u32 op; /* type of operation */
__u64 buf; /* buffer in userspace */
- __u8 ar; /* the access register number */
- __u8 reserved[31]; /* should be set to 0 */
+ union {
+ __u8 ar; /* the access register number */
+ __u32 sida_offset; /* offset into the sida */
+ __u8 reserved[32]; /* should be set to 0 */
+ };
};
/* types for kvm_s390_mem_op->op */
#define KVM_S390_MEMOP_LOGICAL_READ 0
#define KVM_S390_MEMOP_LOGICAL_WRITE 1
+#define KVM_S390_MEMOP_SIDA_READ 2
+#define KVM_S390_MEMOP_SIDA_WRITE 3
/* flags for kvm_s390_mem_op->flags */
#define KVM_S390_MEMOP_F_CHECK_ONLY (1ULL << 0)
#define KVM_S390_MEMOP_F_INJECT_EXCEPTION (1ULL << 1)
@@ -1010,6 +1015,8 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_NISV_TO_USER 177
#define KVM_CAP_ARM_INJECT_EXT_DABT 178
#define KVM_CAP_S390_VCPU_RESETS 179
+#define KVM_CAP_S390_PROTECTED 180
+#define KVM_CAP_PPC_SECURE_GUEST 181
#ifdef KVM_CAP_IRQ_ROUTING
@@ -1100,7 +1107,7 @@ struct kvm_xen_hvm_config {
*
* KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
* the irqfd to operate in resampling mode for level triggered interrupt
- * emulation. See Documentation/virt/kvm/api.txt.
+ * emulation. See Documentation/virt/kvm/api.rst.
*/
#define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
@@ -1478,6 +1485,39 @@ struct kvm_enc_region {
#define KVM_S390_NORMAL_RESET _IO(KVMIO, 0xc3)
#define KVM_S390_CLEAR_RESET _IO(KVMIO, 0xc4)
+struct kvm_s390_pv_sec_parm {
+ __u64 origin;
+ __u64 length;
+};
+
+struct kvm_s390_pv_unp {
+ __u64 addr;
+ __u64 size;
+ __u64 tweak;
+};
+
+enum pv_cmd_id {
+ KVM_PV_ENABLE,
+ KVM_PV_DISABLE,
+ KVM_PV_SET_SEC_PARMS,
+ KVM_PV_UNPACK,
+ KVM_PV_VERIFY,
+ KVM_PV_PREP_RESET,
+ KVM_PV_UNSHARE_ALL,
+};
+
+struct kvm_pv_cmd {
+ __u32 cmd; /* Command to be executed */
+ __u16 rc; /* Ultravisor return code */
+ __u16 rrc; /* Ultravisor return reason code */
+ __u64 data; /* Data or address */
+ __u32 flags; /* flags for future extensions. Must be 0 for now */
+ __u32 reserved[3];
+};
+
+/* Available with KVM_CAP_S390_PROTECTED */
+#define KVM_S390_PV_COMMAND _IOWR(KVMIO, 0xc5, struct kvm_pv_cmd)
+
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
/* Guest initialization commands */
@@ -1628,4 +1668,7 @@ struct kvm_hyperv_eventfd {
#define KVM_HYPERV_CONN_ID_MASK 0x00ffffff
#define KVM_HYPERV_EVENTFD_DEASSIGN (1 << 0)
+#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
+#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
+
#endif /* __LINUX_KVM_H */
diff --git a/tools/include/uapi/linux/mman.h b/tools/include/uapi/linux/mman.h
index fc1a64c3447b..923cc162609c 100644
--- a/tools/include/uapi/linux/mman.h
+++ b/tools/include/uapi/linux/mman.h
@@ -5,8 +5,9 @@
#include <asm/mman.h>
#include <asm-generic/hugetlb_encode.h>
-#define MREMAP_MAYMOVE 1
-#define MREMAP_FIXED 2
+#define MREMAP_MAYMOVE 1
+#define MREMAP_FIXED 2
+#define MREMAP_DONTUNMAP 4
#define OVERCOMMIT_GUESS 0
#define OVERCOMMIT_ALWAYS 1
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 377d794d3105..7b2d6fc9e6ed 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -142,8 +142,9 @@ enum perf_event_sample_format {
PERF_SAMPLE_REGS_INTR = 1U << 18,
PERF_SAMPLE_PHYS_ADDR = 1U << 19,
PERF_SAMPLE_AUX = 1U << 20,
+ PERF_SAMPLE_CGROUP = 1U << 21,
- PERF_SAMPLE_MAX = 1U << 21, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 22, /* non-ABI */
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
@@ -181,6 +182,8 @@ enum perf_branch_sample_type_shift {
PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT = 16, /* save branch type */
+ PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT = 17, /* save low level index of raw branch records */
+
PERF_SAMPLE_BRANCH_MAX_SHIFT /* non-ABI */
};
@@ -208,6 +211,8 @@ enum perf_branch_sample_type {
PERF_SAMPLE_BRANCH_TYPE_SAVE =
1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT,
+ PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT,
+
PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
};
@@ -377,7 +382,8 @@ struct perf_event_attr {
ksymbol : 1, /* include ksymbol events */
bpf_event : 1, /* include bpf events */
aux_output : 1, /* generate AUX records instead of events */
- __reserved_1 : 32;
+ cgroup : 1, /* include cgroup events */
+ __reserved_1 : 31;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -853,7 +859,9 @@ enum perf_event_type {
* char data[size];}&& PERF_SAMPLE_RAW
*
* { u64 nr;
- * { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
+ * { u64 hw_idx; } && PERF_SAMPLE_BRANCH_HW_INDEX
+ * { u64 from, to, flags } lbr[nr];
+ * } && PERF_SAMPLE_BRANCH_STACK
*
* { u64 abi; # enum perf_sample_regs_abi
* u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER
@@ -1006,6 +1014,16 @@ enum perf_event_type {
*/
PERF_RECORD_BPF_EVENT = 18,
+ /*
+ * struct {
+ * struct perf_event_header header;
+ * u64 id;
+ * char path[];
+ * struct sample_id sample_id;
+ * };
+ */
+ PERF_RECORD_CGROUP = 19,
+
PERF_RECORD_MAX, /* non-ABI */
};
diff --git a/tools/include/uapi/linux/sched.h b/tools/include/uapi/linux/sched.h
index 2e3bc22c6f20..3bac0a8ceab2 100644
--- a/tools/include/uapi/linux/sched.h
+++ b/tools/include/uapi/linux/sched.h
@@ -35,6 +35,7 @@
/* Flags for the clone3() syscall. */
#define CLONE_CLEAR_SIGHAND 0x100000000ULL /* Clear any signal handler and reset to SIG_DFL. */
+#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
/*
* cloning flags intersect with CSIGNAL so can be used with unshare and clone3
@@ -81,6 +82,8 @@
* @set_tid_size: This defines the size of the array referenced
* in @set_tid. This cannot be larger than the
* kernel's limit of nested PID namespaces.
+ * @cgroup: If CLONE_INTO_CGROUP is specified set this to
+ * a file descriptor for the cgroup.
*
* The structure is versioned by size and thus extensible.
* New struct members must go at the end of the struct and
@@ -97,11 +100,13 @@ struct clone_args {
__aligned_u64 tls;
__aligned_u64 set_tid;
__aligned_u64 set_tid_size;
+ __aligned_u64 cgroup;
};
#endif
#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */
#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */
+#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */
/*
* Scheduling policies
diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h
index ad80a5c885d5..d1192783139a 100644
--- a/tools/include/uapi/linux/stat.h
+++ b/tools/include/uapi/linux/stat.h
@@ -148,9 +148,18 @@ struct statx {
#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
#define STATX_BTIME 0x00000800U /* Want/got stx_btime */
-#define STATX_ALL 0x00000fffU /* All currently supported flags */
+
#define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */
+#ifndef __KERNEL__
+/*
+ * This is deprecated, and shall remain the same value in the future. To avoid
+ * confusion please use the equivalent (STATX_BASIC_STATS | STATX_BTIME)
+ * instead.
+ */
+#define STATX_ALL 0x00000fffU
+#endif
+
/*
* Attributes to be found in stx_attributes and masked in stx_attributes_mask.
*
diff --git a/tools/testing/selftests/bpf/include/uapi/linux/types.h b/tools/include/uapi/linux/types.h
index 91fa51a9c31d..91fa51a9c31d 100644
--- a/tools/testing/selftests/bpf/include/uapi/linux/types.h
+++ b/tools/include/uapi/linux/types.h
diff --git a/tools/include/uapi/linux/vhost.h b/tools/include/uapi/linux/vhost.h
index 40d028eed645..9fe72e4b1373 100644
--- a/tools/include/uapi/linux/vhost.h
+++ b/tools/include/uapi/linux/vhost.h
@@ -116,4 +116,28 @@
#define VHOST_VSOCK_SET_GUEST_CID _IOW(VHOST_VIRTIO, 0x60, __u64)
#define VHOST_VSOCK_SET_RUNNING _IOW(VHOST_VIRTIO, 0x61, int)
+/* VHOST_VDPA specific defines */
+
+/* Get the device id. The device ids follow the same definition of
+ * the device id defined in virtio-spec.
+ */
+#define VHOST_VDPA_GET_DEVICE_ID _IOR(VHOST_VIRTIO, 0x70, __u32)
+/* Get and set the status. The status bits follow the same definition
+ * of the device status defined in virtio-spec.
+ */
+#define VHOST_VDPA_GET_STATUS _IOR(VHOST_VIRTIO, 0x71, __u8)
+#define VHOST_VDPA_SET_STATUS _IOW(VHOST_VIRTIO, 0x72, __u8)
+/* Get and set the device config. The device config follows the same
+ * definition of the device config defined in virtio-spec.
+ */
+#define VHOST_VDPA_GET_CONFIG _IOR(VHOST_VIRTIO, 0x73, \
+ struct vhost_vdpa_config)
+#define VHOST_VDPA_SET_CONFIG _IOW(VHOST_VIRTIO, 0x74, \
+ struct vhost_vdpa_config)
+/* Enable/disable the ring. */
+#define VHOST_VDPA_SET_VRING_ENABLE _IOW(VHOST_VIRTIO, 0x75, \
+ struct vhost_vring_state)
+/* Get the max ring size. */
+#define VHOST_VDPA_GET_VRING_NUM _IOR(VHOST_VIRTIO, 0x76, __u16)
+
#endif
diff --git a/tools/include/vdso/bits.h b/tools/include/vdso/bits.h
new file mode 100644
index 000000000000..6d005a1f5d94
--- /dev/null
+++ b/tools/include/vdso/bits.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_BITS_H
+#define __VDSO_BITS_H
+
+#include <vdso/const.h>
+
+#define BIT(nr) (UL(1) << (nr))
+
+#endif /* __VDSO_BITS_H */
diff --git a/tools/include/vdso/const.h b/tools/include/vdso/const.h
new file mode 100644
index 000000000000..94b385ad438d
--- /dev/null
+++ b/tools/include/vdso/const.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __VDSO_CONST_H
+#define __VDSO_CONST_H
+
+#include <uapi/linux/const.h>
+
+#define UL(x) (_UL(x))
+#define ULL(x) (_ULL(x))
+
+#endif /* __VDSO_CONST_H */
diff --git a/tools/kvm/kvm_stat/kvm_stat b/tools/kvm/kvm_stat/kvm_stat
index 4cf93110c259..d199a3694be8 100755
--- a/tools/kvm/kvm_stat/kvm_stat
+++ b/tools/kvm/kvm_stat/kvm_stat
@@ -25,14 +25,17 @@ import sys
import locale
import os
import time
-import optparse
+import argparse
import ctypes
import fcntl
import resource
import struct
import re
import subprocess
+import signal
from collections import defaultdict, namedtuple
+from functools import reduce
+from datetime import datetime
VMX_EXIT_REASONS = {
'EXCEPTION_NMI': 0,
@@ -226,6 +229,8 @@ IOCTL_NUMBERS = {
'RESET': 0x00002403,
}
+signal_received = False
+
ENCODING = locale.getpreferredencoding(False)
TRACE_FILTER = re.compile(r'^[^\(]*$')
@@ -873,7 +878,7 @@ class Stats(object):
if options.debugfs:
providers.append(DebugfsProvider(options.pid, options.fields,
- options.dbgfs_include_past))
+ options.debugfs_include_past))
if options.tracepoints or not providers:
providers.append(TracepointProvider(options.pid, options.fields))
@@ -974,15 +979,17 @@ DELAY_DEFAULT = 3.0
MAX_GUEST_NAME_LEN = 48
MAX_REGEX_LEN = 44
SORT_DEFAULT = 0
+MIN_DELAY = 0.1
+MAX_DELAY = 25.5
class Tui(object):
"""Instruments curses to draw a nice text ui."""
- def __init__(self, stats):
+ def __init__(self, stats, opts):
self.stats = stats
self.screen = None
self._delay_initial = 0.25
- self._delay_regular = DELAY_DEFAULT
+ self._delay_regular = opts.set_delay
self._sorting = SORT_DEFAULT
self._display_guests = 0
@@ -1183,7 +1190,7 @@ class Tui(object):
if not self._is_running_guest(self.stats.pid_filter):
if self._gname:
- try: # ...to identify the guest by name in case it's back
+ try: # ...to identify the guest by name in case it's back
pids = self.get_pid_from_gname(self._gname)
if len(pids) == 1:
self._refresh_header(pids[0])
@@ -1282,7 +1289,8 @@ class Tui(object):
' p filter by guest name/PID',
' q quit',
' r reset stats',
- ' s set update interval',
+ ' s set delay between refreshs (value range: '
+ '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
' x toggle reporting of stats for individual child trace'
' events',
'Any other key refreshes statistics immediately')
@@ -1336,8 +1344,8 @@ class Tui(object):
msg = ''
while True:
self.screen.erase()
- self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).' %
- DELAY_DEFAULT, curses.A_BOLD)
+ self.screen.addstr(0, 0, 'Set update interval (defaults to %.1fs).'
+ % DELAY_DEFAULT, curses.A_BOLD)
self.screen.addstr(4, 0, msg)
self.screen.addstr(2, 0, 'Change delay from %.1fs to ' %
self._delay_regular)
@@ -1348,11 +1356,9 @@ class Tui(object):
try:
if len(val) > 0:
delay = float(val)
- if delay < 0.1:
- msg = '"' + str(val) + '": Value must be >=0.1'
- continue
- if delay > 25.5:
- msg = '"' + str(val) + '": Value must be <=25.5'
+ err = is_delay_valid(delay)
+ if err is not None:
+ msg = err
continue
else:
delay = DELAY_DEFAULT
@@ -1488,32 +1494,106 @@ def batch(stats):
pass
-def log(stats):
- """Prints statistics as reiterating key block, multiple value blocks."""
- keys = sorted(stats.get().keys())
-
- def banner():
+class StdFormat(object):
+ def __init__(self, keys):
+ self._banner = ''
for key in keys:
- print(key.split(' ')[0], end=' ')
- print()
+ self._banner += key.split(' ')[0] + ' '
- def statline():
- s = stats.get()
+ def get_banner(self):
+ return self._banner
+
+ def get_statline(self, keys, s):
+ res = ''
for key in keys:
- print(' %9d' % s[key].delta, end=' ')
- print()
+ res += ' %9d' % s[key].delta
+ return res
+
+
+class CSVFormat(object):
+ def __init__(self, keys):
+ self._banner = 'timestamp'
+ self._banner += reduce(lambda res, key: "{},{!s}".format(res,
+ key.split(' ')[0]), keys, '')
+
+ def get_banner(self):
+ return self._banner
+
+ def get_statline(self, keys, s):
+ return reduce(lambda res, key: "{},{!s}".format(res, s[key].delta),
+ keys, '')
+
+
+def log(stats, opts, frmt, keys):
+ """Prints statistics as reiterating key block, multiple value blocks."""
+ global signal_received
line = 0
banner_repeat = 20
+ f = None
+
+ def do_banner(opts):
+ nonlocal f
+ if opts.log_to_file:
+ if not f:
+ try:
+ f = open(opts.log_to_file, 'a')
+ except (IOError, OSError):
+ sys.exit("Error: Could not open file: %s" %
+ opts.log_to_file)
+ if isinstance(frmt, CSVFormat) and f.tell() != 0:
+ return
+ print(frmt.get_banner(), file=f or sys.stdout)
+
+ def do_statline(opts, values):
+ statline = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + \
+ frmt.get_statline(keys, values)
+ print(statline, file=f or sys.stdout)
+
+ do_banner(opts)
+ banner_printed = True
while True:
try:
- time.sleep(1)
- if line % banner_repeat == 0:
- banner()
- statline()
- line += 1
+ time.sleep(opts.set_delay)
+ if signal_received:
+ banner_printed = True
+ line = 0
+ f.close()
+ do_banner(opts)
+ signal_received = False
+ if (line % banner_repeat == 0 and not banner_printed and
+ not (opts.log_to_file and isinstance(frmt, CSVFormat))):
+ do_banner(opts)
+ banner_printed = True
+ values = stats.get()
+ if (not opts.skip_zero_records or
+ any(values[k].delta != 0 for k in keys)):
+ do_statline(opts, values)
+ line += 1
+ banner_printed = False
except KeyboardInterrupt:
break
+ if opts.log_to_file:
+ f.close()
+
+
+def handle_signal(sig, frame):
+ global signal_received
+
+ signal_received = True
+
+ return
+
+
+def is_delay_valid(delay):
+ """Verify delay is in valid value range."""
+ msg = None
+ if delay < MIN_DELAY:
+ msg = '"' + str(delay) + '": Delay must be >=%s' % MIN_DELAY
+ if delay > MAX_DELAY:
+ msg = '"' + str(delay) + '": Delay must be <=%s' % MAX_DELAY
+ return msg
+
def get_options():
"""Returns processed program arguments."""
@@ -1545,89 +1625,97 @@ Interactive Commands:
p filter by PID
q quit
r reset stats
- s set update interval
+ s set update interval (value range: 0.1-25.5 secs)
x toggle reporting of stats for individual child trace events
Press any other key to refresh statistics immediately.
""" % (PATH_DEBUGFS_KVM, PATH_DEBUGFS_TRACING)
- class PlainHelpFormatter(optparse.IndentedHelpFormatter):
- def format_description(self, description):
- if description:
- return description + "\n"
- else:
- return ""
-
- def cb_guest_to_pid(option, opt, val, parser):
- try:
- pids = Tui.get_pid_from_gname(val)
- except:
- sys.exit('Error while searching for guest "{}". Use "-p" to '
- 'specify a pid instead?'.format(val))
- if len(pids) == 0:
- sys.exit('Error: No guest by the name "{}" found'.format(val))
- if len(pids) > 1:
- sys.exit('Error: Multiple processes found (pids: {}). Use "-p" '
- 'to specify the desired pid'.format(" ".join(pids)))
- parser.values.pid = pids[0]
-
- optparser = optparse.OptionParser(description=description_text,
- formatter=PlainHelpFormatter())
- optparser.add_option('-1', '--once', '--batch',
- action='store_true',
- default=False,
- dest='once',
- help='run in batch mode for one second',
- )
- optparser.add_option('-i', '--debugfs-include-past',
- action='store_true',
- default=False,
- dest='dbgfs_include_past',
- help='include all available data on past events for '
- 'debugfs',
- )
- optparser.add_option('-l', '--log',
- action='store_true',
- default=False,
- dest='log',
- help='run in logging mode (like vmstat)',
- )
- optparser.add_option('-t', '--tracepoints',
- action='store_true',
- default=False,
- dest='tracepoints',
- help='retrieve statistics from tracepoints',
- )
- optparser.add_option('-d', '--debugfs',
- action='store_true',
- default=False,
- dest='debugfs',
- help='retrieve statistics from debugfs',
- )
- optparser.add_option('-f', '--fields',
- action='store',
- default='',
- dest='fields',
- help='''fields to display (regex)
- "-f help" for a list of available events''',
- )
- optparser.add_option('-p', '--pid',
- action='store',
- default=0,
- type='int',
- dest='pid',
- help='restrict statistics to pid',
- )
- optparser.add_option('-g', '--guest',
- action='callback',
- type='string',
- dest='pid',
- metavar='GUEST',
- help='restrict statistics to guest by name',
- callback=cb_guest_to_pid,
- )
- options, unkn = optparser.parse_args(sys.argv)
- if len(unkn) != 1:
- sys.exit('Error: Extra argument(s): ' + ' '.join(unkn[1:]))
+ class Guest_to_pid(argparse.Action):
+ def __call__(self, parser, namespace, values, option_string=None):
+ try:
+ pids = Tui.get_pid_from_gname(values)
+ except:
+ sys.exit('Error while searching for guest "{}". Use "-p" to '
+ 'specify a pid instead?'.format(values))
+ if len(pids) == 0:
+ sys.exit('Error: No guest by the name "{}" found'
+ .format(values))
+ if len(pids) > 1:
+ sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
+ ' to specify the desired pid'.format(" ".join(pids)))
+ namespace.pid = pids[0]
+
+ argparser = argparse.ArgumentParser(description=description_text,
+ formatter_class=argparse
+ .RawTextHelpFormatter)
+ argparser.add_argument('-1', '--once', '--batch',
+ action='store_true',
+ default=False,
+ help='run in batch mode for one second',
+ )
+ argparser.add_argument('-c', '--csv',
+ action='store_true',
+ default=False,
+ help='log in csv format - requires option -l/-L',
+ )
+ argparser.add_argument('-d', '--debugfs',
+ action='store_true',
+ default=False,
+ help='retrieve statistics from debugfs',
+ )
+ argparser.add_argument('-f', '--fields',
+ default='',
+ help='''fields to display (regex)
+"-f help" for a list of available events''',
+ )
+ argparser.add_argument('-g', '--guest',
+ type=str,
+ help='restrict statistics to guest by name',
+ action=Guest_to_pid,
+ )
+ argparser.add_argument('-i', '--debugfs-include-past',
+ action='store_true',
+ default=False,
+ help='include all available data on past events for'
+ ' debugfs',
+ )
+ argparser.add_argument('-l', '--log',
+ action='store_true',
+ default=False,
+ help='run in logging mode (like vmstat)',
+ )
+ argparser.add_argument('-L', '--log-to-file',
+ type=str,
+ metavar='FILE',
+ help="like '--log', but logging to a file"
+ )
+ argparser.add_argument('-p', '--pid',
+ type=int,
+ default=0,
+ help='restrict statistics to pid',
+ )
+ argparser.add_argument('-s', '--set-delay',
+ type=float,
+ default=DELAY_DEFAULT,
+ metavar='DELAY',
+ help='set delay between refreshs (value range: '
+ '%s-%s secs)' % (MIN_DELAY, MAX_DELAY),
+ )
+ argparser.add_argument('-t', '--tracepoints',
+ action='store_true',
+ default=False,
+ help='retrieve statistics from tracepoints',
+ )
+ argparser.add_argument('-z', '--skip-zero-records',
+ action='store_true',
+ default=False,
+ help='omit records with all zeros in logging mode',
+ )
+ options = argparser.parse_args()
+ if options.csv and not (options.log or options.log_to_file):
+ sys.exit('Error: Option -c/--csv requires -l/--log')
+ if options.skip_zero_records and not (options.log or options.log_to_file):
+ sys.exit('Error: Option -z/--skip-zero-records requires -l/-L')
try:
# verify that we were passed a valid regex up front
re.compile(options.fields)
@@ -1693,6 +1781,10 @@ def main():
sys.stderr.write('Did you use a (unsupported) tid instead of a pid?\n')
sys.exit('Specified pid does not exist.')
+ err = is_delay_valid(options.set_delay)
+ if err is not None:
+ sys.exit('Error: ' + err)
+
stats = Stats(options)
if options.fields == 'help':
@@ -1703,13 +1795,21 @@ def main():
sys.stdout.write(' ' + '\n '.join(sorted(set(event_list))) + '\n')
sys.exit(0)
- if options.log:
- log(stats)
+ if options.log or options.log_to_file:
+ if options.log_to_file:
+ signal.signal(signal.SIGHUP, handle_signal)
+ keys = sorted(stats.get().keys())
+ if options.csv:
+ frmt = CSVFormat(keys)
+ else:
+ frmt = StdFormat(keys)
+ log(stats, options, frmt, keys)
elif not options.once:
- with Tui(stats) as tui:
+ with Tui(stats, options) as tui:
tui.show_stats()
else:
batch(stats)
+
if __name__ == "__main__":
main()
diff --git a/tools/kvm/kvm_stat/kvm_stat.service b/tools/kvm/kvm_stat/kvm_stat.service
new file mode 100644
index 000000000000..71aabaffe779
--- /dev/null
+++ b/tools/kvm/kvm_stat/kvm_stat.service
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+[Unit]
+Description=Service that logs KVM kernel module trace events
+Before=qemu-kvm.service
+
+[Service]
+Type=simple
+ExecStart=/usr/bin/kvm_stat -dtcz -s 10 -L /var/log/kvm_stat.csv
+ExecReload=/bin/kill -HUP $MAINPID
+Restart=always
+SyslogIdentifier=kvm_stat
+SyslogLevel=debug
+
+[Install]
+WantedBy=multi-user.target
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt
index c057ba52364e..feaf46451e83 100644
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -49,7 +49,7 @@ INTERACTIVE COMMANDS
*r*:: reset stats
-*s*:: set update interval
+*s*:: set delay between refreshs
*x*:: toggle reporting of stats for child trace events
:: *Note*: The stats for the parents summarize the respective child trace
@@ -64,37 +64,56 @@ OPTIONS
--batch::
run in batch mode for one second
--l::
---log::
- run in logging mode (like vmstat)
-
--t::
---tracepoints::
- retrieve statistics from tracepoints
+-c::
+--csv::
+ log in csv format. Requires option -l/--log or -L/--log-to-file.
+ When used with option -L/--log-to-file, the header is only ever
+ written to start of file to preserve the format.
-d::
--debugfs::
retrieve statistics from debugfs
+-f<fields>::
+--fields=<fields>::
+ fields to display (regex), "-f help" for a list of available events
+
+-g<guest>::
+--guest=<guest_name>::
+ limit statistics to one virtual machine (guest name)
+
+-h::
+--help::
+ show help message
+
-i::
--debugfs-include-past::
include all available data on past events for debugfs
+-l::
+--log::
+ run in logging mode (like vmstat)
+
+
+-L<file>::
+--log-to-file=<file>::
+ like -l/--log, but logging to a file. Appends to existing files.
+
-p<pid>::
--pid=<pid>::
limit statistics to one virtual machine (pid)
--g<guest>::
---guest=<guest_name>::
- limit statistics to one virtual machine (guest name)
+-s::
+--set-delay::
+ set delay between refreshs (value range: 0.1-25.5 secs)
--f<fields>::
---fields=<fields>::
- fields to display (regex), "-f help" for a list of available events
+-t::
+--tracepoints::
+ retrieve statistics from tracepoints
--h::
---help::
- show help message
+*z*::
+--skip-zero-records::
+ omit records with all zeros in logging mode
SEE ALSO
--------
diff --git a/tools/laptop/dslm/.gitignore b/tools/laptop/dslm/.gitignore
index 9fc984e64386..f7f1296b96ae 100644
--- a/tools/laptop/dslm/.gitignore
+++ b/tools/laptop/dslm/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
dslm
diff --git a/tools/laptop/freefall/freefall.c b/tools/laptop/freefall/freefall.c
index d29a86cda87f..d77d7861787c 100644
--- a/tools/laptop/freefall/freefall.c
+++ b/tools/laptop/freefall/freefall.c
@@ -4,7 +4,7 @@
* Copyright 2008 Eric Piel
* Copyright 2009 Pavel Machek <pavel@ucw.cz>
* Copyright 2012 Sonal Santan
- * Copyright 2014 Pali Rohár <pali.rohar@gmail.com>
+ * Copyright 2014 Pali Rohár <pali@kernel.org>
*/
#include <stdio.h>
diff --git a/tools/leds/.gitignore b/tools/leds/.gitignore
index ac96d9f53dfc..06bd3ee1b7c9 100644
--- a/tools/leds/.gitignore
+++ b/tools/leds/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
uledmon
diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build
index f4ed9629ae85..0f75b28654de 100644
--- a/tools/lib/api/fs/Build
+++ b/tools/lib/api/fs/Build
@@ -1,2 +1,3 @@
libapi-y += fs.o
libapi-y += tracing_path.o
+libapi-y += cgroup.o
diff --git a/tools/lib/api/fs/cgroup.c b/tools/lib/api/fs/cgroup.c
new file mode 100644
index 000000000000..889a6eb4aaca
--- /dev/null
+++ b/tools/lib/api/fs/cgroup.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/stringify.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "fs.h"
+
+int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys)
+{
+ FILE *fp;
+ char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
+ char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
+ char *token, *saved_ptr = NULL;
+
+ fp = fopen("/proc/mounts", "r");
+ if (!fp)
+ return -1;
+
+ /*
+ * in order to handle split hierarchy, we need to scan /proc/mounts
+ * and inspect every cgroupfs mount point to find one that has
+ * perf_event subsystem
+ */
+ path_v1[0] = '\0';
+ path_v2[0] = '\0';
+
+ while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
+ __stringify(PATH_MAX)"s %*d %*d\n",
+ mountpoint, type, tokens) == 3) {
+
+ if (!path_v1[0] && !strcmp(type, "cgroup")) {
+
+ token = strtok_r(tokens, ",", &saved_ptr);
+
+ while (token != NULL) {
+ if (subsys && !strcmp(token, subsys)) {
+ strcpy(path_v1, mountpoint);
+ break;
+ }
+ token = strtok_r(NULL, ",", &saved_ptr);
+ }
+ }
+
+ if (!path_v2[0] && !strcmp(type, "cgroup2"))
+ strcpy(path_v2, mountpoint);
+
+ if (path_v1[0] && path_v2[0])
+ break;
+ }
+ fclose(fp);
+
+ if (path_v1[0])
+ path = path_v1;
+ else if (path_v2[0])
+ path = path_v2;
+ else
+ return -1;
+
+ if (strlen(path) < maxlen) {
+ strcpy(buf, path);
+ return 0;
+ }
+ return -1;
+}
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
index 027b18f7ed8c..82f53d81a7a7 100644
--- a/tools/lib/api/fs/fs.c
+++ b/tools/lib/api/fs/fs.c
@@ -90,6 +90,7 @@ struct fs {
const char * const *mounts;
char path[PATH_MAX];
bool found;
+ bool checked;
long magic;
};
@@ -111,31 +112,37 @@ static struct fs fs__entries[] = {
.name = "sysfs",
.mounts = sysfs__fs_known_mountpoints,
.magic = SYSFS_MAGIC,
+ .checked = false,
},
[FS__PROCFS] = {
.name = "proc",
.mounts = procfs__known_mountpoints,
.magic = PROC_SUPER_MAGIC,
+ .checked = false,
},
[FS__DEBUGFS] = {
.name = "debugfs",
.mounts = debugfs__known_mountpoints,
.magic = DEBUGFS_MAGIC,
+ .checked = false,
},
[FS__TRACEFS] = {
.name = "tracefs",
.mounts = tracefs__known_mountpoints,
.magic = TRACEFS_MAGIC,
+ .checked = false,
},
[FS__HUGETLBFS] = {
.name = "hugetlbfs",
.mounts = hugetlbfs__known_mountpoints,
.magic = HUGETLBFS_MAGIC,
+ .checked = false,
},
[FS__BPF_FS] = {
.name = "bpf",
.mounts = bpf_fs__known_mountpoints,
.magic = BPF_FS_MAGIC,
+ .checked = false,
},
};
@@ -158,6 +165,7 @@ static bool fs__read_mounts(struct fs *fs)
}
fclose(fp);
+ fs->checked = true;
return fs->found = found;
}
@@ -220,6 +228,7 @@ static bool fs__env_override(struct fs *fs)
return false;
fs->found = true;
+ fs->checked = true;
strncpy(fs->path, override_path, sizeof(fs->path) - 1);
fs->path[sizeof(fs->path) - 1] = '\0';
return true;
@@ -246,6 +255,14 @@ static const char *fs__mountpoint(int idx)
if (fs->found)
return (const char *)fs->path;
+ /* the mount point was already checked for the mount point
+ * but and did not exist, so return NULL to avoid scanning again.
+ * This makes the found and not found paths cost equivalent
+ * in case of multiple calls.
+ */
+ if (fs->checked)
+ return NULL;
+
return fs__get_mountpoint(fs);
}
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
index 92d03b8396b1..aa222ca30311 100644
--- a/tools/lib/api/fs/fs.h
+++ b/tools/lib/api/fs/fs.h
@@ -18,6 +18,18 @@
const char *name##__mount(void); \
bool name##__configured(void); \
+/*
+ * The xxxx__mountpoint() entry points find the first match mount point for each
+ * filesystems listed below, where xxxx is the filesystem type.
+ *
+ * The interface is as follows:
+ *
+ * - If a mount point is found on first call, it is cached and used for all
+ * subsequent calls.
+ *
+ * - If a mount point is not found, NULL is returned on first call and all
+ * subsequent calls.
+ */
FS(sysfs)
FS(procfs)
FS(debugfs)
@@ -28,6 +40,8 @@ FS(bpf_fs)
#undef FS
+int cgroupfs_find_mountpoint(char *buf, size_t maxlen, const char *subsys);
+
int filename__read_int(const char *filename, int *value);
int filename__read_ull(const char *filename, unsigned long long *value);
int filename__read_xll(const char *filename, unsigned long long *value);
diff --git a/tools/lib/api/io.h b/tools/lib/api/io.h
new file mode 100644
index 000000000000..777c20f6b604
--- /dev/null
+++ b/tools/lib/api/io.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Lightweight buffered reading library.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#ifndef __API_IO__
+#define __API_IO__
+
+#include <stdlib.h>
+#include <unistd.h>
+
+struct io {
+ /* File descriptor being read/ */
+ int fd;
+ /* Size of the read buffer. */
+ unsigned int buf_len;
+ /* Pointer to storage for buffering read. */
+ char *buf;
+ /* End of the storage. */
+ char *end;
+ /* Currently accessed data pointer. */
+ char *data;
+ /* Set true on when the end of file on read error. */
+ bool eof;
+};
+
+static inline void io__init(struct io *io, int fd,
+ char *buf, unsigned int buf_len)
+{
+ io->fd = fd;
+ io->buf_len = buf_len;
+ io->buf = buf;
+ io->end = buf;
+ io->data = buf;
+ io->eof = false;
+}
+
+/* Reads one character from the "io" file with similar semantics to fgetc. */
+static inline int io__get_char(struct io *io)
+{
+ char *ptr = io->data;
+
+ if (io->eof)
+ return -1;
+
+ if (ptr == io->end) {
+ ssize_t n = read(io->fd, io->buf, io->buf_len);
+
+ if (n <= 0) {
+ io->eof = true;
+ return -1;
+ }
+ ptr = &io->buf[0];
+ io->end = &io->buf[n];
+ }
+ io->data = ptr + 1;
+ return *ptr;
+}
+
+/* Read a hexadecimal value with no 0x prefix into the out argument hex. If the
+ * first character isn't hexadecimal returns -2, io->eof returns -1, otherwise
+ * returns the character after the hexadecimal value which may be -1 for eof.
+ * If the read value is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_hex(struct io *io, __u64 *hex)
+{
+ bool first_read = true;
+
+ *hex = 0;
+ while (true) {
+ int ch = io__get_char(io);
+
+ if (ch < 0)
+ return ch;
+ if (ch >= '0' && ch <= '9')
+ *hex = (*hex << 4) | (ch - '0');
+ else if (ch >= 'a' && ch <= 'f')
+ *hex = (*hex << 4) | (ch - 'a' + 10);
+ else if (ch >= 'A' && ch <= 'F')
+ *hex = (*hex << 4) | (ch - 'A' + 10);
+ else if (first_read)
+ return -2;
+ else
+ return ch;
+ first_read = false;
+ }
+}
+
+/* Read a positive decimal value with out argument dec. If the first character
+ * isn't a decimal returns -2, io->eof returns -1, otherwise returns the
+ * character after the decimal value which may be -1 for eof. If the read value
+ * is larger than a u64 the high-order bits will be dropped.
+ */
+static inline int io__get_dec(struct io *io, __u64 *dec)
+{
+ bool first_read = true;
+
+ *dec = 0;
+ while (true) {
+ int ch = io__get_char(io);
+
+ if (ch < 0)
+ return ch;
+ if (ch >= '0' && ch <= '9')
+ *dec = (*dec * 10) + ch - '0';
+ else if (first_read)
+ return -2;
+ else
+ return ch;
+ first_read = false;
+ }
+}
+
+#endif /* __API_IO__ */
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore
index e97c2ebcf447..8a81b3679d2b 100644
--- a/tools/lib/bpf/.gitignore
+++ b/tools/lib/bpf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
libbpf_version.h
libbpf.pc
FEATURE-DUMP.libbpf
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index e3962cfbc9a6..190366d05588 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o
+ btf_dump.o ringbuf.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index aee7f1a83c77..bf8ed134cb8a 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -151,7 +151,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
sort -u | wc -l)
-VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
+VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
@@ -218,7 +218,7 @@ check_abi: $(OUTPUT)libbpf.so
sed 's/\[.*\]//' | \
awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \
sort -u > $(OUTPUT)libbpf_global_syms.tmp; \
- readelf -s --wide $(OUTPUT)libbpf.so | \
+ readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | \
sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; \
diff -u $(OUTPUT)libbpf_global_syms.tmp \
@@ -264,7 +264,7 @@ install_pkgconfig: $(PC_FILE)
$(call QUIET_INSTALL, $(PC_FILE)) \
$(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644)
-install: install_lib install_pkgconfig
+install: install_lib install_pkgconfig install_headers
### Cleaning rules
diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index c6dafe563176..a7329b671c41 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -235,7 +235,8 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,
memset(&attr, 0, sizeof(attr));
attr.prog_type = load_attr->prog_type;
attr.expected_attach_type = load_attr->expected_attach_type;
- if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS) {
+ if (attr.prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
+ attr.prog_type == BPF_PROG_TYPE_LSM) {
attr.attach_btf_id = load_attr->attach_btf_id;
} else if (attr.prog_type == BPF_PROG_TYPE_TRACING ||
attr.prog_type == BPF_PROG_TYPE_EXT) {
@@ -584,6 +585,50 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
return sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
}
+int bpf_link_create(int prog_fd, int target_fd,
+ enum bpf_attach_type attach_type,
+ const struct bpf_link_create_opts *opts)
+{
+ union bpf_attr attr;
+
+ if (!OPTS_VALID(opts, bpf_link_create_opts))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.link_create.prog_fd = prog_fd;
+ attr.link_create.target_fd = target_fd;
+ attr.link_create.attach_type = attach_type;
+
+ return sys_bpf(BPF_LINK_CREATE, &attr, sizeof(attr));
+}
+
+int bpf_link_update(int link_fd, int new_prog_fd,
+ const struct bpf_link_update_opts *opts)
+{
+ union bpf_attr attr;
+
+ if (!OPTS_VALID(opts, bpf_link_update_opts))
+ return -EINVAL;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.link_update.link_fd = link_fd;
+ attr.link_update.new_prog_fd = new_prog_fd;
+ attr.link_update.flags = OPTS_GET(opts, flags, 0);
+ attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
+
+ return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+}
+
+int bpf_iter_create(int link_fd)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.iter_create.link_fd = link_fd;
+
+ return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+}
+
int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
__u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
{
@@ -686,6 +731,11 @@ int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
}
+int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
+{
+ return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID);
+}
+
int bpf_prog_get_fd_by_id(__u32 id)
{
union bpf_attr attr;
@@ -716,13 +766,23 @@ int bpf_btf_get_fd_by_id(__u32 id)
return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
}
-int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
+int bpf_link_get_fd_by_id(__u32 id)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.link_id = id;
+
+ return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
{
union bpf_attr attr;
int err;
memset(&attr, 0, sizeof(attr));
- attr.info.bpf_fd = prog_fd;
+ attr.info.bpf_fd = bpf_fd;
attr.info.info_len = *info_len;
attr.info.info = ptr_to_u64(info);
@@ -791,3 +851,13 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
return err;
}
+
+int bpf_enable_stats(enum bpf_stats_type type)
+{
+ union bpf_attr attr;
+
+ memset(&attr, 0, sizeof(attr));
+ attr.enable_stats.type = type;
+
+ return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index b976e77316cc..1b6015b21ba8 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -168,6 +168,27 @@ LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type);
LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd,
enum bpf_attach_type type);
+struct bpf_link_create_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+};
+#define bpf_link_create_opts__last_field sz
+
+LIBBPF_API int bpf_link_create(int prog_fd, int target_fd,
+ enum bpf_attach_type attach_type,
+ const struct bpf_link_create_opts *opts);
+
+struct bpf_link_update_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+ __u32 flags; /* extra flags */
+ __u32 old_prog_fd; /* expected old program FD */
+};
+#define bpf_link_update_opts__last_field old_prog_fd
+
+LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
+ const struct bpf_link_update_opts *opts);
+
+LIBBPF_API int bpf_iter_create(int link_fd);
+
struct bpf_prog_test_run_attr {
int prog_fd;
int repeat;
@@ -197,10 +218,12 @@ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id);
LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
-LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
+LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
__u32 query_flags, __u32 *attach_flags,
__u32 *prog_ids, __u32 *prog_cnt);
@@ -210,6 +233,7 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
__u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
__u64 *probe_offset, __u64 *probe_addr);
+LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
#ifdef __cplusplus
} /* extern "C" */
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index f69cc208778a..f67dce2af802 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -2,10 +2,17 @@
#ifndef __BPF_HELPERS__
#define __BPF_HELPERS__
+/*
+ * Note that bpf programs need to include either
+ * vmlinux.h (auto-generated from BTF) or linux/types.h
+ * in advance since bpf_helper_defs.h uses such types
+ * as __u64.
+ */
#include "bpf_helper_defs.h"
#define __uint(name, val) int (*name)[val]
#define __type(name, val) typeof(val) *name
+#define __array(name, val) typeof(val) *name[]
/* Helper macro to print out debug messages */
#define bpf_printk(fmt, ...) \
@@ -30,6 +37,20 @@
#endif
/*
+ * Helper macro to manipulate data structures
+ */
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
+#endif
+#ifndef container_of
+#define container_of(ptr, type, member) \
+ ({ \
+ void *__mptr = (void *)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); \
+ })
+#endif
+
+/*
* Helper structure used by eBPF C program
* to describe BPF map attributes to libbpf loader
*/
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index b0dafe8b4ebc..58eceb884df3 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -49,7 +49,8 @@
#if defined(bpf_target_x86)
-#ifdef __KERNEL__
+#if defined(__KERNEL__) || defined(__VMLINUX_H__)
+
#define PT_REGS_PARM1(x) ((x)->di)
#define PT_REGS_PARM2(x) ((x)->si)
#define PT_REGS_PARM3(x) ((x)->dx)
@@ -60,7 +61,20 @@
#define PT_REGS_RC(x) ((x)->ax)
#define PT_REGS_SP(x) ((x)->sp)
#define PT_REGS_IP(x) ((x)->ip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx)
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx)
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip)
+
#else
+
#ifdef __i386__
/* i386 kernel is built with -mregparm=3 */
#define PT_REGS_PARM1(x) ((x)->eax)
@@ -73,7 +87,20 @@
#define PT_REGS_RC(x) ((x)->eax)
#define PT_REGS_SP(x) ((x)->esp)
#define PT_REGS_IP(x) ((x)->eip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx)
+#define PT_REGS_PARM4_CORE(x) 0
+#define PT_REGS_PARM5_CORE(x) 0
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip)
+
#else
+
#define PT_REGS_PARM1(x) ((x)->rdi)
#define PT_REGS_PARM2(x) ((x)->rsi)
#define PT_REGS_PARM3(x) ((x)->rdx)
@@ -84,6 +111,18 @@
#define PT_REGS_RC(x) ((x)->rax)
#define PT_REGS_SP(x) ((x)->rsp)
#define PT_REGS_IP(x) ((x)->rip)
+
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi)
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi)
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx)
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx)
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8)
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp)
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp)
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax)
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip)
+
#endif
#endif
@@ -104,6 +143,17 @@ struct pt_regs;
#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15])
#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr)
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr)
+
#elif defined(bpf_target_arm)
#define PT_REGS_PARM1(x) ((x)->uregs[0])
@@ -117,6 +167,17 @@ struct pt_regs;
#define PT_REGS_SP(x) ((x)->uregs[13])
#define PT_REGS_IP(x) ((x)->uregs[12])
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12])
+
#elif defined(bpf_target_arm64)
/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */
@@ -134,6 +195,17 @@ struct pt_regs;
#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp)
#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc)
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc)
+
#elif defined(bpf_target_mips)
#define PT_REGS_PARM1(x) ((x)->regs[4])
@@ -147,6 +219,17 @@ struct pt_regs;
#define PT_REGS_SP(x) ((x)->regs[29])
#define PT_REGS_IP(x) ((x)->cp0_epc)
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31])
+#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[1])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29])
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc)
+
#elif defined(bpf_target_powerpc)
#define PT_REGS_PARM1(x) ((x)->gpr[3])
@@ -158,6 +241,15 @@ struct pt_regs;
#define PT_REGS_SP(x) ((x)->sp)
#define PT_REGS_IP(x) ((x)->nip)
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip)
+
#elif defined(bpf_target_sparc)
#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0])
@@ -169,11 +261,22 @@ struct pt_regs;
#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0])
#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP])
+#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
+#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1])
+#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2])
+#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3])
+#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4])
+#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7])
+#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0])
+#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP])
+
/* Should this also be a bpf_target check for the sparc case? */
#if defined(__arch64__)
#define PT_REGS_IP(x) ((x)->tpc)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc)
#else
#define PT_REGS_IP(x) ((x)->pc)
+#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc)
#endif
#endif
@@ -192,4 +295,138 @@ struct pt_regs;
(void *)(PT_REGS_FP(ctx) + sizeof(ip))); })
#endif
+#define ___bpf_concat(a, b) a ## b
+#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
+#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
+#define ___bpf_narg(...) \
+ ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#define ___bpf_empty(...) \
+ ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
+
+#define ___bpf_ctx_cast0() ctx
+#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
+#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
+#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
+#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
+#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
+#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
+#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
+#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
+#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
+#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
+#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
+#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
+#define ___bpf_ctx_cast(args...) \
+ ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
+
+/*
+ * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
+ * similar kinds of BPF programs, that accept input arguments as a single
+ * pointer to untyped u64 array, where each u64 can actually be a typed
+ * pointer or integer of different size. Instead of requring user to write
+ * manual casts and work with array elements by index, BPF_PROG macro
+ * allows user to declare a list of named and typed input arguments in the
+ * same syntax as for normal C function. All the casting is hidden and
+ * performed transparently, while user code can just assume working with
+ * function arguments of specified type and name.
+ *
+ * Original raw context argument is preserved as well as 'ctx' argument.
+ * This is useful when using BPF helpers that expect original context
+ * as one of the parameters (e.g., for bpf_perf_event_output()).
+ */
+#define BPF_PROG(name, args...) \
+name(unsigned long long *ctx); \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(unsigned long long *ctx, ##args); \
+typeof(name(0)) name(unsigned long long *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_ctx_cast(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(unsigned long long *ctx, ##args)
+
+struct pt_regs;
+
+#define ___bpf_kprobe_args0() ctx
+#define ___bpf_kprobe_args1(x) \
+ ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
+#define ___bpf_kprobe_args2(x, args...) \
+ ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
+#define ___bpf_kprobe_args3(x, args...) \
+ ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
+#define ___bpf_kprobe_args4(x, args...) \
+ ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
+#define ___bpf_kprobe_args5(x, args...) \
+ ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
+#define ___bpf_kprobe_args(args...) \
+ ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
+ * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
+ * low-level way of getting kprobe input arguments from struct pt_regs, and
+ * provides a familiar typed and named function arguments syntax and
+ * semantics of accessing kprobe input paremeters.
+ *
+ * Original struct pt_regs* context is preserved as 'ctx' argument. This might
+ * be necessary when using BPF helpers like bpf_perf_event_output().
+ */
+#define BPF_KPROBE(name, args...) \
+name(struct pt_regs *ctx); \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args); \
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_kprobe_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args)
+
+#define ___bpf_kretprobe_args0() ctx
+#define ___bpf_kretprobe_args1(x) \
+ ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx)
+#define ___bpf_kretprobe_args(args...) \
+ ___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)
+
+/*
+ * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional
+ * return value (in addition to `struct pt_regs *ctx`), but no input
+ * arguments, because they will be clobbered by the time probed function
+ * returns.
+ */
+#define BPF_KRETPROBE(name, args...) \
+name(struct pt_regs *ctx); \
+static __attribute__((always_inline)) typeof(name(0)) \
+____##name(struct pt_regs *ctx, ##args); \
+typeof(name(0)) name(struct pt_regs *ctx) \
+{ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ return ____##name(___bpf_kretprobe_args(args)); \
+ _Pragma("GCC diagnostic pop") \
+} \
+static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+
+/*
+ * BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
+ * in a structure.
+ */
+#define BPF_SEQ_PRINTF(seq, fmt, args...) \
+ ({ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[] = { args }; \
+ _Pragma("GCC diagnostic pop") \
+ int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
+ ___param, sizeof(___param)); \
+ ___ret; \
+ })
+
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 3d1c25fc97ae..bfef3d606b54 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -657,22 +657,32 @@ int btf__finalize_data(struct bpf_object *obj, struct btf *btf)
int btf__load(struct btf *btf)
{
- __u32 log_buf_size = BPF_LOG_BUF_SIZE;
+ __u32 log_buf_size = 0;
char *log_buf = NULL;
int err = 0;
if (btf->fd >= 0)
return -EEXIST;
- log_buf = malloc(log_buf_size);
- if (!log_buf)
- return -ENOMEM;
+retry_load:
+ if (log_buf_size) {
+ log_buf = malloc(log_buf_size);
+ if (!log_buf)
+ return -ENOMEM;
- *log_buf = 0;
+ *log_buf = 0;
+ }
btf->fd = bpf_load_btf(btf->data, btf->data_size,
log_buf, log_buf_size, false);
if (btf->fd < 0) {
+ if (!log_buf || errno == ENOSPC) {
+ log_buf_size = max((__u32)BPF_LOG_BUF_SIZE,
+ log_buf_size << 1);
+ free(log_buf);
+ goto retry_load;
+ }
+
err = -errno;
pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);
if (*log_buf)
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index bd09ed1710f1..de07e559a11d 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -658,7 +658,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
if (!btf_dump_is_blacklisted(d, id)) {
btf_dump_emit_typedef_def(d, id, t, 0);
btf_dump_printf(d, ";\n\n");
- };
+ }
tstate->fwd_emitted = 1;
break;
default:
@@ -916,13 +916,13 @@ static void btf_dump_emit_enum_def(struct btf_dump *d, __u32 id,
/* enumerators share namespace with typedef idents */
dup_cnt = btf_dump_name_dups(d, d->ident_names, name);
if (dup_cnt > 1) {
- btf_dump_printf(d, "\n%s%s___%zu = %d,",
+ btf_dump_printf(d, "\n%s%s___%zu = %u,",
pfx(lvl + 1), name, dup_cnt,
- (__s32)v->val);
+ (__u32)v->val);
} else {
- btf_dump_printf(d, "\n%s%s = %d,",
+ btf_dump_printf(d, "\n%s%s = %u,",
pfx(lvl + 1), name,
- (__s32)v->val);
+ (__u32)v->val);
}
}
btf_dump_printf(d, "\n%s}", pfx(lvl));
@@ -1030,7 +1030,7 @@ int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id,
if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts))
return -EINVAL;
- fname = OPTS_GET(opts, field_name, NULL);
+ fname = OPTS_GET(opts, field_name, "");
lvl = OPTS_GET(opts, indent_level, 0);
btf_dump_emit_type_decl(d, id, fname, lvl);
return 0;
diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
index 54c30c802070..a405dad068f5 100644
--- a/tools/lib/bpf/hashmap.c
+++ b/tools/lib/bpf/hashmap.c
@@ -59,7 +59,14 @@ struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
void hashmap__clear(struct hashmap *map)
{
+ struct hashmap_entry *cur, *tmp;
+ size_t bkt;
+
+ hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+ free(cur);
+ }
free(map->buckets);
+ map->buckets = NULL;
map->cap = map->cap_bits = map->sz = 0;
}
@@ -93,8 +100,7 @@ static int hashmap_grow(struct hashmap *map)
struct hashmap_entry **new_buckets;
struct hashmap_entry *cur, *tmp;
size_t new_cap_bits, new_cap;
- size_t h;
- int bkt;
+ size_t h, bkt;
new_cap_bits = map->cap_bits + 1;
if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index bae8879cdf58..e823b35e7371 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -15,7 +15,6 @@
#else
#include <bits/reg.h>
#endif
-#include "libbpf_internal.h"
static inline size_t hash_bits(size_t h, int bits)
{
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7469c7dcc15e..7f01be2b88b8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -178,6 +178,8 @@ struct bpf_capabilities {
__u32 array_mmap:1;
/* BTF_FUNC_GLOBAL is supported */
__u32 btf_func_global:1;
+ /* kernel support for expected_attach_type in BPF_PROG_LOAD */
+ __u32 exp_attach_type:1;
};
enum reloc_type {
@@ -194,6 +196,22 @@ struct reloc_desc {
int sym_off;
};
+struct bpf_sec_def;
+
+typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+
+struct bpf_sec_def {
+ const char *sec;
+ size_t len;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ bool is_exp_attach_type_optional;
+ bool is_attachable;
+ bool is_attach_btf;
+ attach_fn_t attach_fn;
+};
+
/*
* bpf_prog should be a better name but it has been used in
* linux/filter.h.
@@ -204,6 +222,7 @@ struct bpf_program {
char *name;
int prog_ifindex;
char *section_name;
+ const struct bpf_sec_def *sec_def;
/* section_name with / replaced by _; makes recursive pinning
* in bpf_object__pin_programs easier
*/
@@ -291,6 +310,7 @@ struct bpf_map {
int map_ifindex;
int inner_map_fd;
struct bpf_map_def def;
+ __u32 btf_var_idx;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
__u32 btf_vmlinux_value_type_id;
@@ -299,6 +319,9 @@ struct bpf_map {
enum libbpf_map_type libbpf_type;
void *mmaped;
struct bpf_struct_ops *st_ops;
+ struct bpf_map *inner_map;
+ void **init_slots;
+ int init_slots_sz;
char *pin_path;
bool pinned;
bool reused;
@@ -370,6 +393,7 @@ struct bpf_object {
int nr_reloc_sects;
int maps_shndx;
int btf_maps_shndx;
+ __u32 btf_maps_sec_btf_id;
int text_shndx;
int symbols_shndx;
int data_shndx;
@@ -1845,7 +1869,6 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
* type definition, while using only sizeof(void *) space in ELF data section.
*/
static bool get_map_field_int(const char *map_name, const struct btf *btf,
- const struct btf_type *def,
const struct btf_member *m, __u32 *res)
{
const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);
@@ -1896,110 +1919,54 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
return 0;
}
-static int bpf_object__init_user_btf_map(struct bpf_object *obj,
- const struct btf_type *sec,
- int var_idx, int sec_idx,
- const Elf_Data *data, bool strict,
- const char *pin_root_path)
+
+static int parse_btf_map_def(struct bpf_object *obj,
+ struct bpf_map *map,
+ const struct btf_type *def,
+ bool strict, bool is_inner,
+ const char *pin_root_path)
{
- const struct btf_type *var, *def, *t;
- const struct btf_var_secinfo *vi;
- const struct btf_var *var_extra;
+ const struct btf_type *t;
const struct btf_member *m;
- const char *map_name;
- struct bpf_map *map;
int vlen, i;
- vi = btf_var_secinfos(sec) + var_idx;
- var = btf__type_by_id(obj->btf, vi->type);
- var_extra = btf_var(var);
- map_name = btf__name_by_offset(obj->btf, var->name_off);
- vlen = btf_vlen(var);
-
- if (map_name == NULL || map_name[0] == '\0') {
- pr_warn("map #%d: empty name.\n", var_idx);
- return -EINVAL;
- }
- if ((__u64)vi->offset + vi->size > data->d_size) {
- pr_warn("map '%s' BTF data is corrupted.\n", map_name);
- return -EINVAL;
- }
- if (!btf_is_var(var)) {
- pr_warn("map '%s': unexpected var kind %u.\n",
- map_name, btf_kind(var));
- return -EINVAL;
- }
- if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
- var_extra->linkage != BTF_VAR_STATIC) {
- pr_warn("map '%s': unsupported var linkage %u.\n",
- map_name, var_extra->linkage);
- return -EOPNOTSUPP;
- }
-
- def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
- if (!btf_is_struct(def)) {
- pr_warn("map '%s': unexpected def kind %u.\n",
- map_name, btf_kind(var));
- return -EINVAL;
- }
- if (def->size > vi->size) {
- pr_warn("map '%s': invalid def size.\n", map_name);
- return -EINVAL;
- }
-
- map = bpf_object__add_map(obj);
- if (IS_ERR(map))
- return PTR_ERR(map);
- map->name = strdup(map_name);
- if (!map->name) {
- pr_warn("map '%s': failed to alloc map name.\n", map_name);
- return -ENOMEM;
- }
- map->libbpf_type = LIBBPF_MAP_UNSPEC;
- map->def.type = BPF_MAP_TYPE_UNSPEC;
- map->sec_idx = sec_idx;
- map->sec_offset = vi->offset;
- pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
- map_name, map->sec_idx, map->sec_offset);
-
vlen = btf_vlen(def);
m = btf_members(def);
for (i = 0; i < vlen; i++, m++) {
const char *name = btf__name_by_offset(obj->btf, m->name_off);
if (!name) {
- pr_warn("map '%s': invalid field #%d.\n", map_name, i);
+ pr_warn("map '%s': invalid field #%d.\n", map->name, i);
return -EINVAL;
}
if (strcmp(name, "type") == 0) {
- if (!get_map_field_int(map_name, obj->btf, def, m,
+ if (!get_map_field_int(map->name, obj->btf, m,
&map->def.type))
return -EINVAL;
pr_debug("map '%s': found type = %u.\n",
- map_name, map->def.type);
+ map->name, map->def.type);
} else if (strcmp(name, "max_entries") == 0) {
- if (!get_map_field_int(map_name, obj->btf, def, m,
+ if (!get_map_field_int(map->name, obj->btf, m,
&map->def.max_entries))
return -EINVAL;
pr_debug("map '%s': found max_entries = %u.\n",
- map_name, map->def.max_entries);
+ map->name, map->def.max_entries);
} else if (strcmp(name, "map_flags") == 0) {
- if (!get_map_field_int(map_name, obj->btf, def, m,
+ if (!get_map_field_int(map->name, obj->btf, m,
&map->def.map_flags))
return -EINVAL;
pr_debug("map '%s': found map_flags = %u.\n",
- map_name, map->def.map_flags);
+ map->name, map->def.map_flags);
} else if (strcmp(name, "key_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map_name, obj->btf, def, m,
- &sz))
+ if (!get_map_field_int(map->name, obj->btf, m, &sz))
return -EINVAL;
pr_debug("map '%s': found key_size = %u.\n",
- map_name, sz);
+ map->name, sz);
if (map->def.key_size && map->def.key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %u.\n",
- map_name, map->def.key_size, sz);
+ map->name, map->def.key_size, sz);
return -EINVAL;
}
map->def.key_size = sz;
@@ -2009,25 +1976,25 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
t = btf__type_by_id(obj->btf, m->type);
if (!t) {
pr_warn("map '%s': key type [%d] not found.\n",
- map_name, m->type);
+ map->name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
pr_warn("map '%s': key spec is not PTR: %u.\n",
- map_name, btf_kind(t));
+ map->name, btf_kind(t));
return -EINVAL;
}
sz = btf__resolve_size(obj->btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
- map_name, t->type, (ssize_t)sz);
+ map->name, t->type, (ssize_t)sz);
return sz;
}
pr_debug("map '%s': found key [%u], sz = %zd.\n",
- map_name, t->type, (ssize_t)sz);
+ map->name, t->type, (ssize_t)sz);
if (map->def.key_size && map->def.key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %zd.\n",
- map_name, map->def.key_size, (ssize_t)sz);
+ map->name, map->def.key_size, (ssize_t)sz);
return -EINVAL;
}
map->def.key_size = sz;
@@ -2035,14 +2002,13 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
} else if (strcmp(name, "value_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map_name, obj->btf, def, m,
- &sz))
+ if (!get_map_field_int(map->name, obj->btf, m, &sz))
return -EINVAL;
pr_debug("map '%s': found value_size = %u.\n",
- map_name, sz);
+ map->name, sz);
if (map->def.value_size && map->def.value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %u.\n",
- map_name, map->def.value_size, sz);
+ map->name, map->def.value_size, sz);
return -EINVAL;
}
map->def.value_size = sz;
@@ -2052,72 +2018,207 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
t = btf__type_by_id(obj->btf, m->type);
if (!t) {
pr_warn("map '%s': value type [%d] not found.\n",
- map_name, m->type);
+ map->name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
pr_warn("map '%s': value spec is not PTR: %u.\n",
- map_name, btf_kind(t));
+ map->name, btf_kind(t));
return -EINVAL;
}
sz = btf__resolve_size(obj->btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
- map_name, t->type, (ssize_t)sz);
+ map->name, t->type, (ssize_t)sz);
return sz;
}
pr_debug("map '%s': found value [%u], sz = %zd.\n",
- map_name, t->type, (ssize_t)sz);
+ map->name, t->type, (ssize_t)sz);
if (map->def.value_size && map->def.value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %zd.\n",
- map_name, map->def.value_size, (ssize_t)sz);
+ map->name, map->def.value_size, (ssize_t)sz);
return -EINVAL;
}
map->def.value_size = sz;
map->btf_value_type_id = t->type;
+ }
+ else if (strcmp(name, "values") == 0) {
+ int err;
+
+ if (is_inner) {
+ pr_warn("map '%s': multi-level inner maps not supported.\n",
+ map->name);
+ return -ENOTSUP;
+ }
+ if (i != vlen - 1) {
+ pr_warn("map '%s': '%s' member should be last.\n",
+ map->name, name);
+ return -EINVAL;
+ }
+ if (!bpf_map_type__is_map_in_map(map->def.type)) {
+ pr_warn("map '%s': should be map-in-map.\n",
+ map->name);
+ return -ENOTSUP;
+ }
+ if (map->def.value_size && map->def.value_size != 4) {
+ pr_warn("map '%s': conflicting value size %u != 4.\n",
+ map->name, map->def.value_size);
+ return -EINVAL;
+ }
+ map->def.value_size = 4;
+ t = btf__type_by_id(obj->btf, m->type);
+ if (!t) {
+ pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
+ map->name, m->type);
+ return -EINVAL;
+ }
+ if (!btf_is_array(t) || btf_array(t)->nelems) {
+ pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
+ map->name);
+ return -EINVAL;
+ }
+ t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
+ NULL);
+ if (!btf_is_ptr(t)) {
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n",
+ map->name, btf_kind(t));
+ return -EINVAL;
+ }
+ t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+ if (!btf_is_struct(t)) {
+ pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n",
+ map->name, btf_kind(t));
+ return -EINVAL;
+ }
+
+ map->inner_map = calloc(1, sizeof(*map->inner_map));
+ if (!map->inner_map)
+ return -ENOMEM;
+ map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
+ map->inner_map->name = malloc(strlen(map->name) +
+ sizeof(".inner") + 1);
+ if (!map->inner_map->name)
+ return -ENOMEM;
+ sprintf(map->inner_map->name, "%s.inner", map->name);
+
+ err = parse_btf_map_def(obj, map->inner_map, t, strict,
+ true /* is_inner */, NULL);
+ if (err)
+ return err;
} else if (strcmp(name, "pinning") == 0) {
__u32 val;
int err;
- if (!get_map_field_int(map_name, obj->btf, def, m,
- &val))
+ if (is_inner) {
+ pr_debug("map '%s': inner def can't be pinned.\n",
+ map->name);
+ return -EINVAL;
+ }
+ if (!get_map_field_int(map->name, obj->btf, m, &val))
return -EINVAL;
pr_debug("map '%s': found pinning = %u.\n",
- map_name, val);
+ map->name, val);
if (val != LIBBPF_PIN_NONE &&
val != LIBBPF_PIN_BY_NAME) {
pr_warn("map '%s': invalid pinning value %u.\n",
- map_name, val);
+ map->name, val);
return -EINVAL;
}
if (val == LIBBPF_PIN_BY_NAME) {
err = build_map_pin_path(map, pin_root_path);
if (err) {
pr_warn("map '%s': couldn't build pin path.\n",
- map_name);
+ map->name);
return err;
}
}
} else {
if (strict) {
pr_warn("map '%s': unknown field '%s'.\n",
- map_name, name);
+ map->name, name);
return -ENOTSUP;
}
pr_debug("map '%s': ignoring unknown field '%s'.\n",
- map_name, name);
+ map->name, name);
}
}
if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
- pr_warn("map '%s': map type isn't specified.\n", map_name);
+ pr_warn("map '%s': map type isn't specified.\n", map->name);
return -EINVAL;
}
return 0;
}
+static int bpf_object__init_user_btf_map(struct bpf_object *obj,
+ const struct btf_type *sec,
+ int var_idx, int sec_idx,
+ const Elf_Data *data, bool strict,
+ const char *pin_root_path)
+{
+ const struct btf_type *var, *def;
+ const struct btf_var_secinfo *vi;
+ const struct btf_var *var_extra;
+ const char *map_name;
+ struct bpf_map *map;
+
+ vi = btf_var_secinfos(sec) + var_idx;
+ var = btf__type_by_id(obj->btf, vi->type);
+ var_extra = btf_var(var);
+ map_name = btf__name_by_offset(obj->btf, var->name_off);
+
+ if (map_name == NULL || map_name[0] == '\0') {
+ pr_warn("map #%d: empty name.\n", var_idx);
+ return -EINVAL;
+ }
+ if ((__u64)vi->offset + vi->size > data->d_size) {
+ pr_warn("map '%s' BTF data is corrupted.\n", map_name);
+ return -EINVAL;
+ }
+ if (!btf_is_var(var)) {
+ pr_warn("map '%s': unexpected var kind %u.\n",
+ map_name, btf_kind(var));
+ return -EINVAL;
+ }
+ if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+ var_extra->linkage != BTF_VAR_STATIC) {
+ pr_warn("map '%s': unsupported var linkage %u.\n",
+ map_name, var_extra->linkage);
+ return -EOPNOTSUPP;
+ }
+
+ def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
+ if (!btf_is_struct(def)) {
+ pr_warn("map '%s': unexpected def kind %u.\n",
+ map_name, btf_kind(var));
+ return -EINVAL;
+ }
+ if (def->size > vi->size) {
+ pr_warn("map '%s': invalid def size.\n", map_name);
+ return -EINVAL;
+ }
+
+ map = bpf_object__add_map(obj);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+ map->name = strdup(map_name);
+ if (!map->name) {
+ pr_warn("map '%s': failed to alloc map name.\n", map_name);
+ return -ENOMEM;
+ }
+ map->libbpf_type = LIBBPF_MAP_UNSPEC;
+ map->def.type = BPF_MAP_TYPE_UNSPEC;
+ map->sec_idx = sec_idx;
+ map->sec_offset = vi->offset;
+ map->btf_var_idx = var_idx;
+ pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
+ map_name, map->sec_idx, map->sec_offset);
+
+ return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
+}
+
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
const char *pin_root_path)
{
@@ -2148,6 +2249,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
name = btf__name_by_offset(obj->btf, t->name_off);
if (strcmp(name, MAPS_ELF_SEC) == 0) {
sec = t;
+ obj->efile.btf_maps_sec_btf_id = i;
break;
}
}
@@ -2284,11 +2386,16 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj)
}
}
-static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj)
+static bool libbpf_needs_btf(const struct bpf_object *obj)
{
return obj->efile.btf_maps_shndx >= 0 ||
- obj->efile.st_ops_shndx >= 0 ||
- obj->nr_extern > 0;
+ obj->efile.st_ops_shndx >= 0 ||
+ obj->nr_extern > 0;
+}
+
+static bool kernel_needs_btf(const struct bpf_object *obj)
+{
+ return obj->efile.st_ops_shndx >= 0;
}
static int bpf_object__init_btf(struct bpf_object *obj,
@@ -2324,7 +2431,7 @@ static int bpf_object__init_btf(struct bpf_object *obj,
}
}
out:
- if (err && bpf_object__is_btf_mandatory(obj)) {
+ if (err && libbpf_needs_btf(obj)) {
pr_warn("BTF is required, but is missing or corrupted.\n");
return err;
}
@@ -2348,7 +2455,7 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
btf_ext__free(obj->btf_ext);
obj->btf_ext = NULL;
- if (bpf_object__is_btf_mandatory(obj)) {
+ if (libbpf_needs_btf(obj)) {
pr_warn("BTF is required, but is missing or corrupted.\n");
return -ENOENT;
}
@@ -2357,7 +2464,8 @@ static int bpf_object__finalize_btf(struct bpf_object *obj)
static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
{
- if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
+ prog->type == BPF_PROG_TYPE_LSM)
return true;
/* BPF_PROG_TYPE_TRACING programs which do not attach to other programs
@@ -2412,7 +2520,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
obj->btf_ext = NULL;
}
- if (bpf_object__is_btf_mandatory(obj))
+ if (kernel_needs_btf(obj))
return err;
}
return 0;
@@ -2528,7 +2636,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
/* Only do relo for section with exec instructions */
if (!section_have_execinstr(obj, sec) &&
- strcmp(name, ".rel" STRUCT_OPS_SEC)) {
+ strcmp(name, ".rel" STRUCT_OPS_SEC) &&
+ strcmp(name, ".rel" MAPS_ELF_SEC)) {
pr_debug("skip relo %s(%d) for section(%d)\n",
name, idx, sec);
continue;
@@ -3128,7 +3237,7 @@ int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
}
static int
-bpf_object__probe_name(struct bpf_object *obj)
+bpf_object__probe_loading(struct bpf_object *obj)
{
struct bpf_load_program_attr attr;
char *cp, errmsg[STRERR_BUFSIZE];
@@ -3148,15 +3257,36 @@ bpf_object__probe_name(struct bpf_object *obj)
ret = bpf_load_program_xattr(&attr, NULL, 0);
if (ret < 0) {
- cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
- pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n",
- __func__, cp, errno);
- return -errno;
+ ret = errno;
+ cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+ pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
+ "program. Make sure your kernel supports BPF "
+ "(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
+ "set to big enough value.\n", __func__, cp, ret);
+ return -ret;
}
close(ret);
- /* now try the same program, but with the name */
+ return 0;
+}
+
+static int
+bpf_object__probe_name(struct bpf_object *obj)
+{
+ struct bpf_load_program_attr attr;
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int ret;
+
+ /* make sure loading with name works */
+ memset(&attr, 0, sizeof(attr));
+ attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+ attr.insns = insns;
+ attr.insns_cnt = ARRAY_SIZE(insns);
+ attr.license = "GPL";
attr.name = "test";
ret = bpf_load_program_xattr(&attr, NULL, 0);
if (ret >= 0) {
@@ -3314,6 +3444,37 @@ static int bpf_object__probe_array_mmap(struct bpf_object *obj)
}
static int
+bpf_object__probe_exp_attach_type(struct bpf_object *obj)
+{
+ struct bpf_load_program_attr attr;
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int fd;
+
+ memset(&attr, 0, sizeof(attr));
+ /* use any valid combination of program type and (optional)
+ * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)
+ * to see if kernel supports expected_attach_type field for
+ * BPF_PROG_LOAD command
+ */
+ attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
+ attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE;
+ attr.insns = insns;
+ attr.insns_cnt = ARRAY_SIZE(insns);
+ attr.license = "GPL";
+
+ fd = bpf_load_program_xattr(&attr, NULL, 0);
+ if (fd >= 0) {
+ obj->caps.exp_attach_type = 1;
+ close(fd);
+ return 1;
+ }
+ return 0;
+}
+
+static int
bpf_object__probe_caps(struct bpf_object *obj)
{
int (*probe_fn[])(struct bpf_object *obj) = {
@@ -3323,6 +3484,7 @@ bpf_object__probe_caps(struct bpf_object *obj)
bpf_object__probe_btf_func_global,
bpf_object__probe_btf_datasec,
bpf_object__probe_array_mmap,
+ bpf_object__probe_exp_attach_type,
};
int i, ret;
@@ -3429,124 +3591,181 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
return 0;
}
+static void bpf_map__destroy(struct bpf_map *map);
+
+static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
+{
+ struct bpf_create_map_attr create_attr;
+ struct bpf_map_def *def = &map->def;
+
+ memset(&create_attr, 0, sizeof(create_attr));
+
+ if (obj->caps.name)
+ create_attr.name = map->name;
+ create_attr.map_ifindex = map->map_ifindex;
+ create_attr.map_type = def->type;
+ create_attr.map_flags = def->map_flags;
+ create_attr.key_size = def->key_size;
+ create_attr.value_size = def->value_size;
+
+ if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
+ int nr_cpus;
+
+ nr_cpus = libbpf_num_possible_cpus();
+ if (nr_cpus < 0) {
+ pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
+ map->name, nr_cpus);
+ return nr_cpus;
+ }
+ pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
+ create_attr.max_entries = nr_cpus;
+ } else {
+ create_attr.max_entries = def->max_entries;
+ }
+
+ if (bpf_map__is_struct_ops(map))
+ create_attr.btf_vmlinux_value_type_id =
+ map->btf_vmlinux_value_type_id;
+
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
+ create_attr.btf_fd = btf__fd(obj->btf);
+ create_attr.btf_key_type_id = map->btf_key_type_id;
+ create_attr.btf_value_type_id = map->btf_value_type_id;
+ }
+
+ if (bpf_map_type__is_map_in_map(def->type)) {
+ if (map->inner_map) {
+ int err;
+
+ err = bpf_object__create_map(obj, map->inner_map);
+ if (err) {
+ pr_warn("map '%s': failed to create inner map: %d\n",
+ map->name, err);
+ return err;
+ }
+ map->inner_map_fd = bpf_map__fd(map->inner_map);
+ }
+ if (map->inner_map_fd >= 0)
+ create_attr.inner_map_fd = map->inner_map_fd;
+ }
+
+ map->fd = bpf_create_map_xattr(&create_attr);
+ if (map->fd < 0 && (create_attr.btf_key_type_id ||
+ create_attr.btf_value_type_id)) {
+ char *cp, errmsg[STRERR_BUFSIZE];
+ int err = -errno;
+
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+ pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+ map->name, cp, err);
+ create_attr.btf_fd = 0;
+ create_attr.btf_key_type_id = 0;
+ create_attr.btf_value_type_id = 0;
+ map->btf_key_type_id = 0;
+ map->btf_value_type_id = 0;
+ map->fd = bpf_create_map_xattr(&create_attr);
+ }
+
+ if (map->fd < 0)
+ return -errno;
+
+ if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
+ bpf_map__destroy(map->inner_map);
+ zfree(&map->inner_map);
+ }
+
+ return 0;
+}
+
static int
bpf_object__create_maps(struct bpf_object *obj)
{
- struct bpf_create_map_attr create_attr = {};
- int nr_cpus = 0;
- unsigned int i;
+ struct bpf_map *map;
+ char *cp, errmsg[STRERR_BUFSIZE];
+ unsigned int i, j;
int err;
for (i = 0; i < obj->nr_maps; i++) {
- struct bpf_map *map = &obj->maps[i];
- struct bpf_map_def *def = &map->def;
- char *cp, errmsg[STRERR_BUFSIZE];
- int *pfd = &map->fd;
+ map = &obj->maps[i];
if (map->pin_path) {
err = bpf_object__reuse_map(map);
if (err) {
- pr_warn("error reusing pinned map %s\n",
+ pr_warn("map '%s': error reusing pinned map\n",
map->name);
- return err;
+ goto err_out;
}
}
if (map->fd >= 0) {
- pr_debug("skip map create (preset) %s: fd=%d\n",
+ pr_debug("map '%s': skipping creation (preset fd=%d)\n",
map->name, map->fd);
continue;
}
- if (obj->caps.name)
- create_attr.name = map->name;
- create_attr.map_ifindex = map->map_ifindex;
- create_attr.map_type = def->type;
- create_attr.map_flags = def->map_flags;
- create_attr.key_size = def->key_size;
- create_attr.value_size = def->value_size;
- if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
- !def->max_entries) {
- if (!nr_cpus)
- nr_cpus = libbpf_num_possible_cpus();
- if (nr_cpus < 0) {
- pr_warn("failed to determine number of system CPUs: %d\n",
- nr_cpus);
- err = nr_cpus;
- goto err_out;
- }
- pr_debug("map '%s': setting size to %d\n",
- map->name, nr_cpus);
- create_attr.max_entries = nr_cpus;
- } else {
- create_attr.max_entries = def->max_entries;
- }
- create_attr.btf_fd = 0;
- create_attr.btf_key_type_id = 0;
- create_attr.btf_value_type_id = 0;
- if (bpf_map_type__is_map_in_map(def->type) &&
- map->inner_map_fd >= 0)
- create_attr.inner_map_fd = map->inner_map_fd;
- if (bpf_map__is_struct_ops(map))
- create_attr.btf_vmlinux_value_type_id =
- map->btf_vmlinux_value_type_id;
-
- if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
- create_attr.btf_fd = btf__fd(obj->btf);
- create_attr.btf_key_type_id = map->btf_key_type_id;
- create_attr.btf_value_type_id = map->btf_value_type_id;
- }
-
- *pfd = bpf_create_map_xattr(&create_attr);
- if (*pfd < 0 && (create_attr.btf_key_type_id ||
- create_attr.btf_value_type_id)) {
- err = -errno;
- cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
- pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
- map->name, cp, err);
- create_attr.btf_fd = 0;
- create_attr.btf_key_type_id = 0;
- create_attr.btf_value_type_id = 0;
- map->btf_key_type_id = 0;
- map->btf_value_type_id = 0;
- *pfd = bpf_create_map_xattr(&create_attr);
- }
+ err = bpf_object__create_map(obj, map);
+ if (err)
+ goto err_out;
- if (*pfd < 0) {
- size_t j;
-
- err = -errno;
-err_out:
- cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
- pr_warn("failed to create map (name: '%s'): %s(%d)\n",
- map->name, cp, err);
- pr_perm_msg(err);
- for (j = 0; j < i; j++)
- zclose(obj->maps[j].fd);
- return err;
- }
+ pr_debug("map '%s': created successfully, fd=%d\n", map->name,
+ map->fd);
if (bpf_map__is_internal(map)) {
err = bpf_object__populate_internal_map(obj, map);
if (err < 0) {
- zclose(*pfd);
+ zclose(map->fd);
goto err_out;
}
}
+ if (map->init_slots_sz) {
+ for (j = 0; j < map->init_slots_sz; j++) {
+ const struct bpf_map *targ_map;
+ int fd;
+
+ if (!map->init_slots[j])
+ continue;
+
+ targ_map = map->init_slots[j];
+ fd = bpf_map__fd(targ_map);
+ err = bpf_map_update_elem(map->fd, &j, &fd, 0);
+ if (err) {
+ err = -errno;
+ pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
+ map->name, j, targ_map->name,
+ fd, err);
+ goto err_out;
+ }
+ pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
+ map->name, j, targ_map->name, fd);
+ }
+ zfree(&map->init_slots);
+ map->init_slots_sz = 0;
+ }
+
if (map->pin_path && !map->pinned) {
err = bpf_map__pin(map, NULL);
if (err) {
- pr_warn("failed to auto-pin map name '%s' at '%s'\n",
- map->name, map->pin_path);
- return err;
+ pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
+ map->name, map->pin_path, err);
+ zclose(map->fd);
+ goto err_out;
}
}
-
- pr_debug("created map %s: fd=%d\n", map->name, *pfd);
}
return 0;
+
+err_out:
+ cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+ pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
+ pr_perm_msg(err);
+ for (j = 0; j < i; j++)
+ zclose(obj->maps[j].fd);
+ return err;
}
static int
@@ -3868,6 +4087,10 @@ static struct ids_vec *bpf_core_find_cands(const struct btf *local_btf,
if (str_is_empty(targ_name))
continue;
+ t = skip_mods_and_typedefs(targ_btf, i, NULL);
+ if (!btf_is_composite(t) && !btf_is_array(t))
+ continue;
+
targ_essent_len = bpf_core_essential_name_len(targ_name);
if (targ_essent_len != local_essent_len)
continue;
@@ -4794,9 +5017,118 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
return 0;
}
-static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
- GElf_Shdr *shdr,
- Elf_Data *data);
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
+ GElf_Shdr *shdr, Elf_Data *data);
+
+static int bpf_object__collect_map_relos(struct bpf_object *obj,
+ GElf_Shdr *shdr, Elf_Data *data)
+{
+ int i, j, nrels, new_sz, ptr_sz = sizeof(void *);
+ const struct btf_var_secinfo *vi = NULL;
+ const struct btf_type *sec, *var, *def;
+ const struct btf_member *member;
+ struct bpf_map *map, *targ_map;
+ const char *name, *mname;
+ Elf_Data *symbols;
+ unsigned int moff;
+ GElf_Sym sym;
+ GElf_Rel rel;
+ void *tmp;
+
+ if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
+ return -EINVAL;
+ sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
+ if (!sec)
+ return -EINVAL;
+
+ symbols = obj->efile.symbols;
+ nrels = shdr->sh_size / shdr->sh_entsize;
+ for (i = 0; i < nrels; i++) {
+ if (!gelf_getrel(data, i, &rel)) {
+ pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
+ pr_warn(".maps relo #%d: symbol %zx not found\n",
+ i, (size_t)GELF_R_SYM(rel.r_info));
+ return -LIBBPF_ERRNO__FORMAT;
+ }
+ name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+ sym.st_name) ? : "<?>";
+ if (sym.st_shndx != obj->efile.btf_maps_shndx) {
+ pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
+ i, name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
+ i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
+ (size_t)rel.r_offset, sym.st_name, name);
+
+ for (j = 0; j < obj->nr_maps; j++) {
+ map = &obj->maps[j];
+ if (map->sec_idx != obj->efile.btf_maps_shndx)
+ continue;
+
+ vi = btf_var_secinfos(sec) + map->btf_var_idx;
+ if (vi->offset <= rel.r_offset &&
+ rel.r_offset + sizeof(void *) <= vi->offset + vi->size)
+ break;
+ }
+ if (j == obj->nr_maps) {
+ pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
+ i, name, (size_t)rel.r_offset);
+ return -EINVAL;
+ }
+
+ if (!bpf_map_type__is_map_in_map(map->def.type))
+ return -EINVAL;
+ if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
+ map->def.key_size != sizeof(int)) {
+ pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
+ i, map->name, sizeof(int));
+ return -EINVAL;
+ }
+
+ targ_map = bpf_object__find_map_by_name(obj, name);
+ if (!targ_map)
+ return -ESRCH;
+
+ var = btf__type_by_id(obj->btf, vi->type);
+ def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
+ if (btf_vlen(def) == 0)
+ return -EINVAL;
+ member = btf_members(def) + btf_vlen(def) - 1;
+ mname = btf__name_by_offset(obj->btf, member->name_off);
+ if (strcmp(mname, "values"))
+ return -EINVAL;
+
+ moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
+ if (rel.r_offset - vi->offset < moff)
+ return -EINVAL;
+
+ moff = rel.r_offset - vi->offset - moff;
+ if (moff % ptr_sz)
+ return -EINVAL;
+ moff /= ptr_sz;
+ if (moff >= map->init_slots_sz) {
+ new_sz = moff + 1;
+ tmp = realloc(map->init_slots, new_sz * ptr_sz);
+ if (!tmp)
+ return -ENOMEM;
+ map->init_slots = tmp;
+ memset(map->init_slots + map->init_slots_sz, 0,
+ (new_sz - map->init_slots_sz) * ptr_sz);
+ map->init_slots_sz = new_sz;
+ }
+ map->init_slots[moff] = targ_map;
+
+ pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
+ i, map->name, moff, name);
+ }
+
+ return 0;
+}
static int bpf_object__collect_reloc(struct bpf_object *obj)
{
@@ -4819,21 +5151,17 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
}
if (idx == obj->efile.st_ops_shndx) {
- err = bpf_object__collect_struct_ops_map_reloc(obj,
- shdr,
- data);
- if (err)
- return err;
- continue;
- }
-
- prog = bpf_object__find_prog_by_idx(obj, idx);
- if (!prog) {
- pr_warn("relocation failed: no section(%d)\n", idx);
- return -LIBBPF_ERRNO__RELOC;
+ err = bpf_object__collect_st_ops_relos(obj, shdr, data);
+ } else if (idx == obj->efile.btf_maps_shndx) {
+ err = bpf_object__collect_map_relos(obj, shdr, data);
+ } else {
+ prog = bpf_object__find_prog_by_idx(obj, idx);
+ if (!prog) {
+ pr_warn("relocation failed: no prog in section(%d)\n", idx);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ err = bpf_program__collect_reloc(prog, shdr, data, obj);
}
-
- err = bpf_program__collect_reloc(prog, shdr, data, obj);
if (err)
return err;
}
@@ -4846,8 +5174,8 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
{
struct bpf_load_program_attr load_attr;
char *cp, errmsg[STRERR_BUFSIZE];
- int log_buf_size = BPF_LOG_BUF_SIZE;
- char *log_buf;
+ size_t log_buf_size = 0;
+ char *log_buf = NULL;
int btf_fd, ret;
if (!insns || !insns_cnt)
@@ -4855,13 +5183,19 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
load_attr.prog_type = prog->type;
- load_attr.expected_attach_type = prog->expected_attach_type;
+ /* old kernels might not support specifying expected_attach_type */
+ if (!prog->caps->exp_attach_type && prog->sec_def &&
+ prog->sec_def->is_exp_attach_type_optional)
+ load_attr.expected_attach_type = 0;
+ else
+ load_attr.expected_attach_type = prog->expected_attach_type;
if (prog->caps->name)
load_attr.name = prog->name;
load_attr.insns = insns;
load_attr.insns_cnt = insns_cnt;
load_attr.license = license;
- if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
+ prog->type == BPF_PROG_TYPE_LSM) {
load_attr.attach_btf_id = prog->attach_btf_id;
} else if (prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_EXT) {
@@ -4887,22 +5221,28 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,
load_attr.prog_flags = prog->prog_flags;
retry_load:
- log_buf = malloc(log_buf_size);
- if (!log_buf)
- pr_warn("Alloc log buffer for bpf loader error, continue without log\n");
+ if (log_buf_size) {
+ log_buf = malloc(log_buf_size);
+ if (!log_buf)
+ return -ENOMEM;
+
+ *log_buf = 0;
+ }
ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size);
if (ret >= 0) {
- if (load_attr.log_level)
+ if (log_buf && load_attr.log_level)
pr_debug("verifier log:\n%s", log_buf);
*pfd = ret;
ret = 0;
goto out;
}
- if (errno == ENOSPC) {
- log_buf_size <<= 1;
+ if (!log_buf || errno == ENOSPC) {
+ log_buf_size = max((size_t)BPF_LOG_BUF_SIZE,
+ log_buf_size << 1);
+
free(log_buf);
goto retry_load;
}
@@ -4945,8 +5285,9 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
{
int err = 0, fd, i, btf_id;
- if (prog->type == BPF_PROG_TYPE_TRACING ||
- prog->type == BPF_PROG_TYPE_EXT) {
+ if ((prog->type == BPF_PROG_TYPE_TRACING ||
+ prog->type == BPF_PROG_TYPE_LSM ||
+ prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
btf_id = libbpf_find_attach_btf_id(prog);
if (btf_id <= 0)
return btf_id;
@@ -5048,6 +5389,8 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)
return 0;
}
+static const struct bpf_sec_def *find_sec_def(const char *sec_name);
+
static struct bpf_object *
__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
const struct bpf_object_open_opts *opts)
@@ -5103,24 +5446,17 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
bpf_object__elf_finish(obj);
bpf_object__for_each_program(prog, obj) {
- enum bpf_prog_type prog_type;
- enum bpf_attach_type attach_type;
-
- if (prog->type != BPF_PROG_TYPE_UNSPEC)
- continue;
-
- err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
- &attach_type);
- if (err == -ESRCH)
+ prog->sec_def = find_sec_def(prog->section_name);
+ if (!prog->sec_def)
/* couldn't guess, but user might manually specify */
continue;
- if (err)
- goto out;
- bpf_program__set_type(prog, prog_type);
- bpf_program__set_expected_attach_type(prog, attach_type);
- if (prog_type == BPF_PROG_TYPE_TRACING ||
- prog_type == BPF_PROG_TYPE_EXT)
+ bpf_program__set_type(prog, prog->sec_def->prog_type);
+ bpf_program__set_expected_attach_type(prog,
+ prog->sec_def->expected_attach_type);
+
+ if (prog->sec_def->prog_type == BPF_PROG_TYPE_TRACING ||
+ prog->sec_def->prog_type == BPF_PROG_TYPE_EXT)
prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
}
@@ -5321,7 +5657,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
obj->loaded = true;
- err = bpf_object__probe_caps(obj);
+ err = bpf_object__probe_loading(obj);
+ err = err ? : bpf_object__probe_caps(obj);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
err = err ? : bpf_object__sanitize_and_load_btf(obj);
err = err ? : bpf_object__sanitize_maps(obj);
@@ -5890,6 +6227,40 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
return 0;
}
+static void bpf_map__destroy(struct bpf_map *map)
+{
+ if (map->clear_priv)
+ map->clear_priv(map, map->priv);
+ map->priv = NULL;
+ map->clear_priv = NULL;
+
+ if (map->inner_map) {
+ bpf_map__destroy(map->inner_map);
+ zfree(&map->inner_map);
+ }
+
+ zfree(&map->init_slots);
+ map->init_slots_sz = 0;
+
+ if (map->mmaped) {
+ munmap(map->mmaped, bpf_map_mmap_sz(map));
+ map->mmaped = NULL;
+ }
+
+ if (map->st_ops) {
+ zfree(&map->st_ops->data);
+ zfree(&map->st_ops->progs);
+ zfree(&map->st_ops->kern_func_off);
+ zfree(&map->st_ops);
+ }
+
+ zfree(&map->name);
+ zfree(&map->pin_path);
+
+ if (map->fd >= 0)
+ zclose(map->fd);
+}
+
void bpf_object__close(struct bpf_object *obj)
{
size_t i;
@@ -5905,29 +6276,8 @@ void bpf_object__close(struct bpf_object *obj)
btf__free(obj->btf);
btf_ext__free(obj->btf_ext);
- for (i = 0; i < obj->nr_maps; i++) {
- struct bpf_map *map = &obj->maps[i];
-
- if (map->clear_priv)
- map->clear_priv(map, map->priv);
- map->priv = NULL;
- map->clear_priv = NULL;
-
- if (map->mmaped) {
- munmap(map->mmaped, bpf_map_mmap_sz(map));
- map->mmaped = NULL;
- }
-
- if (map->st_ops) {
- zfree(&map->st_ops->data);
- zfree(&map->st_ops->progs);
- zfree(&map->st_ops->kern_func_off);
- zfree(&map->st_ops);
- }
-
- zfree(&map->name);
- zfree(&map->pin_path);
- }
+ for (i = 0; i < obj->nr_maps; i++)
+ bpf_map__destroy(&obj->maps[i]);
zfree(&obj->kconfig);
zfree(&obj->externs);
@@ -6185,6 +6535,7 @@ bool bpf_program__is_##NAME(const struct bpf_program *prog) \
} \
BPF_PROG_TYPE_FNS(socket_filter, BPF_PROG_TYPE_SOCKET_FILTER);
+BPF_PROG_TYPE_FNS(lsm, BPF_PROG_TYPE_LSM);
BPF_PROG_TYPE_FNS(kprobe, BPF_PROG_TYPE_KPROBE);
BPF_PROG_TYPE_FNS(sched_cls, BPF_PROG_TYPE_SCHED_CLS);
BPF_PROG_TYPE_FNS(sched_act, BPF_PROG_TYPE_SCHED_ACT);
@@ -6208,23 +6559,32 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
prog->expected_attach_type = type;
}
-#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \
- { string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype }
+#define BPF_PROG_SEC_IMPL(string, ptype, eatype, eatype_optional, \
+ attachable, attach_btf) \
+ { \
+ .sec = string, \
+ .len = sizeof(string) - 1, \
+ .prog_type = ptype, \
+ .expected_attach_type = eatype, \
+ .is_exp_attach_type_optional = eatype_optional, \
+ .is_attachable = attachable, \
+ .is_attach_btf = attach_btf, \
+ }
/* Programs that can NOT be attached. */
#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)
/* Programs that can be attached. */
#define BPF_APROG_SEC(string, ptype, atype) \
- BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype)
+ BPF_PROG_SEC_IMPL(string, ptype, atype, true, 1, 0)
/* Programs that must specify expected attach type at load time. */
#define BPF_EAPROG_SEC(string, ptype, eatype) \
- BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype)
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 1, 0)
/* Programs that use BTF to identify attach point */
#define BPF_PROG_BTF(string, ptype, eatype) \
- BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0)
+ BPF_PROG_SEC_IMPL(string, ptype, eatype, false, 0, 1)
/* Programs that can be attached but attach type can't be identified by section
* name. Kept for backward compatibility.
@@ -6238,11 +6598,6 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,
__VA_ARGS__ \
}
-struct bpf_sec_def;
-
-typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec,
- struct bpf_program *prog);
-
static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec,
struct bpf_program *prog);
static struct bpf_link *attach_tp(const struct bpf_sec_def *sec,
@@ -6251,17 +6606,10 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
struct bpf_program *prog);
static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
struct bpf_program *prog);
-
-struct bpf_sec_def {
- const char *sec;
- size_t len;
- enum bpf_prog_type prog_type;
- enum bpf_attach_type expected_attach_type;
- bool is_attachable;
- bool is_attach_btf;
- enum bpf_attach_type attach_type;
- attach_fn_t attach_fn;
-};
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+ struct bpf_program *prog);
static const struct bpf_sec_def section_defs[] = {
BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER),
@@ -6290,6 +6638,10 @@ static const struct bpf_sec_def section_defs[] = {
.expected_attach_type = BPF_TRACE_FENTRY,
.is_attach_btf = true,
.attach_fn = attach_trace),
+ SEC_DEF("fmod_ret/", TRACING,
+ .expected_attach_type = BPF_MODIFY_RETURN,
+ .is_attach_btf = true,
+ .attach_fn = attach_trace),
SEC_DEF("fexit/", TRACING,
.expected_attach_type = BPF_TRACE_FEXIT,
.is_attach_btf = true,
@@ -6297,6 +6649,16 @@ static const struct bpf_sec_def section_defs[] = {
SEC_DEF("freplace/", EXT,
.is_attach_btf = true,
.attach_fn = attach_trace),
+ SEC_DEF("lsm/", LSM,
+ .is_attach_btf = true,
+ .expected_attach_type = BPF_LSM_MAC,
+ .attach_fn = attach_lsm),
+ SEC_DEF("iter/", TRACING,
+ .expected_attach_type = BPF_TRACE_ITER,
+ .is_attach_btf = true,
+ .attach_fn = attach_iter),
+ BPF_EAPROG_SEC("xdp_devmap", BPF_PROG_TYPE_XDP,
+ BPF_XDP_DEVMAP),
BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP),
BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT),
BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN),
@@ -6345,6 +6707,14 @@ static const struct bpf_sec_def section_defs[] = {
BPF_CGROUP_UDP4_RECVMSG),
BPF_EAPROG_SEC("cgroup/recvmsg6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
BPF_CGROUP_UDP6_RECVMSG),
+ BPF_EAPROG_SEC("cgroup/getpeername4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET4_GETPEERNAME),
+ BPF_EAPROG_SEC("cgroup/getpeername6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET6_GETPEERNAME),
+ BPF_EAPROG_SEC("cgroup/getsockname4", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET4_GETSOCKNAME),
+ BPF_EAPROG_SEC("cgroup/getsockname6", BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ BPF_CGROUP_INET6_GETSOCKNAME),
BPF_EAPROG_SEC("cgroup/sysctl", BPF_PROG_TYPE_CGROUP_SYSCTL,
BPF_CGROUP_SYSCTL),
BPF_EAPROG_SEC("cgroup/getsockopt", BPF_PROG_TYPE_CGROUP_SOCKOPT,
@@ -6447,9 +6817,8 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
}
/* Collect the reloc from ELF and populate the st_ops->progs[] */
-static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
- GElf_Shdr *shdr,
- Elf_Data *data)
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
+ GElf_Shdr *shdr, Elf_Data *data)
{
const struct btf_member *member;
struct bpf_struct_ops *st_ops;
@@ -6559,6 +6928,8 @@ invalid_prog:
}
#define BTF_TRACE_PREFIX "btf_trace_"
+#define BTF_LSM_PREFIX "bpf_lsm_"
+#define BTF_ITER_PREFIX "bpf_iter_"
#define BTF_MAX_NAME_SIZE 128
static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
@@ -6586,9 +6957,18 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
if (attach_type == BPF_TRACE_RAW_TP)
err = find_btf_by_prefix_kind(btf, BTF_TRACE_PREFIX, name,
BTF_KIND_TYPEDEF);
+ else if (attach_type == BPF_LSM_MAC)
+ err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
+ BTF_KIND_FUNC);
+ else if (attach_type == BPF_TRACE_ITER)
+ err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
+ BTF_KIND_FUNC);
else
err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+ if (err <= 0)
+ pr_warn("%s is not found in vmlinux BTF\n", name);
+
return err;
}
@@ -6596,6 +6976,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
enum bpf_attach_type attach_type)
{
struct btf *btf;
+ int err;
btf = libbpf_find_kernel_btf();
if (IS_ERR(btf)) {
@@ -6603,7 +6984,9 @@ int libbpf_find_vmlinux_btf_id(const char *name,
return -EINVAL;
}
- return __find_vmlinux_btf_id(btf, name, attach_type);
+ err = __find_vmlinux_btf_id(btf, name, attach_type);
+ btf__free(btf);
+ return err;
}
static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
@@ -6661,8 +7044,6 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog)
err = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
name + section_defs[i].len,
attach_type);
- if (err <= 0)
- pr_warn("%s is not found in vmlinux BTF\n", name);
return err;
}
pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
@@ -6683,7 +7064,7 @@ int libbpf_attach_type_by_name(const char *name,
continue;
if (!section_defs[i].is_attachable)
return -EINVAL;
- *attach_type = section_defs[i].attach_type;
+ *attach_type = section_defs[i].expected_attach_type;
return 0;
}
pr_debug("failed to guess attach type based on ELF section name '%s'\n", name);
@@ -6742,6 +7123,17 @@ void *bpf_map__priv(const struct bpf_map *map)
return map ? map->priv : ERR_PTR(-EINVAL);
}
+int bpf_map__set_initial_value(struct bpf_map *map,
+ const void *data, size_t size)
+{
+ if (!map->mmaped || map->libbpf_type == LIBBPF_MAP_KCONFIG ||
+ size != map->def.value_size || map->fd >= 0)
+ return -EINVAL;
+
+ memcpy(map->mmaped, data, size);
+ return 0;
+}
+
bool bpf_map__is_offload_neutral(const struct bpf_map *map)
{
return map->def.type == BPF_MAP_TYPE_PERF_EVENT_ARRAY;
@@ -6921,7 +7313,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
err = bpf_object__load(obj);
if (err) {
bpf_object__close(obj);
- return -EINVAL;
+ return err;
}
*pobj = obj;
@@ -6932,9 +7324,17 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
struct bpf_link {
int (*detach)(struct bpf_link *link);
int (*destroy)(struct bpf_link *link);
+ char *pin_path; /* NULL, if not pinned */
+ int fd; /* hook FD, -1 if not applicable */
bool disconnected;
};
+/* Replace link's underlying BPF program with the new one */
+int bpf_link__update_program(struct bpf_link *link, struct bpf_program *prog)
+{
+ return bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), NULL);
+}
+
/* Release "ownership" of underlying BPF resource (typically, BPF program
* attached to some BPF hook, e.g., tracepoint, kprobe, etc). Disconnected
* link, when destructed through bpf_link__destroy() call won't attempt to
@@ -6961,26 +7361,109 @@ int bpf_link__destroy(struct bpf_link *link)
err = link->detach(link);
if (link->destroy)
link->destroy(link);
+ if (link->pin_path)
+ free(link->pin_path);
free(link);
return err;
}
-struct bpf_link_fd {
- struct bpf_link link; /* has to be at the top of struct */
- int fd; /* hook FD */
-};
+int bpf_link__fd(const struct bpf_link *link)
+{
+ return link->fd;
+}
+
+const char *bpf_link__pin_path(const struct bpf_link *link)
+{
+ return link->pin_path;
+}
+
+static int bpf_link__detach_fd(struct bpf_link *link)
+{
+ return close(link->fd);
+}
+
+struct bpf_link *bpf_link__open(const char *path)
+{
+ struct bpf_link *link;
+ int fd;
+
+ fd = bpf_obj_get(path);
+ if (fd < 0) {
+ fd = -errno;
+ pr_warn("failed to open link at %s: %d\n", path, fd);
+ return ERR_PTR(fd);
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link) {
+ close(fd);
+ return ERR_PTR(-ENOMEM);
+ }
+ link->detach = &bpf_link__detach_fd;
+ link->fd = fd;
+
+ link->pin_path = strdup(path);
+ if (!link->pin_path) {
+ bpf_link__destroy(link);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return link;
+}
+
+int bpf_link__pin(struct bpf_link *link, const char *path)
+{
+ int err;
+
+ if (link->pin_path)
+ return -EBUSY;
+ err = make_parent_dir(path);
+ if (err)
+ return err;
+ err = check_path(path);
+ if (err)
+ return err;
+
+ link->pin_path = strdup(path);
+ if (!link->pin_path)
+ return -ENOMEM;
+
+ if (bpf_obj_pin(link->fd, link->pin_path)) {
+ err = -errno;
+ zfree(&link->pin_path);
+ return err;
+ }
+
+ pr_debug("link fd=%d: pinned at %s\n", link->fd, link->pin_path);
+ return 0;
+}
+
+int bpf_link__unpin(struct bpf_link *link)
+{
+ int err;
+
+ if (!link->pin_path)
+ return -EINVAL;
+
+ err = unlink(link->pin_path);
+ if (err != 0)
+ return -errno;
+
+ pr_debug("link fd=%d: unpinned from %s\n", link->fd, link->pin_path);
+ zfree(&link->pin_path);
+ return 0;
+}
static int bpf_link__detach_perf_event(struct bpf_link *link)
{
- struct bpf_link_fd *l = (void *)link;
int err;
- err = ioctl(l->fd, PERF_EVENT_IOC_DISABLE, 0);
+ err = ioctl(link->fd, PERF_EVENT_IOC_DISABLE, 0);
if (err)
err = -errno;
- close(l->fd);
+ close(link->fd);
return err;
}
@@ -6988,7 +7471,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
int pfd)
{
char errmsg[STRERR_BUFSIZE];
- struct bpf_link_fd *link;
+ struct bpf_link *link;
int prog_fd, err;
if (pfd < 0) {
@@ -7006,7 +7489,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.detach = &bpf_link__detach_perf_event;
+ link->detach = &bpf_link__detach_perf_event;
link->fd = pfd;
if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {
@@ -7025,7 +7508,7 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,
libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
return ERR_PTR(err);
}
- return (struct bpf_link *)link;
+ return link;
}
/*
@@ -7313,18 +7796,11 @@ out:
return link;
}
-static int bpf_link__detach_fd(struct bpf_link *link)
-{
- struct bpf_link_fd *l = (void *)link;
-
- return close(l->fd);
-}
-
struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
const char *tp_name)
{
char errmsg[STRERR_BUFSIZE];
- struct bpf_link_fd *link;
+ struct bpf_link *link;
int prog_fd, pfd;
prog_fd = bpf_program__fd(prog);
@@ -7337,7 +7813,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.detach = &bpf_link__detach_fd;
+ link->detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(tp_name, prog_fd);
if (pfd < 0) {
@@ -7349,7 +7825,7 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
return ERR_PTR(pfd);
}
link->fd = pfd;
- return (struct bpf_link *)link;
+ return link;
}
static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
@@ -7360,10 +7836,11 @@ static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec,
return bpf_program__attach_raw_tracepoint(prog, tp_name);
}
-struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+/* Common logic for all BPF program types that attach to a btf_id */
+static struct bpf_link *bpf_program__attach_btf_id(struct bpf_program *prog)
{
char errmsg[STRERR_BUFSIZE];
- struct bpf_link_fd *link;
+ struct bpf_link *link;
int prog_fd, pfd;
prog_fd = bpf_program__fd(prog);
@@ -7376,13 +7853,13 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
link = calloc(1, sizeof(*link));
if (!link)
return ERR_PTR(-ENOMEM);
- link->link.detach = &bpf_link__detach_fd;
+ link->detach = &bpf_link__detach_fd;
pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
if (pfd < 0) {
pfd = -errno;
free(link);
- pr_warn("program '%s': failed to attach to trace: %s\n",
+ pr_warn("program '%s': failed to attach: %s\n",
bpf_program__title(prog, false),
libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
return ERR_PTR(pfd);
@@ -7391,12 +7868,117 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
return (struct bpf_link *)link;
}
+struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+{
+ return bpf_program__attach_btf_id(prog);
+}
+
+struct bpf_link *bpf_program__attach_lsm(struct bpf_program *prog)
+{
+ return bpf_program__attach_btf_id(prog);
+}
+
static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
struct bpf_program *prog)
{
return bpf_program__attach_trace(prog);
}
+static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ return bpf_program__attach_lsm(prog);
+}
+
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+ struct bpf_program *prog)
+{
+ return bpf_program__attach_iter(prog, NULL);
+}
+
+static struct bpf_link *
+bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
+ const char *target_name)
+{
+ enum bpf_attach_type attach_type;
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link *link;
+ int prog_fd, link_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("program '%s': can't attach before loaded\n",
+ bpf_program__title(prog, false));
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ link->detach = &bpf_link__detach_fd;
+
+ attach_type = bpf_program__get_expected_attach_type(prog);
+ link_fd = bpf_link_create(prog_fd, target_fd, attach_type, NULL);
+ if (link_fd < 0) {
+ link_fd = -errno;
+ free(link);
+ pr_warn("program '%s': failed to attach to %s: %s\n",
+ bpf_program__title(prog, false), target_name,
+ libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(link_fd);
+ }
+ link->fd = link_fd;
+ return link;
+}
+
+struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+{
+ return bpf_program__attach_fd(prog, cgroup_fd, "cgroup");
+}
+
+struct bpf_link *
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
+{
+ return bpf_program__attach_fd(prog, netns_fd, "netns");
+}
+
+struct bpf_link *
+bpf_program__attach_iter(struct bpf_program *prog,
+ const struct bpf_iter_attach_opts *opts)
+{
+ char errmsg[STRERR_BUFSIZE];
+ struct bpf_link *link;
+ int prog_fd, link_fd;
+
+ if (!OPTS_VALID(opts, bpf_iter_attach_opts))
+ return ERR_PTR(-EINVAL);
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0) {
+ pr_warn("program '%s': can't attach before loaded\n",
+ bpf_program__title(prog, false));
+ return ERR_PTR(-EINVAL);
+ }
+
+ link = calloc(1, sizeof(*link));
+ if (!link)
+ return ERR_PTR(-ENOMEM);
+ link->detach = &bpf_link__detach_fd;
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, NULL);
+ if (link_fd < 0) {
+ link_fd = -errno;
+ free(link);
+ pr_warn("program '%s': failed to attach to iterator: %s\n",
+ bpf_program__title(prog, false),
+ libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+ return ERR_PTR(link_fd);
+ }
+ link->fd = link_fd;
+ return link;
+}
+
struct bpf_link *bpf_program__attach(struct bpf_program *prog)
{
const struct bpf_sec_def *sec_def;
@@ -7410,10 +7992,9 @@ struct bpf_link *bpf_program__attach(struct bpf_program *prog)
static int bpf_link__detach_struct_ops(struct bpf_link *link)
{
- struct bpf_link_fd *l = (void *)link;
__u32 zero = 0;
- if (bpf_map_delete_elem(l->fd, &zero))
+ if (bpf_map_delete_elem(link->fd, &zero))
return -errno;
return 0;
@@ -7422,7 +8003,7 @@ static int bpf_link__detach_struct_ops(struct bpf_link *link)
struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
{
struct bpf_struct_ops *st_ops;
- struct bpf_link_fd *link;
+ struct bpf_link *link;
__u32 i, zero = 0;
int err;
@@ -7454,10 +8035,10 @@ struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map)
return ERR_PTR(err);
}
- link->link.detach = bpf_link__detach_struct_ops;
+ link->detach = bpf_link__detach_struct_ops;
link->fd = map->fd;
- return (struct bpf_link *)link;
+ return link;
}
enum bpf_perf_event_ret
@@ -7571,9 +8152,12 @@ void perf_buffer__free(struct perf_buffer *pb)
if (!pb)
return;
if (pb->cpu_bufs) {
- for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) {
+ for (i = 0; i < pb->cpu_cnt; i++) {
struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+ if (!cpu_buf)
+ continue;
+
bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
perf_buffer__free_cpu_buf(pb, cpu_buf);
}
@@ -7816,7 +8400,7 @@ error:
struct perf_sample_raw {
struct perf_event_header header;
uint32_t size;
- char data[0];
+ char data[];
};
struct perf_sample_lost {
@@ -7890,6 +8474,25 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
return cnt < 0 ? -errno : cnt;
}
+int perf_buffer__consume(struct perf_buffer *pb)
+{
+ int i, err;
+
+ for (i = 0; i < pb->cpu_cnt; i++) {
+ struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+
+ if (!cpu_buf)
+ continue;
+
+ err = perf_buffer__process_records(pb, cpu_buf);
+ if (err) {
+ pr_warn("error while processing records: %d\n", err);
+ return err;
+ }
+ }
+ return 0;
+}
+
struct bpf_prog_info_array_desc {
int array_offset; /* e.g. offset of jited_prog_insns */
int count_offset; /* e.g. offset of jited_prog_len */
@@ -8138,6 +8741,31 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear)
}
}
+int bpf_program__set_attach_target(struct bpf_program *prog,
+ int attach_prog_fd,
+ const char *attach_func_name)
+{
+ int btf_id;
+
+ if (!prog || attach_prog_fd < 0 || !attach_func_name)
+ return -EINVAL;
+
+ if (attach_prog_fd)
+ btf_id = libbpf_find_prog_btf_id(attach_func_name,
+ attach_prog_fd);
+ else
+ btf_id = __find_vmlinux_btf_id(prog->obj->btf_vmlinux,
+ attach_func_name,
+ prog->expected_attach_type);
+
+ if (btf_id < 0)
+ return btf_id;
+
+ prog->attach_btf_id = btf_id;
+ prog->attach_prog_fd = attach_prog_fd;
+ return 0;
+}
+
int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz)
{
int err = 0, n, len, start, end = -1;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3fe12c9d1f92..334437af3014 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -219,6 +219,13 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog);
struct bpf_link;
+LIBBPF_API struct bpf_link *bpf_link__open(const char *path);
+LIBBPF_API int bpf_link__fd(const struct bpf_link *link);
+LIBBPF_API const char *bpf_link__pin_path(const struct bpf_link *link);
+LIBBPF_API int bpf_link__pin(struct bpf_link *link, const char *path);
+LIBBPF_API int bpf_link__unpin(struct bpf_link *link);
+LIBBPF_API int bpf_link__update_program(struct bpf_link *link,
+ struct bpf_program *prog);
LIBBPF_API void bpf_link__disconnect(struct bpf_link *link);
LIBBPF_API int bpf_link__destroy(struct bpf_link *link);
@@ -240,11 +247,28 @@ bpf_program__attach_tracepoint(struct bpf_program *prog,
LIBBPF_API struct bpf_link *
bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
const char *tp_name);
-
LIBBPF_API struct bpf_link *
bpf_program__attach_trace(struct bpf_program *prog);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_lsm(struct bpf_program *prog);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
+
struct bpf_map;
+
LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
+
+struct bpf_iter_attach_opts {
+ size_t sz; /* size of this struct for forward/backward compatibility */
+};
+#define bpf_iter_attach_opts__last_field sz
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_iter(struct bpf_program *prog,
+ const struct bpf_iter_attach_opts *opts);
+
struct bpf_insn;
/*
@@ -316,6 +340,7 @@ LIBBPF_API int bpf_program__set_socket_filter(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_tracepoint(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_raw_tracepoint(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_kprobe(struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_lsm(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);
@@ -334,10 +359,15 @@ LIBBPF_API void
bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type);
+LIBBPF_API int
+bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,
+ const char *attach_func_name);
+
LIBBPF_API bool bpf_program__is_socket_filter(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_tracepoint(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_raw_tracepoint(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_kprobe(const struct bpf_program *prog);
+LIBBPF_API bool bpf_program__is_lsm(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);
LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);
@@ -398,6 +428,8 @@ typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
bpf_map_clear_priv_t clear_priv);
LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
+ const void *data, size_t size);
LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
@@ -435,11 +467,40 @@ struct xdp_link_info {
__u8 attach_mode;
};
+struct bpf_xdp_set_link_opts {
+ size_t sz;
+ int old_fd;
+};
+#define bpf_xdp_set_link_opts__last_field old_fd
+
LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
+LIBBPF_API int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
+ const struct bpf_xdp_set_link_opts *opts);
LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
size_t info_size, __u32 flags);
+/* Ring buffer APIs */
+struct ring_buffer;
+
+typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
+
+struct ring_buffer_opts {
+ size_t sz; /* size of this struct, for forward/backward compatiblity */
+};
+
+#define ring_buffer_opts__last_field sz
+
+LIBBPF_API struct ring_buffer *
+ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
+ const struct ring_buffer_opts *opts);
+LIBBPF_API void ring_buffer__free(struct ring_buffer *rb);
+LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+ ring_buffer_sample_fn sample_cb, void *ctx);
+LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
+LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
+
+/* Perf buffer APIs */
struct perf_buffer;
typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
@@ -495,6 +556,7 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
+LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
typedef enum bpf_perf_event_ret
(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index b035122142bb..f732c77b7ed0 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -235,3 +235,38 @@ LIBBPF_0.0.7 {
btf__align_of;
libbpf_find_kernel_btf;
} LIBBPF_0.0.6;
+
+LIBBPF_0.0.8 {
+ global:
+ bpf_link__fd;
+ bpf_link__open;
+ bpf_link__pin;
+ bpf_link__pin_path;
+ bpf_link__unpin;
+ bpf_link__update_program;
+ bpf_link_create;
+ bpf_link_update;
+ bpf_map__set_initial_value;
+ bpf_program__attach_cgroup;
+ bpf_program__attach_lsm;
+ bpf_program__is_lsm;
+ bpf_program__set_attach_target;
+ bpf_program__set_lsm;
+ bpf_set_link_xdp_fd_opts;
+} LIBBPF_0.0.7;
+
+LIBBPF_0.0.9 {
+ global:
+ bpf_enable_stats;
+ bpf_iter_create;
+ bpf_link_get_fd_by_id;
+ bpf_link_get_next_id;
+ bpf_program__attach_iter;
+ bpf_program__attach_netns;
+ perf_buffer__consume;
+ ring_buffer__add;
+ ring_buffer__consume;
+ ring_buffer__free;
+ ring_buffer__new;
+ ring_buffer__poll;
+} LIBBPF_0.0.8;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 8c3afbd97747..50d70e90d5f1 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -153,7 +153,7 @@ struct btf_ext_info_sec {
__u32 sec_name_off;
__u32 num_info;
/* Followed by num_info * record_size number of bytes */
- __u8 data[0];
+ __u8 data[];
};
/* The minimum bpf_func_info checked by the loader */
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index b782ebef6ac9..10cd8d1891f5 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -108,6 +108,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
case BPF_PROG_TYPE_TRACING:
case BPF_PROG_TYPE_STRUCT_OPS:
case BPF_PROG_TYPE_EXT:
+ case BPF_PROG_TYPE_LSM:
default:
break;
}
@@ -237,6 +238,11 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
if (btf_fd < 0)
return false;
break;
+ case BPF_MAP_TYPE_RINGBUF:
+ key_size = 0;
+ value_size = 0;
+ max_entries = 4096;
+ break;
case BPF_MAP_TYPE_UNSPEC:
case BPF_MAP_TYPE_HASH:
case BPF_MAP_TYPE_ARRAY:
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 431bd25c6cdb..312f887570b2 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -132,7 +132,8 @@ done:
return ret;
}
-int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+static int __bpf_set_link_xdp_fd_replace(int ifindex, int fd, int old_fd,
+ __u32 flags)
{
int sock, seq = 0, ret;
struct nlattr *nla, *nla_xdp;
@@ -141,7 +142,7 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
struct ifinfomsg ifinfo;
char attrbuf[64];
} req;
- __u32 nl_pid;
+ __u32 nl_pid = 0;
sock = libbpf_netlink_open(&nl_pid);
if (sock < 0)
@@ -178,6 +179,14 @@ int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
nla->nla_len += nla_xdp->nla_len;
}
+ if (flags & XDP_FLAGS_REPLACE) {
+ nla_xdp = (struct nlattr *)((char *)nla + nla->nla_len);
+ nla_xdp->nla_type = IFLA_XDP_EXPECTED_FD;
+ nla_xdp->nla_len = NLA_HDRLEN + sizeof(old_fd);
+ memcpy((char *)nla_xdp + NLA_HDRLEN, &old_fd, sizeof(old_fd));
+ nla->nla_len += nla_xdp->nla_len;
+ }
+
req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
@@ -191,6 +200,29 @@ cleanup:
return ret;
}
+int bpf_set_link_xdp_fd_opts(int ifindex, int fd, __u32 flags,
+ const struct bpf_xdp_set_link_opts *opts)
+{
+ int old_fd = -1;
+
+ if (!OPTS_VALID(opts, bpf_xdp_set_link_opts))
+ return -EINVAL;
+
+ if (OPTS_HAS(opts, old_fd)) {
+ old_fd = OPTS_GET(opts, old_fd, -1);
+ flags |= XDP_FLAGS_REPLACE;
+ }
+
+ return __bpf_set_link_xdp_fd_replace(ifindex, fd,
+ old_fd,
+ flags);
+}
+
+int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags)
+{
+ return __bpf_set_link_xdp_fd_replace(ifindex, fd, 0, flags);
+}
+
static int __dump_link_nlmsg(struct nlmsghdr *nlh,
libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)
{
@@ -256,7 +288,7 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
{
struct xdp_id_md xdp_id = {};
int sock, ret;
- __u32 nl_pid;
+ __u32 nl_pid = 0;
__u32 mask;
if (flags & ~XDP_FLAGS_MASK || !info_size)
@@ -289,7 +321,9 @@ int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
{
- if (info->attach_mode != XDP_ATTACHED_MULTI)
+ flags &= XDP_FLAGS_MODES;
+
+ if (info->attach_mode != XDP_ATTACHED_MULTI && !flags)
return info->prog_id;
if (flags & XDP_FLAGS_DRV_MODE)
return info->drv_prog_id;
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
new file mode 100644
index 000000000000..4fc6c6cbb4eb
--- /dev/null
+++ b/tools/lib/bpf/ringbuf.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * Ring buffer operations.
+ *
+ * Copyright (C) 2020 Facebook, Inc.
+ */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <linux/err.h>
+#include <linux/bpf.h>
+#include <asm/barrier.h>
+#include <sys/mman.h>
+#include <sys/epoll.h>
+#include <tools/libc_compat.h>
+
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "bpf.h"
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+struct ring {
+ ring_buffer_sample_fn sample_cb;
+ void *ctx;
+ void *data;
+ unsigned long *consumer_pos;
+ unsigned long *producer_pos;
+ unsigned long mask;
+ int map_fd;
+};
+
+struct ring_buffer {
+ struct epoll_event *events;
+ struct ring *rings;
+ size_t page_size;
+ int epoll_fd;
+ int ring_cnt;
+};
+
+static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
+{
+ if (r->consumer_pos) {
+ munmap(r->consumer_pos, rb->page_size);
+ r->consumer_pos = NULL;
+ }
+ if (r->producer_pos) {
+ munmap(r->producer_pos, rb->page_size + 2 * (r->mask + 1));
+ r->producer_pos = NULL;
+ }
+}
+
+/* Add extra RINGBUF maps to this ring buffer manager */
+int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+ ring_buffer_sample_fn sample_cb, void *ctx)
+{
+ struct bpf_map_info info;
+ __u32 len = sizeof(info);
+ struct epoll_event *e;
+ struct ring *r;
+ void *tmp;
+ int err;
+
+ memset(&info, 0, sizeof(info));
+
+ err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
+ if (err) {
+ err = -errno;
+ pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+
+ if (info.type != BPF_MAP_TYPE_RINGBUF) {
+ pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
+ map_fd);
+ return -EINVAL;
+ }
+
+ tmp = reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
+ if (!tmp)
+ return -ENOMEM;
+ rb->rings = tmp;
+
+ tmp = reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
+ if (!tmp)
+ return -ENOMEM;
+ rb->events = tmp;
+
+ r = &rb->rings[rb->ring_cnt];
+ memset(r, 0, sizeof(*r));
+
+ r->map_fd = map_fd;
+ r->sample_cb = sample_cb;
+ r->ctx = ctx;
+ r->mask = info.max_entries - 1;
+
+ /* Map writable consumer page */
+ tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ map_fd, 0);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+ r->consumer_pos = tmp;
+
+ /* Map read-only producer page and data pages. We map twice as big
+ * data size to allow simple reading of samples that wrap around the
+ * end of a ring buffer. See kernel implementation for details.
+ * */
+ tmp = mmap(NULL, rb->page_size + 2 * info.max_entries, PROT_READ,
+ MAP_SHARED, map_fd, rb->page_size);
+ if (tmp == MAP_FAILED) {
+ err = -errno;
+ ringbuf_unmap_ring(rb, r);
+ pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+ r->producer_pos = tmp;
+ r->data = tmp + rb->page_size;
+
+ e = &rb->events[rb->ring_cnt];
+ memset(e, 0, sizeof(*e));
+
+ e->events = EPOLLIN;
+ e->data.fd = rb->ring_cnt;
+ if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) {
+ err = -errno;
+ ringbuf_unmap_ring(rb, r);
+ pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
+ map_fd, err);
+ return err;
+ }
+
+ rb->ring_cnt++;
+ return 0;
+}
+
+void ring_buffer__free(struct ring_buffer *rb)
+{
+ int i;
+
+ if (!rb)
+ return;
+
+ for (i = 0; i < rb->ring_cnt; ++i)
+ ringbuf_unmap_ring(rb, &rb->rings[i]);
+ if (rb->epoll_fd >= 0)
+ close(rb->epoll_fd);
+
+ free(rb->events);
+ free(rb->rings);
+ free(rb);
+}
+
+struct ring_buffer *
+ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
+ const struct ring_buffer_opts *opts)
+{
+ struct ring_buffer *rb;
+ int err;
+
+ if (!OPTS_VALID(opts, ring_buffer_opts))
+ return NULL;
+
+ rb = calloc(1, sizeof(*rb));
+ if (!rb)
+ return NULL;
+
+ rb->page_size = getpagesize();
+
+ rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (rb->epoll_fd < 0) {
+ err = -errno;
+ pr_warn("ringbuf: failed to create epoll instance: %d\n", err);
+ goto err_out;
+ }
+
+ err = ring_buffer__add(rb, map_fd, sample_cb, ctx);
+ if (err)
+ goto err_out;
+
+ return rb;
+
+err_out:
+ ring_buffer__free(rb);
+ return NULL;
+}
+
+static inline int roundup_len(__u32 len)
+{
+ /* clear out top 2 bits (discard and busy, if set) */
+ len <<= 2;
+ len >>= 2;
+ /* add length prefix */
+ len += BPF_RINGBUF_HDR_SZ;
+ /* round up to 8 byte alignment */
+ return (len + 7) / 8 * 8;
+}
+
+static int ringbuf_process_ring(struct ring* r)
+{
+ int *len_ptr, len, err, cnt = 0;
+ unsigned long cons_pos, prod_pos;
+ bool got_new_data;
+ void *sample;
+
+ cons_pos = smp_load_acquire(r->consumer_pos);
+ do {
+ got_new_data = false;
+ prod_pos = smp_load_acquire(r->producer_pos);
+ while (cons_pos < prod_pos) {
+ len_ptr = r->data + (cons_pos & r->mask);
+ len = smp_load_acquire(len_ptr);
+
+ /* sample not committed yet, bail out for now */
+ if (len & BPF_RINGBUF_BUSY_BIT)
+ goto done;
+
+ got_new_data = true;
+ cons_pos += roundup_len(len);
+
+ if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) {
+ sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ;
+ err = r->sample_cb(r->ctx, sample, len);
+ if (err) {
+ /* update consumer pos and bail out */
+ smp_store_release(r->consumer_pos,
+ cons_pos);
+ return err;
+ }
+ cnt++;
+ }
+
+ smp_store_release(r->consumer_pos, cons_pos);
+ }
+ } while (got_new_data);
+done:
+ return cnt;
+}
+
+/* Consume available ring buffer(s) data without event polling.
+ * Returns number of records consumed across all registered ring buffers, or
+ * negative number if any of the callbacks return error.
+ */
+int ring_buffer__consume(struct ring_buffer *rb)
+{
+ int i, err, res = 0;
+
+ for (i = 0; i < rb->ring_cnt; i++) {
+ struct ring *ring = &rb->rings[i];
+
+ err = ringbuf_process_ring(ring);
+ if (err < 0)
+ return err;
+ res += err;
+ }
+ return res;
+}
+
+/* Poll for available data and consume records, if any are available.
+ * Returns number of records consumed, or negative number, if any of the
+ * registered callbacks returned error.
+ */
+int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
+{
+ int i, cnt, err, res = 0;
+
+ cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
+ for (i = 0; i < cnt; i++) {
+ __u32 ring_id = rb->events[i].data.fd;
+ struct ring *ring = &rb->rings[ring_id];
+
+ err = ringbuf_process_ring(ring);
+ if (err < 0)
+ return err;
+ res += cnt;
+ }
+ return cnt < 0 ? -errno : res;
+}
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 9807903f121e..f7f4efb70a4c 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -280,7 +280,11 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
fill->consumer = map + off.fr.consumer;
fill->flags = map + off.fr.flags;
fill->ring = map + off.fr.desc;
- fill->cached_cons = umem->config.fill_size;
+ fill->cached_prod = *fill->producer;
+ /* cached_cons is "size" bigger than the real consumer pointer
+ * See xsk_prod_nb_free
+ */
+ fill->cached_cons = *fill->consumer + umem->config.fill_size;
map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, umem->fd,
@@ -297,6 +301,8 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,
comp->consumer = map + off.cr.consumer;
comp->flags = map + off.cr.flags;
comp->ring = map + off.cr.desc;
+ comp->cached_prod = *comp->producer;
+ comp->cached_cons = *comp->consumer;
*umem_ptr = umem;
return 0;
@@ -672,6 +678,8 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
rx->consumer = rx_map + off.rx.consumer;
rx->flags = rx_map + off.rx.flags;
rx->ring = rx_map + off.rx.desc;
+ rx->cached_prod = *rx->producer;
+ rx->cached_cons = *rx->consumer;
}
xsk->rx = rx;
@@ -691,7 +699,11 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
tx->consumer = tx_map + off.tx.consumer;
tx->flags = tx_map + off.tx.flags;
tx->ring = tx_map + off.tx.desc;
- tx->cached_cons = xsk->config.tx_size;
+ tx->cached_prod = *tx->producer;
+ /* cached_cons is r->size bigger than the real consumer pointer
+ * See xsk_prod_nb_free
+ */
+ tx->cached_cons = *tx->consumer + xsk->config.tx_size;
}
xsk->tx = tx;
diff --git a/tools/lib/lockdep/.gitignore b/tools/lib/lockdep/.gitignore
index cc0e7a9f99e3..6c308ac4388c 100644
--- a/tools/lib/lockdep/.gitignore
+++ b/tools/lib/lockdep/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
liblockdep.so.*
diff --git a/tools/lib/perf/Documentation/examples/counting.c b/tools/lib/perf/Documentation/examples/counting.c
new file mode 100644
index 000000000000..6085693571ef
--- /dev/null
+++ b/tools/lib/perf/Documentation/examples/counting.c
@@ -0,0 +1,83 @@
+#include <linux/perf_event.h>
+#include <perf/evlist.h>
+#include <perf/evsel.h>
+#include <perf/cpumap.h>
+#include <perf/threadmap.h>
+#include <perf/mmap.h>
+#include <perf/core.h>
+#include <perf/event.h>
+#include <stdio.h>
+#include <unistd.h>
+
+static int libperf_print(enum libperf_print_level level,
+ const char *fmt, va_list ap)
+{
+ return vfprintf(stderr, fmt, ap);
+}
+
+int main(int argc, char **argv)
+{
+ int count = 100000, err = 0;
+ struct perf_evlist *evlist;
+ struct perf_evsel *evsel;
+ struct perf_thread_map *threads;
+ struct perf_counts_values counts;
+
+ struct perf_event_attr attr1 = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
+ .disabled = 1,
+ };
+ struct perf_event_attr attr2 = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_TASK_CLOCK,
+ .read_format = PERF_FORMAT_TOTAL_TIME_ENABLED|PERF_FORMAT_TOTAL_TIME_RUNNING,
+ .disabled = 1,
+ };
+
+ libperf_init(libperf_print);
+ threads = perf_thread_map__new_dummy();
+ if (!threads) {
+ fprintf(stderr, "failed to create threads\n");
+ return -1;
+ }
+ perf_thread_map__set_pid(threads, 0, 0);
+ evlist = perf_evlist__new();
+ if (!evlist) {
+ fprintf(stderr, "failed to create evlist\n");
+ goto out_threads;
+ }
+ evsel = perf_evsel__new(&attr1);
+ if (!evsel) {
+ fprintf(stderr, "failed to create evsel1\n");
+ goto out_evlist;
+ }
+ perf_evlist__add(evlist, evsel);
+ evsel = perf_evsel__new(&attr2);
+ if (!evsel) {
+ fprintf(stderr, "failed to create evsel2\n");
+ goto out_evlist;
+ }
+ perf_evlist__add(evlist, evsel);
+ perf_evlist__set_maps(evlist, NULL, threads);
+ err = perf_evlist__open(evlist);
+ if (err) {
+ fprintf(stderr, "failed to open evsel\n");
+ goto out_evlist;
+ }
+ perf_evlist__enable(evlist);
+ while (count--);
+ perf_evlist__disable(evlist);
+ perf_evlist__for_each_evsel(evlist, evsel) {
+ perf_evsel__read(evsel, 0, 0, &counts);
+ fprintf(stdout, "count %llu, enabled %llu, run %llu\n",
+ counts.val, counts.ena, counts.run);
+ }
+ perf_evlist__close(evlist);
+out_evlist:
+ perf_evlist__delete(evlist);
+out_threads:
+ perf_thread_map__put(threads);
+ return err;
+}
diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c
index f93f4e703e4c..ca0215047c32 100644
--- a/tools/lib/perf/cpumap.c
+++ b/tools/lib/perf/cpumap.c
@@ -247,7 +247,7 @@ out:
int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)
{
- if (idx < cpus->nr)
+ if (cpus && idx < cpus->nr)
return cpus->map[idx];
return -1;
diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c
index 5b9f2ca50591..6a875a0f01bb 100644
--- a/tools/lib/perf/evlist.c
+++ b/tools/lib/perf/evlist.c
@@ -11,10 +11,8 @@
#include <internal/mmap.h>
#include <internal/cpumap.h>
#include <internal/threadmap.h>
-#include <internal/xyarray.h>
#include <internal/lib.h>
#include <linux/zalloc.h>
-#include <sys/ioctl.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
@@ -125,8 +123,10 @@ static void perf_evlist__purge(struct perf_evlist *evlist)
void perf_evlist__exit(struct perf_evlist *evlist)
{
perf_cpu_map__put(evlist->cpus);
+ perf_cpu_map__put(evlist->all_cpus);
perf_thread_map__put(evlist->threads);
evlist->cpus = NULL;
+ evlist->all_cpus = NULL;
evlist->threads = NULL;
fdarray__exit(&evlist->pollfd);
}
diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
index 18106899cb4e..69b44d2cc0f5 100644
--- a/tools/lib/perf/include/perf/event.h
+++ b/tools/lib/perf/include/perf/event.h
@@ -105,6 +105,12 @@ struct perf_record_bpf_event {
__u8 tag[BPF_TAG_SIZE]; // prog tag
};
+struct perf_record_cgroup {
+ struct perf_event_header header;
+ __u64 id;
+ char path[PATH_MAX];
+};
+
struct perf_record_sample {
struct perf_event_header header;
__u64 array[];
@@ -352,6 +358,7 @@ union perf_event {
struct perf_record_mmap2 mmap2;
struct perf_record_comm comm;
struct perf_record_namespaces namespaces;
+ struct perf_record_cgroup cgroup;
struct perf_record_fork fork;
struct perf_record_lost lost;
struct perf_record_lost_samples lost_samples;
diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c
index 2548ff8c4d9c..06ac7bd2144b 100644
--- a/tools/lib/rbtree.c
+++ b/tools/lib/rbtree.c
@@ -497,7 +497,7 @@ struct rb_node *rb_next(const struct rb_node *node)
if (node->rb_right) {
node = node->rb_right;
while (node->rb_left)
- node=node->rb_left;
+ node = node->rb_left;
return (struct rb_node *)node;
}
@@ -528,7 +528,7 @@ struct rb_node *rb_prev(const struct rb_node *node)
if (node->rb_left) {
node = node->rb_left;
while (node->rb_right)
- node=node->rb_right;
+ node = node->rb_right;
return (struct rb_node *)node;
}
diff --git a/tools/lib/subcmd/parse-options.h b/tools/lib/subcmd/parse-options.h
index af9def589863..d2414144eb8c 100644
--- a/tools/lib/subcmd/parse-options.h
+++ b/tools/lib/subcmd/parse-options.h
@@ -151,6 +151,8 @@ struct option {
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }
#define OPT_CALLBACK(s, l, v, a, h, f) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f) }
+#define OPT_CALLBACK_SET(s, l, v, os, a, h, f) \
+ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .set = check_vtype(os, bool *)}
#define OPT_CALLBACK_NOOPT(s, l, v, a, h, f) \
{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = (a), .help = (h), .callback = (f), .flags = PARSE_OPT_NOARG }
#define OPT_CALLBACK_DEFAULT(s, l, v, a, h, f, d) \
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
index 1a7a9f877095..e335ac2b9e19 100644
--- a/tools/lib/symbol/kallsyms.c
+++ b/tools/lib/symbol/kallsyms.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include "symbol/kallsyms.h"
+#include "api/io.h"
#include <stdio.h>
-#include <stdlib.h>
+#include <sys/stat.h>
+#include <fcntl.h>
u8 kallsyms2elf_type(char type)
{
@@ -15,74 +17,62 @@ bool kallsyms__is_function(char symbol_type)
return symbol_type == 'T' || symbol_type == 'W';
}
-/*
- * While we find nice hex chars, build a long_val.
- * Return number of chars processed.
- */
-int hex2u64(const char *ptr, u64 *long_val)
+static void read_to_eol(struct io *io)
{
- char *p;
+ int ch;
- *long_val = strtoull(ptr, &p, 16);
-
- return p - ptr;
+ for (;;) {
+ ch = io__get_char(io);
+ if (ch < 0 || ch == '\n')
+ return;
+ }
}
int kallsyms__parse(const char *filename, void *arg,
int (*process_symbol)(void *arg, const char *name,
char type, u64 start))
{
- char *line = NULL;
- size_t n;
- int err = -1;
- FILE *file = fopen(filename, "r");
-
- if (file == NULL)
- goto out_failure;
-
- err = 0;
+ struct io io;
+ char bf[BUFSIZ];
+ int err;
- while (!feof(file)) {
- u64 start;
- int line_len, len;
- char symbol_type;
- char *symbol_name;
+ io.fd = open(filename, O_RDONLY, 0);
- line_len = getline(&line, &n, file);
- if (line_len < 0 || !line)
- break;
+ if (io.fd < 0)
+ return -1;
- line[--line_len] = '\0'; /* \n */
+ io__init(&io, io.fd, bf, sizeof(bf));
- len = hex2u64(line, &start);
+ err = 0;
+ while (!io.eof) {
+ __u64 start;
+ int ch;
+ size_t i;
+ char symbol_type;
+ char symbol_name[KSYM_NAME_LEN + 1];
- /* Skip the line if we failed to parse the address. */
- if (!len)
+ if (io__get_hex(&io, &start) != ' ') {
+ read_to_eol(&io);
continue;
-
- len++;
- if (len + 2 >= line_len)
+ }
+ symbol_type = io__get_char(&io);
+ if (io__get_char(&io) != ' ') {
+ read_to_eol(&io);
continue;
-
- symbol_type = line[len];
- len += 2;
- symbol_name = line + len;
- len = line_len - len;
-
- if (len >= KSYM_NAME_LEN) {
- err = -1;
- break;
}
+ for (i = 0; i < sizeof(symbol_name); i++) {
+ ch = io__get_char(&io);
+ if (ch < 0 || ch == '\n')
+ break;
+ symbol_name[i] = ch;
+ }
+ symbol_name[i] = '\0';
err = process_symbol(arg, symbol_name, symbol_type, start);
if (err)
break;
}
- free(line);
- fclose(file);
+ close(io.fd);
return err;
-
-out_failure:
- return -1;
}
diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h
index bd988f7b18d4..72ab9870454b 100644
--- a/tools/lib/symbol/kallsyms.h
+++ b/tools/lib/symbol/kallsyms.h
@@ -18,8 +18,6 @@ static inline u8 kallsyms2elf_binding(char type)
return isupper(type) ? STB_GLOBAL : STB_LOCAL;
}
-int hex2u64(const char *ptr, u64 *long_val);
-
u8 kallsyms2elf_type(char type);
bool kallsyms__is_function(char symbol_type);
diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore
index 9e9f25fb1922..7123c70b9ebc 100644
--- a/tools/lib/traceevent/.gitignore
+++ b/tools/lib/traceevent/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
TRACEEVENT-CFLAGS
libtraceevent-dynamic-list
libtraceevent.so.*
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index beaa8b8c08ff..e1bd2a93c6db 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -5541,7 +5541,7 @@ static void print_event_time(struct tep_handle *tep, struct trace_seq *s,
if (p10 > 1 && p10 < time)
trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10);
else
- trace_seq_printf(s, "%12llu\n", time);
+ trace_seq_printf(s, "%12llu", time);
}
struct print_event_type {
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
index b887e7437d67..27f3b07fdae8 100644
--- a/tools/lib/traceevent/kbuffer-parse.c
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -438,7 +438,7 @@ void *kbuffer_translate_data(int swap, void *data, unsigned int *size)
case KBUFFER_TYPE_TIME_EXTEND:
case KBUFFER_TYPE_TIME_STAMP:
return NULL;
- };
+ }
*size = length;
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
index 20eed719542e..c271aeeb227d 100644
--- a/tools/lib/traceevent/parse-filter.c
+++ b/tools/lib/traceevent/parse-filter.c
@@ -1958,7 +1958,8 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
default:
break;
}
- asprintf(&str, val ? "TRUE" : "FALSE");
+ if (asprintf(&str, val ? "TRUE" : "FALSE") < 0)
+ str = NULL;
break;
}
}
@@ -1976,7 +1977,8 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
break;
}
- asprintf(&str, "(%s) %s (%s)", left, op, right);
+ if (asprintf(&str, "(%s) %s (%s)", left, op, right) < 0)
+ str = NULL;
break;
case TEP_FILTER_OP_NOT:
@@ -1992,10 +1994,12 @@ static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *a
right_val = 0;
if (right_val >= 0) {
/* just return the opposite */
- asprintf(&str, right_val ? "FALSE" : "TRUE");
+ if (asprintf(&str, right_val ? "FALSE" : "TRUE") < 0)
+ str = NULL;
break;
}
- asprintf(&str, "%s(%s)", op, right);
+ if (asprintf(&str, "%s(%s)", op, right) < 0)
+ str = NULL;
break;
default:
@@ -2011,7 +2015,8 @@ static char *val_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
{
char *str = NULL;
- asprintf(&str, "%lld", arg->value.val);
+ if (asprintf(&str, "%lld", arg->value.val) < 0)
+ str = NULL;
return str;
}
@@ -2069,7 +2074,8 @@ static char *exp_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
break;
}
- asprintf(&str, "%s %s %s", lstr, op, rstr);
+ if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+ str = NULL;
out:
free(lstr);
free(rstr);
@@ -2113,7 +2119,8 @@ static char *num_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
if (!op)
op = "<=";
- asprintf(&str, "%s %s %s", lstr, op, rstr);
+ if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
+ str = NULL;
break;
default:
@@ -2148,8 +2155,9 @@ static char *str_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
if (!op)
op = "!~";
- asprintf(&str, "%s %s \"%s\"",
- arg->str.field->name, op, arg->str.val);
+ if (asprintf(&str, "%s %s \"%s\"",
+ arg->str.field->name, op, arg->str.val) < 0)
+ str = NULL;
break;
default:
@@ -2165,7 +2173,8 @@ static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *
switch (arg->type) {
case TEP_FILTER_ARG_BOOLEAN:
- asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE");
+ if (asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE") < 0)
+ str = NULL;
return str;
case TEP_FILTER_ARG_OP:
diff --git a/tools/memory-model/.gitignore b/tools/memory-model/.gitignore
index b1d34c52f3c3..cf4cd66d8fbf 100644
--- a/tools/memory-model/.gitignore
+++ b/tools/memory-model/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
litmus
diff --git a/tools/memory-model/litmus-tests/.gitignore b/tools/memory-model/litmus-tests/.gitignore
index 6e2ddc54152f..c492a1ddad91 100644
--- a/tools/memory-model/litmus-tests/.gitignore
+++ b/tools/memory-model/litmus-tests/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
*.litmus.out
diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
index 914cff12899b..45cefda24c7b 100644
--- a/tools/objtool/.gitignore
+++ b/tools/objtool/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
arch/x86/lib/inat-tables.c
objtool
fixdep
diff --git a/tools/objtool/Build b/tools/objtool/Build
index 8dc4f0848362..b7222d5cc7bc 100644
--- a/tools/objtool/Build
+++ b/tools/objtool/Build
@@ -1,16 +1,22 @@
objtool-y += arch/$(SRCARCH)/
+
+objtool-y += weak.o
+
+objtool-$(SUBCMD_CHECK) += check.o
+objtool-$(SUBCMD_CHECK) += special.o
+objtool-$(SUBCMD_ORC) += check.o
+objtool-$(SUBCMD_ORC) += orc_gen.o
+objtool-$(SUBCMD_ORC) += orc_dump.o
+
objtool-y += builtin-check.o
objtool-y += builtin-orc.o
-objtool-y += check.o
-objtool-y += orc_gen.o
-objtool-y += orc_dump.o
objtool-y += elf.o
-objtool-y += special.o
objtool-y += objtool.o
objtool-y += libstring.o
objtool-y += libctype.o
objtool-y += str_error_r.o
+objtool-y += librbtree.o
CFLAGS += -I$(srctree)/tools/lib
@@ -25,3 +31,7 @@ $(OUTPUT)libctype.o: ../lib/ctype.c FORCE
$(OUTPUT)str_error_r.o: ../lib/str_error_r.c FORCE
$(call rule_mkdir)
$(call if_changed_dep,cc_o_c)
+
+$(OUTPUT)librbtree.o: ../lib/rbtree.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
index de094670050b..0542e46c7552 100644
--- a/tools/objtool/Documentation/stack-validation.txt
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -289,6 +289,47 @@ they mean, and suggestions for how to fix them.
might be corrupt due to a gcc bug. For more details, see:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
+9. file.o: warning: objtool: funcA() call to funcB() with UACCESS enabled
+
+ This means that an unexpected call to a non-whitelisted function exists
+ outside of arch-specific guards.
+ X86: SMAP (stac/clac): __uaccess_begin()/__uaccess_end()
+ ARM: PAN: uaccess_enable()/uaccess_disable()
+
+ These functions should be called to denote a minimal critical section around
+ access to __user variables. See also: https://lwn.net/Articles/517475/
+
+ The intention of the warning is to prevent calls to funcB() from eventually
+ calling schedule(), potentially leaking the AC flags state, and not
+ restoring them correctly.
+
+ It also helps verify that there are no unexpected calls to funcB() which may
+ access user space pages with protections against doing so disabled.
+
+ To fix, either:
+ 1) remove explicit calls to funcB() from funcA().
+ 2) add the correct guards before and after calls to low level functions like
+ __get_user_size()/__put_user_size().
+ 3) add funcB to uaccess_safe_builtin whitelist in tools/objtool/check.c, if
+ funcB obviously does not call schedule(), and is marked notrace (since
+ function tracing inserts additional calls, which is not obvious from the
+ sources).
+
+10. file.o: warning: func()+0x5c: alternative modifies stack
+
+ This means that an alternative includes instructions that modify the
+ stack. The problem is that there is only one ORC unwind table, this means
+ that the ORC unwind entries must be valid for each of the alternatives.
+ The easiest way to enforce this is to ensure alternatives do not contain
+ any ORC entries, which in turn implies the above constraint.
+
+11. file.o: warning: unannotated intra-function call
+
+ This warning means that a direct call is done to a destination which
+ is not at the beginning of a function. If this is a legit call, you
+ can remove this warning by putting the ANNOTATE_INTRA_FUNCTION_CALL
+ directive right before the call.
+
If the error doesn't seem to make sense, it could be a bug in objtool.
Feel free to ask the objtool maintainer for help.
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
index ee08aeff30a1..7770edcda3a0 100644
--- a/tools/objtool/Makefile
+++ b/tools/objtool/Makefile
@@ -3,9 +3,15 @@ include ../scripts/Makefile.include
include ../scripts/Makefile.arch
# always use the host compiler
+ifneq ($(LLVM),)
+HOSTAR ?= llvm-ar
+HOSTCC ?= clang
+HOSTLD ?= ld.lld
+else
HOSTAR ?= ar
HOSTCC ?= gcc
HOSTLD ?= ld
+endif
AR = $(HOSTAR)
CC = $(HOSTCC)
LD = $(HOSTLD)
@@ -29,7 +35,8 @@ all: $(OBJTOOL)
INCLUDES := -I$(srctree)/tools/include \
-I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
- -I$(srctree)/tools/arch/$(SRCARCH)/include
+ -I$(srctree)/tools/arch/$(SRCARCH)/include \
+ -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include
WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed
CFLAGS := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
LDFLAGS += $(LIBELF_LIBS) $(LIBSUBCMD) $(KBUILD_HOSTLDFLAGS)
@@ -39,14 +46,24 @@ elfshdr := $(shell echo '$(pound)include <libelf.h>' | $(CC) $(CFLAGS) -x c -E -
CFLAGS += $(if $(elfshdr),,-DLIBELF_USE_DEPRECATED)
AWK = awk
+
+SUBCMD_CHECK := n
+SUBCMD_ORC := n
+
+ifeq ($(SRCARCH),x86)
+ SUBCMD_CHECK := y
+ SUBCMD_ORC := y
+endif
+
+export SUBCMD_CHECK SUBCMD_ORC
export srctree OUTPUT CFLAGS SRCARCH AWK
include $(srctree)/tools/build/Makefile.include
$(OBJTOOL_IN): fixdep FORCE
+ @$(CONFIG_SHELL) ./sync-check.sh
@$(MAKE) $(build)=objtool
$(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
- @$(CONFIG_SHELL) ./sync-check.sh
$(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
index ced3765c4f44..eda15a5a285e 100644
--- a/tools/objtool/arch.h
+++ b/tools/objtool/arch.h
@@ -8,9 +8,11 @@
#include <stdbool.h>
#include <linux/list.h>
-#include "elf.h"
+#include "objtool.h"
#include "cfi.h"
+#include <asm/orc_types.h>
+
enum insn_type {
INSN_JUMP_CONDITIONAL,
INSN_JUMP_UNCONDITIONAL,
@@ -20,7 +22,6 @@ enum insn_type {
INSN_CALL_DYNAMIC,
INSN_RETURN,
INSN_CONTEXT_SWITCH,
- INSN_STACK,
INSN_BUG,
INSN_NOP,
INSN_STAC,
@@ -64,15 +65,23 @@ struct op_src {
struct stack_op {
struct op_dest dest;
struct op_src src;
+ struct list_head list;
};
-void arch_initial_func_cfi_state(struct cfi_state *state);
+struct instruction;
+
+void arch_initial_func_cfi_state(struct cfi_init_state *state);
-int arch_decode_instruction(struct elf *elf, struct section *sec,
+int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
- unsigned long *immediate, struct stack_op *op);
+ unsigned long *immediate,
+ struct list_head *ops_list);
bool arch_callee_saved_reg(unsigned char reg);
+unsigned long arch_jump_destination(struct instruction *insn);
+
+unsigned long arch_dest_rela_offset(int addend);
+
#endif /* _ARCH_H */
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index a62e032863a8..4b504fc90bbb 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -11,6 +11,7 @@
#include "../../../arch/x86/lib/inat.c"
#include "../../../arch/x86/lib/insn.c"
+#include "../../check.h"
#include "../../elf.h"
#include "../../arch.h"
#include "../../warn.h"
@@ -26,7 +27,7 @@ static unsigned char op_to_cfi_reg[][2] = {
{CFI_DI, CFI_R15},
};
-static int is_x86_64(struct elf *elf)
+static int is_x86_64(const struct elf *elf)
{
switch (elf->ehdr.e_machine) {
case EM_X86_64:
@@ -66,16 +67,34 @@ bool arch_callee_saved_reg(unsigned char reg)
}
}
-int arch_decode_instruction(struct elf *elf, struct section *sec,
+unsigned long arch_dest_rela_offset(int addend)
+{
+ return addend + 4;
+}
+
+unsigned long arch_jump_destination(struct instruction *insn)
+{
+ return insn->offset + insn->len + insn->immediate;
+}
+
+#define ADD_OP(op) \
+ if (!(op = calloc(1, sizeof(*op)))) \
+ return -1; \
+ else for (list_add_tail(&op->list, ops_list); op; op = NULL)
+
+int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
- unsigned long *immediate, struct stack_op *op)
+ unsigned long *immediate,
+ struct list_head *ops_list)
{
struct insn insn;
int x86_64, sign;
unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
modrm_reg = 0, sib = 0;
+ struct stack_op *op = NULL;
+ struct symbol *sym;
x86_64 = is_x86_64(elf);
if (x86_64 == -1)
@@ -85,7 +104,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
insn_get_length(&insn);
if (!insn_complete(&insn)) {
- WARN_FUNC("can't decode instruction", sec, offset);
+ WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
return -1;
}
@@ -123,40 +142,44 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
/* add/sub reg, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
}
break;
case 0x50 ... 0x57:
/* push reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ op->dest.type = OP_DEST_PUSH;
+ }
break;
case 0x58 ... 0x5f:
/* pop reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ }
break;
case 0x68:
case 0x6a:
/* push immediate */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
break;
case 0x70 ... 0x7f:
@@ -170,12 +193,13 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
if (modrm == 0xe4) {
/* and imm, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_AND;
- op->src.reg = CFI_SP;
- op->src.offset = insn.immediate.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_AND;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.immediate.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
break;
}
@@ -187,34 +211,37 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
break;
/* add/sub imm, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_SP;
- op->src.offset = insn.immediate.value * sign;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.immediate.value * sign;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
break;
case 0x89:
if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
/* mov %rsp, reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = CFI_SP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ }
break;
}
if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
/* mov reg, %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
break;
}
@@ -224,22 +251,24 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
(modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
/* mov reg, disp(%rbp) */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG_INDIRECT;
- op->dest.reg = CFI_BP;
- op->dest.offset = insn.displacement.value;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = CFI_BP;
+ op->dest.offset = insn.displacement.value;
+ }
} else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
/* mov reg, disp(%rsp) */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
- op->dest.type = OP_DEST_REG_INDIRECT;
- op->dest.reg = CFI_SP;
- op->dest.offset = insn.displacement.value;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.type = OP_DEST_REG_INDIRECT;
+ op->dest.reg = CFI_SP;
+ op->dest.offset = insn.displacement.value;
+ }
}
break;
@@ -248,23 +277,25 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
/* mov disp(%rbp), reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG_INDIRECT;
- op->src.reg = CFI_BP;
- op->src.offset = insn.displacement.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG_INDIRECT;
+ op->src.reg = CFI_BP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ }
} else if (rex_w && !rex_b && sib == 0x24 &&
modrm_mod != 3 && modrm_rm == 4) {
/* mov disp(%rsp), reg */
- *type = INSN_STACK;
- op->src.type = OP_SRC_REG_INDIRECT;
- op->src.reg = CFI_SP;
- op->src.offset = insn.displacement.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG_INDIRECT;
+ op->src.reg = CFI_SP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ }
}
break;
@@ -272,28 +303,30 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
case 0x8d:
if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
- *type = INSN_STACK;
- if (!insn.displacement.value) {
- /* lea (%rsp), reg */
- op->src.type = OP_SRC_REG;
- } else {
- /* lea disp(%rsp), reg */
- op->src.type = OP_SRC_ADD;
- op->src.offset = insn.displacement.value;
+ ADD_OP(op) {
+ if (!insn.displacement.value) {
+ /* lea (%rsp), reg */
+ op->src.type = OP_SRC_REG;
+ } else {
+ /* lea disp(%rsp), reg */
+ op->src.type = OP_SRC_ADD;
+ op->src.offset = insn.displacement.value;
+ }
+ op->src.reg = CFI_SP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
}
- op->src.reg = CFI_SP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
} else if (rex == 0x48 && modrm == 0x65) {
/* lea disp(%rbp), %rsp */
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_BP;
- op->src.offset = insn.displacement.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_BP;
+ op->src.offset = insn.displacement.value;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
} else if (rex == 0x49 && modrm == 0x62 &&
insn.displacement.value == -8) {
@@ -304,12 +337,13 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
* Restoring rsp back to its original value after a
* stack realignment.
*/
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_R10;
- op->src.offset = -8;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_R10;
+ op->src.offset = -8;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
} else if (rex == 0x49 && modrm == 0x65 &&
insn.displacement.value == -16) {
@@ -320,21 +354,23 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
* Restoring rsp back to its original value after a
* stack realignment.
*/
- *type = INSN_STACK;
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_R13;
- op->src.offset = -16;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_R13;
+ op->src.offset = -16;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
}
break;
case 0x8f:
/* pop to mem */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POP;
- op->dest.type = OP_DEST_MEM;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
+ }
break;
case 0x90:
@@ -343,16 +379,18 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
case 0x9c:
/* pushf */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSHF;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSHF;
+ }
break;
case 0x9d:
/* popf */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POPF;
- op->dest.type = OP_DEST_MEM;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POPF;
+ op->dest.type = OP_DEST_MEM;
+ }
break;
case 0x0f:
@@ -387,16 +425,18 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
} else if (op2 == 0xa0 || op2 == 0xa8) {
/* push fs/gs */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
} else if (op2 == 0xa1 || op2 == 0xa9) {
/* pop fs/gs */
- *type = INSN_STACK;
- op->src.type = OP_SRC_POP;
- op->dest.type = OP_DEST_MEM;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_MEM;
+ }
}
break;
@@ -409,8 +449,8 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
* mov bp, sp
* pop bp
*/
- *type = INSN_STACK;
- op->dest.type = OP_DEST_LEAVE;
+ ADD_OP(op)
+ op->dest.type = OP_DEST_LEAVE;
break;
@@ -429,14 +469,41 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
*type = INSN_RETURN;
break;
+ case 0xcf: /* iret */
+ /*
+ * Handle sync_core(), which has an IRET to self.
+ * All other IRET are in STT_NONE entry code.
+ */
+ sym = find_symbol_containing(sec, offset);
+ if (sym && sym->type == STT_FUNC) {
+ ADD_OP(op) {
+ /* add $40, %rsp */
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = 5*8;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
+ break;
+ }
+
+ /* fallthrough */
+
case 0xca: /* retf */
case 0xcb: /* retf */
- case 0xcf: /* iret */
*type = INSN_CONTEXT_SWITCH;
break;
case 0xe8:
*type = INSN_CALL;
+ /*
+ * For the impact on the stack, a CALL behaves like
+ * a PUSH of an immediate value (the return address).
+ */
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
break;
case 0xfc:
@@ -464,9 +531,10 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
else if (modrm_reg == 6) {
/* push from mem */
- *type = INSN_STACK;
- op->src.type = OP_SRC_CONST;
- op->dest.type = OP_DEST_PUSH;
+ ADD_OP(op) {
+ op->src.type = OP_SRC_CONST;
+ op->dest.type = OP_DEST_PUSH;
+ }
}
break;
@@ -480,7 +548,7 @@ int arch_decode_instruction(struct elf *elf, struct section *sec,
return 0;
}
-void arch_initial_func_cfi_state(struct cfi_state *state)
+void arch_initial_func_cfi_state(struct cfi_init_state *state)
{
int i;
diff --git a/tools/objtool/arch/x86/include/cfi_regs.h b/tools/objtool/arch/x86/include/cfi_regs.h
new file mode 100644
index 000000000000..79bc517efba8
--- /dev/null
+++ b/tools/objtool/arch/x86/include/cfi_regs.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _OBJTOOL_CFI_REGS_H
+#define _OBJTOOL_CFI_REGS_H
+
+#define CFI_AX 0
+#define CFI_DX 1
+#define CFI_CX 2
+#define CFI_BX 3
+#define CFI_SI 4
+#define CFI_DI 5
+#define CFI_BP 6
+#define CFI_SP 7
+#define CFI_R8 8
+#define CFI_R9 9
+#define CFI_R10 10
+#define CFI_R11 11
+#define CFI_R12 12
+#define CFI_R13 13
+#define CFI_R14 14
+#define CFI_R15 15
+#define CFI_RA 16
+#define CFI_NUM_REGS 17
+
+#endif /* _OBJTOOL_CFI_REGS_H */
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index c807984a03c1..7a44174967b5 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -14,10 +14,11 @@
*/
#include <subcmd/parse-options.h>
+#include <string.h>
#include "builtin.h"
-#include "check.h"
+#include "objtool.h"
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
static const char * const check_usage[] = {
"objtool check [<options>] file.o",
@@ -31,12 +32,15 @@ const struct option check_options[] = {
OPT_BOOLEAN('m', "module", &module, "Indicates the object will be part of a kernel module"),
OPT_BOOLEAN('b', "backtrace", &backtrace, "unwind on error"),
OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
+ OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
+ OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"),
+ OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
OPT_END(),
};
int cmd_check(int argc, const char **argv)
{
- const char *objname;
+ const char *objname, *s;
argc = parse_options(argc, argv, check_options, check_usage, 0);
@@ -45,5 +49,9 @@ int cmd_check(int argc, const char **argv)
objname = argv[0];
+ s = strstr(objname, "vmlinux.o");
+ if (s && !s[9])
+ vmlinux = true;
+
return check(objname, false);
}
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 5f7cc6157edd..b1dfe2007962 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -14,8 +14,7 @@
#include <string.h>
#include "builtin.h"
-#include "check.h"
-
+#include "objtool.h"
static const char *orc_usage[] = {
"objtool orc generate [<options>] file.o",
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
index a32736f8d2a4..85c979caa367 100644
--- a/tools/objtool/builtin.h
+++ b/tools/objtool/builtin.h
@@ -8,7 +8,7 @@
#include <subcmd/parse-options.h>
extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
extern int cmd_check(int argc, const char **argv);
extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/cfi.h b/tools/objtool/cfi.h
index 4427bf8ed686..c7c59c6a44ee 100644
--- a/tools/objtool/cfi.h
+++ b/tools/objtool/cfi.h
@@ -6,38 +6,33 @@
#ifndef _OBJTOOL_CFI_H
#define _OBJTOOL_CFI_H
+#include "cfi_regs.h"
+
#define CFI_UNDEFINED -1
#define CFI_CFA -2
#define CFI_SP_INDIRECT -3
#define CFI_BP_INDIRECT -4
-#define CFI_AX 0
-#define CFI_DX 1
-#define CFI_CX 2
-#define CFI_BX 3
-#define CFI_SI 4
-#define CFI_DI 5
-#define CFI_BP 6
-#define CFI_SP 7
-#define CFI_R8 8
-#define CFI_R9 9
-#define CFI_R10 10
-#define CFI_R11 11
-#define CFI_R12 12
-#define CFI_R13 13
-#define CFI_R14 14
-#define CFI_R15 15
-#define CFI_RA 16
-#define CFI_NUM_REGS 17
-
struct cfi_reg {
int base;
int offset;
};
-struct cfi_state {
+struct cfi_init_state {
+ struct cfi_reg regs[CFI_NUM_REGS];
struct cfi_reg cfa;
+};
+
+struct cfi_state {
struct cfi_reg regs[CFI_NUM_REGS];
+ struct cfi_reg vals[CFI_NUM_REGS];
+ struct cfi_reg cfa;
+ int stack_size;
+ int drap_reg, drap_offset;
+ unsigned char type;
+ bool bp_scratch;
+ bool drap;
+ bool end;
};
#endif /* _OBJTOOL_CFI_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 4768d91c6d68..63d65a702900 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -7,10 +7,10 @@
#include <stdlib.h>
#include "builtin.h"
+#include "cfi.h"
+#include "arch.h"
#include "check.h"
-#include "elf.h"
#include "special.h"
-#include "arch.h"
#include "warn.h"
#include <linux/hashtable.h>
@@ -27,16 +27,17 @@ struct alternative {
};
const char *objname;
-struct cfi_state initial_func_cfi;
+struct cfi_init_state initial_func_cfi;
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset)
{
struct instruction *insn;
- hash_for_each_possible(file->insn_hash, insn, hash, offset)
+ hash_for_each_possible(file->insn_hash, insn, hash, sec_offset_hash(sec, offset)) {
if (insn->sec == sec && insn->offset == offset)
return insn;
+ }
return NULL;
}
@@ -72,22 +73,33 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
return find_insn(file, func->cfunc->sec, func->cfunc->offset);
}
-#define func_for_each_insn_all(file, func, insn) \
+static struct instruction *prev_insn_same_sym(struct objtool_file *file,
+ struct instruction *insn)
+{
+ struct instruction *prev = list_prev_entry(insn, list);
+
+ if (&prev->list != &file->insn_list && prev->func == insn->func)
+ return prev;
+
+ return NULL;
+}
+
+#define func_for_each_insn(file, func, insn) \
for (insn = find_insn(file, func->sec, func->offset); \
insn; \
insn = next_insn_same_func(file, insn))
-#define func_for_each_insn(file, func, insn) \
- for (insn = find_insn(file, func->sec, func->offset); \
+#define sym_for_each_insn(file, sym, insn) \
+ for (insn = find_insn(file, sym->sec, sym->offset); \
insn && &insn->list != &file->insn_list && \
- insn->sec == func->sec && \
- insn->offset < func->offset + func->len; \
+ insn->sec == sym->sec && \
+ insn->offset < sym->offset + sym->len; \
insn = list_next_entry(insn, list))
-#define func_for_each_insn_continue_reverse(file, func, insn) \
+#define sym_for_each_insn_continue_reverse(file, sym, insn) \
for (insn = list_prev_entry(insn, list); \
&insn->list != &file->insn_list && \
- insn->sec == func->sec && insn->offset >= func->offset; \
+ insn->sec == sym->sec && insn->offset >= sym->offset; \
insn = list_prev_entry(insn, list))
#define sec_for_each_insn_from(file, insn) \
@@ -97,14 +109,19 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
for (insn = next_insn_same_sec(file, insn); insn; \
insn = next_insn_same_sec(file, insn))
+static bool is_static_jump(struct instruction *insn)
+{
+ return insn->type == INSN_JUMP_CONDITIONAL ||
+ insn->type == INSN_JUMP_UNCONDITIONAL;
+}
+
static bool is_sibling_call(struct instruction *insn)
{
/* An indirect jump is either a sibling call or a jump to a table. */
if (insn->type == INSN_JUMP_DYNAMIC)
return list_empty(&insn->alts);
- if (insn->type != INSN_JUMP_CONDITIONAL &&
- insn->type != INSN_JUMP_UNCONDITIONAL)
+ if (!is_static_jump(insn))
return false;
/* add_jump_destinations() sets insn->call_dest for sibling calls. */
@@ -165,7 +182,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
if (!insn->func)
return false;
- func_for_each_insn_all(file, func, insn) {
+ func_for_each_insn(file, func, insn) {
empty = false;
if (insn->type == INSN_RETURN)
@@ -180,7 +197,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
* case, the function's dead-end status depends on whether the target
* of the sibling call returns.
*/
- func_for_each_insn_all(file, func, insn) {
+ func_for_each_insn(file, func, insn) {
if (is_sibling_call(insn)) {
struct instruction *dest = insn->jump_dest;
@@ -210,18 +227,31 @@ static bool dead_end_function(struct objtool_file *file, struct symbol *func)
return __dead_end_function(file, func, 0);
}
-static void clear_insn_state(struct insn_state *state)
+static void init_cfi_state(struct cfi_state *cfi)
{
int i;
- memset(state, 0, sizeof(*state));
- state->cfa.base = CFI_UNDEFINED;
for (i = 0; i < CFI_NUM_REGS; i++) {
- state->regs[i].base = CFI_UNDEFINED;
- state->vals[i].base = CFI_UNDEFINED;
+ cfi->regs[i].base = CFI_UNDEFINED;
+ cfi->vals[i].base = CFI_UNDEFINED;
}
- state->drap_reg = CFI_UNDEFINED;
- state->drap_offset = -1;
+ cfi->cfa.base = CFI_UNDEFINED;
+ cfi->drap_reg = CFI_UNDEFINED;
+ cfi->drap_offset = -1;
+}
+
+static void init_insn_state(struct insn_state *state, struct section *sec)
+{
+ memset(state, 0, sizeof(*state));
+ init_cfi_state(&state->cfi);
+
+ /*
+ * We need the full vmlinux for noinstr validation, otherwise we can
+ * not correctly determine insn->call_dest->sec (external symbols do
+ * not have a section).
+ */
+ if (vmlinux && sec)
+ state->noinstr = sec->noinstr;
}
/*
@@ -234,6 +264,7 @@ static int decode_instructions(struct objtool_file *file)
struct symbol *func;
unsigned long offset;
struct instruction *insn;
+ unsigned long nr_insns = 0;
int ret;
for_each_sec(file, sec) {
@@ -246,6 +277,10 @@ static int decode_instructions(struct objtool_file *file)
strncmp(sec->name, ".discard.", 9))
sec->text = true;
+ if (!strcmp(sec->name, ".noinstr.text") ||
+ !strcmp(sec->name, ".entry.text"))
+ sec->noinstr = true;
+
for (offset = 0; offset < sec->len; offset += insn->len) {
insn = malloc(sizeof(*insn));
if (!insn) {
@@ -254,7 +289,8 @@ static int decode_instructions(struct objtool_file *file)
}
memset(insn, 0, sizeof(*insn));
INIT_LIST_HEAD(&insn->alts);
- clear_insn_state(&insn->state);
+ INIT_LIST_HEAD(&insn->stack_ops);
+ init_cfi_state(&insn->cfi);
insn->sec = sec;
insn->offset = offset;
@@ -263,12 +299,13 @@ static int decode_instructions(struct objtool_file *file)
sec->len - offset,
&insn->len, &insn->type,
&insn->immediate,
- &insn->stack_op);
+ &insn->stack_ops);
if (ret)
goto err;
- hash_add(file->insn_hash, &insn->hash, insn->offset);
+ hash_add(file->insn_hash, &insn->hash, sec_offset_hash(sec, insn->offset));
list_add_tail(&insn->list, &file->insn_list);
+ nr_insns++;
}
list_for_each_entry(func, &sec->symbol_list, list) {
@@ -281,11 +318,14 @@ static int decode_instructions(struct objtool_file *file)
return -1;
}
- func_for_each_insn(file, func, insn)
+ sym_for_each_insn(file, func, insn)
insn->func = func;
}
}
+ if (stats)
+ printf("nr_insns: %lu\n", nr_insns);
+
return 0;
err:
@@ -293,6 +333,19 @@ err:
return ret;
}
+static struct instruction *find_last_insn(struct objtool_file *file,
+ struct section *sec)
+{
+ struct instruction *insn = NULL;
+ unsigned int offset;
+ unsigned int end = (sec->len > 10) ? sec->len - 10 : 0;
+
+ for (offset = sec->len - 1; offset >= end && !insn; offset--)
+ insn = find_insn(file, sec, offset);
+
+ return insn;
+}
+
/*
* Mark "ud2" instructions and manually annotated dead ends.
*/
@@ -301,7 +354,6 @@ static int add_dead_ends(struct objtool_file *file)
struct section *sec;
struct rela *rela;
struct instruction *insn;
- bool found;
/*
* By default, "ud2" is a dead end unless otherwise annotated, because
@@ -327,15 +379,8 @@ static int add_dead_ends(struct objtool_file *file)
if (insn)
insn = list_prev_entry(insn, list);
else if (rela->addend == rela->sym->sec->len) {
- found = false;
- list_for_each_entry_reverse(insn, &file->insn_list, list) {
- if (insn->sec == rela->sym->sec) {
- found = true;
- break;
- }
- }
-
- if (!found) {
+ insn = find_last_insn(file, rela->sym->sec);
+ if (!insn) {
WARN("can't find unreachable insn at %s+0x%x",
rela->sym->sec->name, rela->addend);
return -1;
@@ -369,15 +414,8 @@ reachable:
if (insn)
insn = list_prev_entry(insn, list);
else if (rela->addend == rela->sym->sec->len) {
- found = false;
- list_for_each_entry_reverse(insn, &file->insn_list, list) {
- if (insn->sec == rela->sym->sec) {
- found = true;
- break;
- }
- }
-
- if (!found) {
+ insn = find_last_insn(file, rela->sym->sec);
+ if (!insn) {
WARN("can't find reachable insn at %s+0x%x",
rela->sym->sec->name, rela->addend);
return -1;
@@ -415,8 +453,8 @@ static void add_ignores(struct objtool_file *file)
break;
case STT_SECTION:
- func = find_symbol_by_offset(rela->sym->sec, rela->addend);
- if (!func || func->type != STT_FUNC)
+ func = find_func_by_offset(rela->sym->sec, rela->addend);
+ if (!func)
continue;
break;
@@ -425,7 +463,7 @@ static void add_ignores(struct objtool_file *file)
continue;
}
- func_for_each_insn_all(file, func, insn)
+ func_for_each_insn(file, func, insn)
insn->ignore = true;
}
}
@@ -469,6 +507,7 @@ static const char *uaccess_safe_builtin[] = {
"__asan_report_store16_noabort",
/* KCOV */
"write_comp_data",
+ "check_kcov_mode",
"__sanitizer_cov_trace_pc",
"__sanitizer_cov_trace_const_cmp1",
"__sanitizer_cov_trace_const_cmp2",
@@ -478,6 +517,7 @@ static const char *uaccess_safe_builtin[] = {
"__sanitizer_cov_trace_cmp2",
"__sanitizer_cov_trace_cmp4",
"__sanitizer_cov_trace_cmp8",
+ "__sanitizer_cov_trace_switch",
/* UBSAN */
"ubsan_type_mismatch_common",
"__ubsan_handle_type_mismatch",
@@ -553,24 +593,24 @@ static int add_jump_destinations(struct objtool_file *file)
unsigned long dest_off;
for_each_insn(file, insn) {
- if (insn->type != INSN_JUMP_CONDITIONAL &&
- insn->type != INSN_JUMP_UNCONDITIONAL)
+ if (!is_static_jump(insn))
continue;
if (insn->ignore || insn->offset == FAKE_JUMP_OFFSET)
continue;
- rela = find_rela_by_dest_range(insn->sec, insn->offset,
- insn->len);
+ rela = find_rela_by_dest_range(file->elf, insn->sec,
+ insn->offset, insn->len);
if (!rela) {
dest_sec = insn->sec;
- dest_off = insn->offset + insn->len + insn->immediate;
+ dest_off = arch_jump_destination(insn);
} else if (rela->sym->type == STT_SECTION) {
dest_sec = rela->sym->sec;
- dest_off = rela->addend + 4;
+ dest_off = arch_dest_rela_offset(rela->addend);
} else if (rela->sym->sec->idx) {
dest_sec = rela->sym->sec;
- dest_off = rela->sym->sym.st_value + rela->addend + 4;
+ dest_off = rela->sym->sym.st_value +
+ arch_dest_rela_offset(rela->addend);
} else if (strstr(rela->sym->name, "_indirect_thunk_")) {
/*
* Retpoline jumps are really dynamic jumps in
@@ -644,6 +684,16 @@ static int add_jump_destinations(struct objtool_file *file)
return 0;
}
+static void remove_insn_ops(struct instruction *insn)
+{
+ struct stack_op *op, *tmp;
+
+ list_for_each_entry_safe(op, tmp, &insn->stack_ops, list) {
+ list_del(&op->list);
+ free(op);
+ }
+}
+
/*
* Find the destination instructions for all calls.
*/
@@ -657,34 +707,50 @@ static int add_call_destinations(struct objtool_file *file)
if (insn->type != INSN_CALL)
continue;
- rela = find_rela_by_dest_range(insn->sec, insn->offset,
- insn->len);
+ rela = find_rela_by_dest_range(file->elf, insn->sec,
+ insn->offset, insn->len);
if (!rela) {
- dest_off = insn->offset + insn->len + insn->immediate;
- insn->call_dest = find_symbol_by_offset(insn->sec,
- dest_off);
+ dest_off = arch_jump_destination(insn);
+ insn->call_dest = find_func_by_offset(insn->sec, dest_off);
+ if (!insn->call_dest)
+ insn->call_dest = find_symbol_by_offset(insn->sec, dest_off);
+
+ if (insn->ignore)
+ continue;
- if (!insn->call_dest && !insn->ignore) {
- WARN_FUNC("unsupported intra-function call",
+ if (!insn->call_dest) {
+ WARN_FUNC("unannotated intra-function call", insn->sec, insn->offset);
+ return -1;
+ }
+
+ if (insn->func && insn->call_dest->type != STT_FUNC) {
+ WARN_FUNC("unsupported call to non-function",
insn->sec, insn->offset);
- if (retpoline)
- WARN("If this is a retpoline, please patch it in with alternatives and annotate it with ANNOTATE_NOSPEC_ALTERNATIVE.");
return -1;
}
} else if (rela->sym->type == STT_SECTION) {
- insn->call_dest = find_symbol_by_offset(rela->sym->sec,
- rela->addend+4);
- if (!insn->call_dest ||
- insn->call_dest->type != STT_FUNC) {
- WARN_FUNC("can't find call dest symbol at %s+0x%x",
+ dest_off = arch_dest_rela_offset(rela->addend);
+ insn->call_dest = find_func_by_offset(rela->sym->sec,
+ dest_off);
+ if (!insn->call_dest) {
+ WARN_FUNC("can't find call dest symbol at %s+0x%lx",
insn->sec, insn->offset,
rela->sym->sec->name,
- rela->addend + 4);
+ dest_off);
return -1;
}
} else
insn->call_dest = rela->sym;
+
+ /*
+ * Whatever stack impact regular CALLs have, should be undone
+ * by the RETURN of the called function.
+ *
+ * Annotated intra-function calls retain the stack_ops but
+ * are converted to JUMP, see read_intra_function_calls().
+ */
+ remove_insn_ops(insn);
}
return 0;
@@ -712,7 +778,9 @@ static int handle_group_alt(struct objtool_file *file,
struct instruction *orig_insn,
struct instruction **new_insn)
{
+ static unsigned int alt_group_next_index = 1;
struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
+ unsigned int alt_group = alt_group_next_index++;
unsigned long dest_off;
last_orig_insn = NULL;
@@ -721,7 +789,7 @@ static int handle_group_alt(struct objtool_file *file,
if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
break;
- insn->alt_group = true;
+ insn->alt_group = alt_group;
last_orig_insn = insn;
}
@@ -733,7 +801,8 @@ static int handle_group_alt(struct objtool_file *file,
}
memset(fake_jump, 0, sizeof(*fake_jump));
INIT_LIST_HEAD(&fake_jump->alts);
- clear_insn_state(&fake_jump->state);
+ INIT_LIST_HEAD(&fake_jump->stack_ops);
+ init_cfi_state(&fake_jump->cfi);
fake_jump->sec = special_alt->new_sec;
fake_jump->offset = FAKE_JUMP_OFFSET;
@@ -754,6 +823,7 @@ static int handle_group_alt(struct objtool_file *file,
}
last_new_insn = NULL;
+ alt_group = alt_group_next_index++;
insn = *new_insn;
sec_for_each_insn_from(file, insn) {
if (insn->offset >= special_alt->new_off + special_alt->new_len)
@@ -763,15 +833,36 @@ static int handle_group_alt(struct objtool_file *file,
insn->ignore = orig_insn->ignore_alts;
insn->func = orig_insn->func;
+ insn->alt_group = alt_group;
+
+ /*
+ * Since alternative replacement code is copy/pasted by the
+ * kernel after applying relocations, generally such code can't
+ * have relative-address relocation references to outside the
+ * .altinstr_replacement section, unless the arch's
+ * alternatives code can adjust the relative offsets
+ * accordingly.
+ *
+ * The x86 alternatives code adjusts the offsets only when it
+ * encounters a branch instruction at the very beginning of the
+ * replacement group.
+ */
+ if ((insn->offset != special_alt->new_off ||
+ (insn->type != INSN_CALL && !is_static_jump(insn))) &&
+ find_rela_by_dest_range(file->elf, insn->sec, insn->offset, insn->len)) {
- if (insn->type != INSN_JUMP_CONDITIONAL &&
- insn->type != INSN_JUMP_UNCONDITIONAL)
+ WARN_FUNC("unsupported relocation in alternatives section",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ if (!is_static_jump(insn))
continue;
if (!insn->immediate)
continue;
- dest_off = insn->offset + insn->len + insn->immediate;
+ dest_off = arch_jump_destination(insn);
if (dest_off == special_alt->new_off + special_alt->new_len) {
if (!fake_jump) {
WARN("%s: alternative jump to end of section",
@@ -866,6 +957,12 @@ static int add_special_section_alts(struct objtool_file *file)
}
if (special_alt->group) {
+ if (!special_alt->orig_len) {
+ WARN_FUNC("empty alternative entry",
+ orig_insn->sec, orig_insn->offset);
+ continue;
+ }
+
ret = handle_group_alt(file, special_alt, orig_insn,
&new_insn);
if (ret)
@@ -1001,7 +1098,7 @@ static struct rela *find_jump_table(struct objtool_file *file,
struct instruction *insn)
{
struct rela *text_rela, *table_rela;
- struct instruction *orig_insn = insn;
+ struct instruction *dest_insn, *orig_insn = insn;
struct section *table_sec;
unsigned long table_offset;
@@ -1011,11 +1108,8 @@ static struct rela *find_jump_table(struct objtool_file *file,
* it.
*/
for (;
- &insn->list != &file->insn_list &&
- insn->sec == func->sec &&
- insn->offset >= func->offset;
-
- insn = insn->first_jump_src ?: list_prev_entry(insn, list)) {
+ insn && insn->func && insn->func->pfunc == func;
+ insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) {
if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
break;
@@ -1028,8 +1122,8 @@ static struct rela *find_jump_table(struct objtool_file *file,
break;
/* look for a relocation which references .rodata */
- text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
- insn->len);
+ text_rela = find_rela_by_dest_range(file->elf, insn->sec,
+ insn->offset, insn->len);
if (!text_rela || text_rela->sym->type != STT_SECTION ||
!text_rela->sym->sec->rodata)
continue;
@@ -1053,10 +1147,17 @@ static struct rela *find_jump_table(struct objtool_file *file,
strcmp(table_sec->name, C_JUMP_TABLE_SECTION))
continue;
- /* Each table entry has a rela associated with it. */
- table_rela = find_rela_by_dest(table_sec, table_offset);
+ /*
+ * Each table entry has a rela associated with it. The rela
+ * should reference text in the same function as the original
+ * instruction.
+ */
+ table_rela = find_rela_by_dest(file->elf, table_sec, table_offset);
if (!table_rela)
continue;
+ dest_insn = find_insn(file, table_rela->sym->sec, table_rela->addend);
+ if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func)
+ continue;
/*
* Use of RIP-relative switch jumps is quite rare, and
@@ -1082,7 +1183,7 @@ static void mark_func_jump_tables(struct objtool_file *file,
struct instruction *insn, *last = NULL;
struct rela *rela;
- func_for_each_insn_all(file, func, insn) {
+ func_for_each_insn(file, func, insn) {
if (!last)
last = insn;
@@ -1117,7 +1218,7 @@ static int add_func_jump_tables(struct objtool_file *file,
struct instruction *insn;
int ret;
- func_for_each_insn_all(file, func, insn) {
+ func_for_each_insn(file, func, insn) {
if (!insn->jump_table)
continue;
@@ -1187,7 +1288,7 @@ static int read_unwind_hints(struct objtool_file *file)
for (i = 0; i < sec->len / sizeof(struct unwind_hint); i++) {
hint = (struct unwind_hint *)sec->data->d_buf + i;
- rela = find_rela_by_dest(sec, i * sizeof(*hint));
+ rela = find_rela_by_dest(file->elf, sec, i * sizeof(*hint));
if (!rela) {
WARN("can't find rela for unwind_hints[%d]", i);
return -1;
@@ -1199,15 +1300,10 @@ static int read_unwind_hints(struct objtool_file *file)
return -1;
}
- cfa = &insn->state.cfa;
+ cfa = &insn->cfi.cfa;
- if (hint->type == UNWIND_HINT_TYPE_SAVE) {
- insn->save = true;
- continue;
-
- } else if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
- insn->restore = true;
- insn->hint = true;
+ if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) {
+ insn->ret_offset = hint->sp_offset;
continue;
}
@@ -1245,8 +1341,8 @@ static int read_unwind_hints(struct objtool_file *file)
}
cfa->offset = hint->sp_offset;
- insn->state.type = hint->type;
- insn->state.end = hint->end;
+ insn->cfi.type = hint->type;
+ insn->cfi.end = hint->end;
}
return 0;
@@ -1287,6 +1383,104 @@ static int read_retpoline_hints(struct objtool_file *file)
return 0;
}
+static int read_instr_hints(struct objtool_file *file)
+{
+ struct section *sec;
+ struct instruction *insn;
+ struct rela *rela;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.instr_end");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.instr_end entry");
+ return -1;
+ }
+
+ insn->instr--;
+ }
+
+ sec = find_section_by_name(file->elf, ".rela.discard.instr_begin");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s", sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.instr_begin entry");
+ return -1;
+ }
+
+ insn->instr++;
+ }
+
+ return 0;
+}
+
+static int read_intra_function_calls(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct section *sec;
+ struct rela *rela;
+
+ sec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls");
+ if (!sec)
+ return 0;
+
+ list_for_each_entry(rela, &sec->rela_list, list) {
+ unsigned long dest_off;
+
+ if (rela->sym->type != STT_SECTION) {
+ WARN("unexpected relocation symbol type in %s",
+ sec->name);
+ return -1;
+ }
+
+ insn = find_insn(file, rela->sym->sec, rela->addend);
+ if (!insn) {
+ WARN("bad .discard.intra_function_call entry");
+ return -1;
+ }
+
+ if (insn->type != INSN_CALL) {
+ WARN_FUNC("intra_function_call not a direct call",
+ insn->sec, insn->offset);
+ return -1;
+ }
+
+ /*
+ * Treat intra-function CALLs as JMPs, but with a stack_op.
+ * See add_call_destinations(), which strips stack_ops from
+ * normal CALLs.
+ */
+ insn->type = INSN_JUMP_UNCONDITIONAL;
+
+ dest_off = insn->offset + insn->len + insn->immediate;
+ insn->jump_dest = find_insn(file, insn->sec, dest_off);
+ if (!insn->jump_dest) {
+ WARN_FUNC("can't find call dest at %s+0x%lx",
+ insn->sec, insn->offset,
+ insn->sec->name, dest_off);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
static void mark_rodata(struct objtool_file *file)
{
struct section *sec;
@@ -1303,8 +1497,8 @@ static void mark_rodata(struct objtool_file *file)
* .rodata.str1.* sections are ignored; they don't contain jump tables.
*/
for_each_sec(file, sec) {
- if ((!strncmp(sec->name, ".rodata", 7) && !strstr(sec->name, ".str1.")) ||
- !strcmp(sec->name, C_JUMP_TABLE_SECTION)) {
+ if (!strncmp(sec->name, ".rodata", 7) &&
+ !strstr(sec->name, ".str1.")) {
sec->rodata = true;
found = true;
}
@@ -1342,6 +1536,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ ret = read_intra_function_calls(file);
+ if (ret)
+ return ret;
+
ret = add_call_destinations(file);
if (ret)
return ret;
@@ -1358,12 +1556,16 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ ret = read_instr_hints(file);
+ if (ret)
+ return ret;
+
return 0;
}
static bool is_fentry_call(struct instruction *insn)
{
- if (insn->type == INSN_CALL &&
+ if (insn->type == INSN_CALL && insn->call_dest &&
insn->call_dest->type == STT_NOTYPE &&
!strcmp(insn->call_dest->name, "__fentry__"))
return true;
@@ -1371,42 +1573,59 @@ static bool is_fentry_call(struct instruction *insn)
return false;
}
-static bool has_modified_stack_frame(struct insn_state *state)
+static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state)
{
+ u8 ret_offset = insn->ret_offset;
+ struct cfi_state *cfi = &state->cfi;
int i;
- if (state->cfa.base != initial_func_cfi.cfa.base ||
- state->cfa.offset != initial_func_cfi.cfa.offset ||
- state->stack_size != initial_func_cfi.cfa.offset ||
- state->drap)
+ if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap)
return true;
- for (i = 0; i < CFI_NUM_REGS; i++)
- if (state->regs[i].base != initial_func_cfi.regs[i].base ||
- state->regs[i].offset != initial_func_cfi.regs[i].offset)
+ if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset)
+ return true;
+
+ if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset)
+ return true;
+
+ /*
+ * If there is a ret offset hint then don't check registers
+ * because a callee-saved register might have been pushed on
+ * the stack.
+ */
+ if (ret_offset)
+ return false;
+
+ for (i = 0; i < CFI_NUM_REGS; i++) {
+ if (cfi->regs[i].base != initial_func_cfi.regs[i].base ||
+ cfi->regs[i].offset != initial_func_cfi.regs[i].offset)
return true;
+ }
return false;
}
static bool has_valid_stack_frame(struct insn_state *state)
{
- if (state->cfa.base == CFI_BP && state->regs[CFI_BP].base == CFI_CFA &&
- state->regs[CFI_BP].offset == -16)
+ struct cfi_state *cfi = &state->cfi;
+
+ if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA &&
+ cfi->regs[CFI_BP].offset == -16)
return true;
- if (state->drap && state->regs[CFI_BP].base == CFI_BP)
+ if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP)
return true;
return false;
}
-static int update_insn_state_regs(struct instruction *insn, struct insn_state *state)
+static int update_cfi_state_regs(struct instruction *insn,
+ struct cfi_state *cfi,
+ struct stack_op *op)
{
- struct cfi_reg *cfa = &state->cfa;
- struct stack_op *op = &insn->stack_op;
+ struct cfi_reg *cfa = &cfi->cfa;
- if (cfa->base != CFI_SP)
+ if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT)
return 0;
/* push */
@@ -1425,20 +1644,19 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s
return 0;
}
-static void save_reg(struct insn_state *state, unsigned char reg, int base,
- int offset)
+static void save_reg(struct cfi_state *cfi, unsigned char reg, int base, int offset)
{
if (arch_callee_saved_reg(reg) &&
- state->regs[reg].base == CFI_UNDEFINED) {
- state->regs[reg].base = base;
- state->regs[reg].offset = offset;
+ cfi->regs[reg].base == CFI_UNDEFINED) {
+ cfi->regs[reg].base = base;
+ cfi->regs[reg].offset = offset;
}
}
-static void restore_reg(struct insn_state *state, unsigned char reg)
+static void restore_reg(struct cfi_state *cfi, unsigned char reg)
{
- state->regs[reg].base = CFI_UNDEFINED;
- state->regs[reg].offset = 0;
+ cfi->regs[reg].base = initial_func_cfi.regs[reg].base;
+ cfi->regs[reg].offset = initial_func_cfi.regs[reg].offset;
}
/*
@@ -1494,11 +1712,11 @@ static void restore_reg(struct insn_state *state, unsigned char reg)
* 41 5d pop %r13
* c3 retq
*/
-static int update_insn_state(struct instruction *insn, struct insn_state *state)
+static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
+ struct stack_op *op)
{
- struct stack_op *op = &insn->stack_op;
- struct cfi_reg *cfa = &state->cfa;
- struct cfi_reg *regs = state->regs;
+ struct cfi_reg *cfa = &cfi->cfa;
+ struct cfi_reg *regs = cfi->regs;
/* stack operations don't make sense with an undefined CFA */
if (cfa->base == CFI_UNDEFINED) {
@@ -1509,8 +1727,8 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
return 0;
}
- if (state->type == ORC_TYPE_REGS || state->type == ORC_TYPE_REGS_IRET)
- return update_insn_state_regs(insn, state);
+ if (cfi->type == ORC_TYPE_REGS || cfi->type == ORC_TYPE_REGS_IRET)
+ return update_cfi_state_regs(insn, cfi, op);
switch (op->dest.type) {
@@ -1525,16 +1743,16 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
/* mov %rsp, %rbp */
cfa->base = op->dest.reg;
- state->bp_scratch = false;
+ cfi->bp_scratch = false;
}
else if (op->src.reg == CFI_SP &&
- op->dest.reg == CFI_BP && state->drap) {
+ op->dest.reg == CFI_BP && cfi->drap) {
/* drap: mov %rsp, %rbp */
regs[CFI_BP].base = CFI_BP;
- regs[CFI_BP].offset = -state->stack_size;
- state->bp_scratch = false;
+ regs[CFI_BP].offset = -cfi->stack_size;
+ cfi->bp_scratch = false;
}
else if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
@@ -1549,8 +1767,8 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
* ...
* mov %rax, %rsp
*/
- state->vals[op->dest.reg].base = CFI_CFA;
- state->vals[op->dest.reg].offset = -state->stack_size;
+ cfi->vals[op->dest.reg].base = CFI_CFA;
+ cfi->vals[op->dest.reg].offset = -cfi->stack_size;
}
else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP &&
@@ -1561,14 +1779,14 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
*
* Restore the original stack pointer (Clang).
*/
- state->stack_size = -state->regs[CFI_BP].offset;
+ cfi->stack_size = -cfi->regs[CFI_BP].offset;
}
else if (op->dest.reg == cfa->base) {
/* mov %reg, %rsp */
if (cfa->base == CFI_SP &&
- state->vals[op->src.reg].base == CFI_CFA) {
+ cfi->vals[op->src.reg].base == CFI_CFA) {
/*
* This is needed for the rare case
@@ -1578,8 +1796,8 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
* ...
* mov %rcx, %rsp
*/
- cfa->offset = -state->vals[op->src.reg].offset;
- state->stack_size = cfa->offset;
+ cfa->offset = -cfi->vals[op->src.reg].offset;
+ cfi->stack_size = cfa->offset;
} else {
cfa->base = CFI_UNDEFINED;
@@ -1593,7 +1811,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
if (op->dest.reg == CFI_SP && op->src.reg == CFI_SP) {
/* add imm, %rsp */
- state->stack_size -= op->src.offset;
+ cfi->stack_size -= op->src.offset;
if (cfa->base == CFI_SP)
cfa->offset -= op->src.offset;
break;
@@ -1602,14 +1820,14 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) {
/* lea disp(%rbp), %rsp */
- state->stack_size = -(op->src.offset + regs[CFI_BP].offset);
+ cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset);
break;
}
if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
/* drap: lea disp(%rsp), %drap */
- state->drap_reg = op->dest.reg;
+ cfi->drap_reg = op->dest.reg;
/*
* lea disp(%rsp), %reg
@@ -1621,25 +1839,25 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
* ...
* mov %rcx, %rsp
*/
- state->vals[op->dest.reg].base = CFI_CFA;
- state->vals[op->dest.reg].offset = \
- -state->stack_size + op->src.offset;
+ cfi->vals[op->dest.reg].base = CFI_CFA;
+ cfi->vals[op->dest.reg].offset = \
+ -cfi->stack_size + op->src.offset;
break;
}
- if (state->drap && op->dest.reg == CFI_SP &&
- op->src.reg == state->drap_reg) {
+ if (cfi->drap && op->dest.reg == CFI_SP &&
+ op->src.reg == cfi->drap_reg) {
/* drap: lea disp(%drap), %rsp */
cfa->base = CFI_SP;
- cfa->offset = state->stack_size = -op->src.offset;
- state->drap_reg = CFI_UNDEFINED;
- state->drap = false;
+ cfa->offset = cfi->stack_size = -op->src.offset;
+ cfi->drap_reg = CFI_UNDEFINED;
+ cfi->drap = false;
break;
}
- if (op->dest.reg == state->cfa.base) {
+ if (op->dest.reg == cfi->cfa.base) {
WARN_FUNC("unsupported stack register modification",
insn->sec, insn->offset);
return -1;
@@ -1649,18 +1867,18 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
case OP_SRC_AND:
if (op->dest.reg != CFI_SP ||
- (state->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
- (state->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
+ (cfi->drap_reg != CFI_UNDEFINED && cfa->base != CFI_SP) ||
+ (cfi->drap_reg == CFI_UNDEFINED && cfa->base != CFI_BP)) {
WARN_FUNC("unsupported stack pointer realignment",
insn->sec, insn->offset);
return -1;
}
- if (state->drap_reg != CFI_UNDEFINED) {
+ if (cfi->drap_reg != CFI_UNDEFINED) {
/* drap: and imm, %rsp */
- cfa->base = state->drap_reg;
- cfa->offset = state->stack_size = 0;
- state->drap = true;
+ cfa->base = cfi->drap_reg;
+ cfa->offset = cfi->stack_size = 0;
+ cfi->drap = true;
}
/*
@@ -1672,57 +1890,55 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
case OP_SRC_POP:
case OP_SRC_POPF:
- if (!state->drap && op->dest.type == OP_DEST_REG &&
- op->dest.reg == cfa->base) {
+ if (!cfi->drap && op->dest.reg == cfa->base) {
/* pop %rbp */
cfa->base = CFI_SP;
}
- if (state->drap && cfa->base == CFI_BP_INDIRECT &&
- op->dest.type == OP_DEST_REG &&
- op->dest.reg == state->drap_reg &&
- state->drap_offset == -state->stack_size) {
+ if (cfi->drap && cfa->base == CFI_BP_INDIRECT &&
+ op->dest.reg == cfi->drap_reg &&
+ cfi->drap_offset == -cfi->stack_size) {
/* drap: pop %drap */
- cfa->base = state->drap_reg;
+ cfa->base = cfi->drap_reg;
cfa->offset = 0;
- state->drap_offset = -1;
+ cfi->drap_offset = -1;
- } else if (regs[op->dest.reg].offset == -state->stack_size) {
+ } else if (regs[op->dest.reg].offset == -cfi->stack_size) {
/* pop %reg */
- restore_reg(state, op->dest.reg);
+ restore_reg(cfi, op->dest.reg);
}
- state->stack_size -= 8;
+ cfi->stack_size -= 8;
if (cfa->base == CFI_SP)
cfa->offset -= 8;
break;
case OP_SRC_REG_INDIRECT:
- if (state->drap && op->src.reg == CFI_BP &&
- op->src.offset == state->drap_offset) {
+ if (cfi->drap && op->src.reg == CFI_BP &&
+ op->src.offset == cfi->drap_offset) {
/* drap: mov disp(%rbp), %drap */
- cfa->base = state->drap_reg;
+ cfa->base = cfi->drap_reg;
cfa->offset = 0;
- state->drap_offset = -1;
+ cfi->drap_offset = -1;
}
- if (state->drap && op->src.reg == CFI_BP &&
+ if (cfi->drap && op->src.reg == CFI_BP &&
op->src.offset == regs[op->dest.reg].offset) {
/* drap: mov disp(%rbp), %reg */
- restore_reg(state, op->dest.reg);
+ restore_reg(cfi, op->dest.reg);
} else if (op->src.reg == cfa->base &&
op->src.offset == regs[op->dest.reg].offset + cfa->offset) {
/* mov disp(%rbp), %reg */
/* mov disp(%rsp), %reg */
- restore_reg(state, op->dest.reg);
+ restore_reg(cfi, op->dest.reg);
}
break;
@@ -1737,78 +1953,78 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
case OP_DEST_PUSH:
case OP_DEST_PUSHF:
- state->stack_size += 8;
+ cfi->stack_size += 8;
if (cfa->base == CFI_SP)
cfa->offset += 8;
if (op->src.type != OP_SRC_REG)
break;
- if (state->drap) {
- if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+ if (cfi->drap) {
+ if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) {
/* drap: push %drap */
cfa->base = CFI_BP_INDIRECT;
- cfa->offset = -state->stack_size;
+ cfa->offset = -cfi->stack_size;
/* save drap so we know when to restore it */
- state->drap_offset = -state->stack_size;
+ cfi->drap_offset = -cfi->stack_size;
- } else if (op->src.reg == CFI_BP && cfa->base == state->drap_reg) {
+ } else if (op->src.reg == CFI_BP && cfa->base == cfi->drap_reg) {
/* drap: push %rbp */
- state->stack_size = 0;
+ cfi->stack_size = 0;
} else if (regs[op->src.reg].base == CFI_UNDEFINED) {
/* drap: push %reg */
- save_reg(state, op->src.reg, CFI_BP, -state->stack_size);
+ save_reg(cfi, op->src.reg, CFI_BP, -cfi->stack_size);
}
} else {
/* push %reg */
- save_reg(state, op->src.reg, CFI_CFA, -state->stack_size);
+ save_reg(cfi, op->src.reg, CFI_CFA, -cfi->stack_size);
}
/* detect when asm code uses rbp as a scratch register */
if (!no_fp && insn->func && op->src.reg == CFI_BP &&
cfa->base != CFI_BP)
- state->bp_scratch = true;
+ cfi->bp_scratch = true;
break;
case OP_DEST_REG_INDIRECT:
- if (state->drap) {
- if (op->src.reg == cfa->base && op->src.reg == state->drap_reg) {
+ if (cfi->drap) {
+ if (op->src.reg == cfa->base && op->src.reg == cfi->drap_reg) {
/* drap: mov %drap, disp(%rbp) */
cfa->base = CFI_BP_INDIRECT;
cfa->offset = op->dest.offset;
/* save drap offset so we know when to restore it */
- state->drap_offset = op->dest.offset;
+ cfi->drap_offset = op->dest.offset;
}
else if (regs[op->src.reg].base == CFI_UNDEFINED) {
/* drap: mov reg, disp(%rbp) */
- save_reg(state, op->src.reg, CFI_BP, op->dest.offset);
+ save_reg(cfi, op->src.reg, CFI_BP, op->dest.offset);
}
} else if (op->dest.reg == cfa->base) {
/* mov reg, disp(%rbp) */
/* mov reg, disp(%rsp) */
- save_reg(state, op->src.reg, CFI_CFA,
- op->dest.offset - state->cfa.offset);
+ save_reg(cfi, op->src.reg, CFI_CFA,
+ op->dest.offset - cfi->cfa.offset);
}
break;
case OP_DEST_LEAVE:
- if ((!state->drap && cfa->base != CFI_BP) ||
- (state->drap && cfa->base != state->drap_reg)) {
+ if ((!cfi->drap && cfa->base != CFI_BP) ||
+ (cfi->drap && cfa->base != cfi->drap_reg)) {
WARN_FUNC("leave instruction with modified stack frame",
insn->sec, insn->offset);
return -1;
@@ -1816,10 +2032,10 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
/* leave (mov %rbp, %rsp; pop %rbp) */
- state->stack_size = -state->regs[CFI_BP].offset - 8;
- restore_reg(state, CFI_BP);
+ cfi->stack_size = -cfi->regs[CFI_BP].offset - 8;
+ restore_reg(cfi, CFI_BP);
- if (!state->drap) {
+ if (!cfi->drap) {
cfa->base = CFI_SP;
cfa->offset -= 8;
}
@@ -1834,7 +2050,7 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
}
/* pop mem */
- state->stack_size -= 8;
+ cfi->stack_size -= 8;
if (cfa->base == CFI_SP)
cfa->offset -= 8;
@@ -1849,41 +2065,86 @@ static int update_insn_state(struct instruction *insn, struct insn_state *state)
return 0;
}
-static bool insn_state_match(struct instruction *insn, struct insn_state *state)
+static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
+{
+ struct stack_op *op;
+
+ list_for_each_entry(op, &insn->stack_ops, list) {
+ struct cfi_state old_cfi = state->cfi;
+ int res;
+
+ res = update_cfi_state(insn, &state->cfi, op);
+ if (res)
+ return res;
+
+ if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) {
+ WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
+ return -1;
+ }
+
+ if (op->dest.type == OP_DEST_PUSHF) {
+ if (!state->uaccess_stack) {
+ state->uaccess_stack = 1;
+ } else if (state->uaccess_stack >> 31) {
+ WARN_FUNC("PUSHF stack exhausted",
+ insn->sec, insn->offset);
+ return 1;
+ }
+ state->uaccess_stack <<= 1;
+ state->uaccess_stack |= state->uaccess;
+ }
+
+ if (op->src.type == OP_SRC_POPF) {
+ if (state->uaccess_stack) {
+ state->uaccess = state->uaccess_stack & 1;
+ state->uaccess_stack >>= 1;
+ if (state->uaccess_stack == 1)
+ state->uaccess_stack = 0;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static bool insn_cfi_match(struct instruction *insn, struct cfi_state *cfi2)
{
- struct insn_state *state1 = &insn->state, *state2 = state;
+ struct cfi_state *cfi1 = &insn->cfi;
int i;
- if (memcmp(&state1->cfa, &state2->cfa, sizeof(state1->cfa))) {
+ if (memcmp(&cfi1->cfa, &cfi2->cfa, sizeof(cfi1->cfa))) {
+
WARN_FUNC("stack state mismatch: cfa1=%d%+d cfa2=%d%+d",
insn->sec, insn->offset,
- state1->cfa.base, state1->cfa.offset,
- state2->cfa.base, state2->cfa.offset);
+ cfi1->cfa.base, cfi1->cfa.offset,
+ cfi2->cfa.base, cfi2->cfa.offset);
- } else if (memcmp(&state1->regs, &state2->regs, sizeof(state1->regs))) {
+ } else if (memcmp(&cfi1->regs, &cfi2->regs, sizeof(cfi1->regs))) {
for (i = 0; i < CFI_NUM_REGS; i++) {
- if (!memcmp(&state1->regs[i], &state2->regs[i],
+ if (!memcmp(&cfi1->regs[i], &cfi2->regs[i],
sizeof(struct cfi_reg)))
continue;
WARN_FUNC("stack state mismatch: reg1[%d]=%d%+d reg2[%d]=%d%+d",
insn->sec, insn->offset,
- i, state1->regs[i].base, state1->regs[i].offset,
- i, state2->regs[i].base, state2->regs[i].offset);
+ i, cfi1->regs[i].base, cfi1->regs[i].offset,
+ i, cfi2->regs[i].base, cfi2->regs[i].offset);
break;
}
- } else if (state1->type != state2->type) {
+ } else if (cfi1->type != cfi2->type) {
+
WARN_FUNC("stack state mismatch: type1=%d type2=%d",
- insn->sec, insn->offset, state1->type, state2->type);
+ insn->sec, insn->offset, cfi1->type, cfi2->type);
+
+ } else if (cfi1->drap != cfi2->drap ||
+ (cfi1->drap && cfi1->drap_reg != cfi2->drap_reg) ||
+ (cfi1->drap && cfi1->drap_offset != cfi2->drap_offset)) {
- } else if (state1->drap != state2->drap ||
- (state1->drap && state1->drap_reg != state2->drap_reg) ||
- (state1->drap && state1->drap_offset != state2->drap_offset)) {
WARN_FUNC("stack state mismatch: drap1=%d(%d,%d) drap2=%d(%d,%d)",
insn->sec, insn->offset,
- state1->drap, state1->drap_reg, state1->drap_offset,
- state2->drap, state2->drap_reg, state2->drap_offset);
+ cfi1->drap, cfi1->drap_reg, cfi1->drap_offset,
+ cfi2->drap, cfi2->drap_reg, cfi2->drap_offset);
} else
return true;
@@ -1909,6 +2170,13 @@ static inline const char *call_dest_name(struct instruction *insn)
static int validate_call(struct instruction *insn, struct insn_state *state)
{
+ if (state->noinstr && state->instr <= 0 &&
+ (!insn->call_dest || !insn->call_dest->sec->noinstr)) {
+ WARN_FUNC("call to %s() leaves .noinstr.text section",
+ insn->sec, insn->offset, call_dest_name(insn));
+ return 1;
+ }
+
if (state->uaccess && !func_uaccess_safe(insn->call_dest)) {
WARN_FUNC("call to %s() with UACCESS enabled",
insn->sec, insn->offset, call_dest_name(insn));
@@ -1926,7 +2194,7 @@ static int validate_call(struct instruction *insn, struct insn_state *state)
static int validate_sibling_call(struct instruction *insn, struct insn_state *state)
{
- if (has_modified_stack_frame(state)) {
+ if (has_modified_stack_frame(insn, state)) {
WARN_FUNC("sibling call from callable instruction with modified stack frame",
insn->sec, insn->offset);
return 1;
@@ -1935,6 +2203,71 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st
return validate_call(insn, state);
}
+static int validate_return(struct symbol *func, struct instruction *insn, struct insn_state *state)
+{
+ if (state->noinstr && state->instr > 0) {
+ WARN_FUNC("return with instrumentation enabled",
+ insn->sec, insn->offset);
+ return 1;
+ }
+
+ if (state->uaccess && !func_uaccess_safe(func)) {
+ WARN_FUNC("return with UACCESS enabled",
+ insn->sec, insn->offset);
+ return 1;
+ }
+
+ if (!state->uaccess && func_uaccess_safe(func)) {
+ WARN_FUNC("return with UACCESS disabled from a UACCESS-safe function",
+ insn->sec, insn->offset);
+ return 1;
+ }
+
+ if (state->df) {
+ WARN_FUNC("return with DF set",
+ insn->sec, insn->offset);
+ return 1;
+ }
+
+ if (func && has_modified_stack_frame(insn, state)) {
+ WARN_FUNC("return with modified stack frame",
+ insn->sec, insn->offset);
+ return 1;
+ }
+
+ if (state->cfi.bp_scratch) {
+ WARN_FUNC("BP used as a scratch register",
+ insn->sec, insn->offset);
+ return 1;
+ }
+
+ return 0;
+}
+
+/*
+ * Alternatives should not contain any ORC entries, this in turn means they
+ * should not contain any CFI ops, which implies all instructions should have
+ * the same same CFI state.
+ *
+ * It is possible to constuct alternatives that have unreachable holes that go
+ * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
+ * states which then results in ORC entries, which we just said we didn't want.
+ *
+ * Avoid them by copying the CFI entry of the first instruction into the whole
+ * alternative.
+ */
+static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
+{
+ struct instruction *first_insn = insn;
+ int alt_group = insn->alt_group;
+
+ sec_for_each_insn_continue(file, insn) {
+ if (insn->alt_group != alt_group)
+ break;
+ insn->cfi = first_insn->cfi;
+ }
+}
+
/*
* Follow the branch starting at the given instruction, and recursively follow
* any other branches (jumps). Meanwhile, track the frame pointer state at
@@ -1942,23 +2275,16 @@ static int validate_sibling_call(struct instruction *insn, struct insn_state *st
* tools/objtool/Documentation/stack-validation.txt.
*/
static int validate_branch(struct objtool_file *file, struct symbol *func,
- struct instruction *first, struct insn_state state)
+ struct instruction *insn, struct insn_state state)
{
struct alternative *alt;
- struct instruction *insn, *next_insn;
+ struct instruction *next_insn;
struct section *sec;
u8 visited;
int ret;
- insn = first;
sec = insn->sec;
- if (insn->alt_group && list_empty(&insn->alts)) {
- WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
- sec, insn->offset);
- return 1;
- }
-
while (1) {
next_insn = next_insn_same_sec(file, insn);
@@ -1976,59 +2302,24 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
visited = 1 << state.uaccess;
if (insn->visited) {
- if (!insn->hint && !insn_state_match(insn, &state))
+ if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
return 1;
if (insn->visited & visited)
return 0;
}
- if (insn->hint) {
- if (insn->restore) {
- struct instruction *save_insn, *i;
-
- i = insn;
- save_insn = NULL;
- func_for_each_insn_continue_reverse(file, func, i) {
- if (i->save) {
- save_insn = i;
- break;
- }
- }
-
- if (!save_insn) {
- WARN_FUNC("no corresponding CFI save for CFI restore",
- sec, insn->offset);
- return 1;
- }
-
- if (!save_insn->visited) {
- /*
- * Oops, no state to copy yet.
- * Hopefully we can reach this
- * instruction from another branch
- * after the save insn has been
- * visited.
- */
- if (insn == first)
- return 0;
+ if (state.noinstr)
+ state.instr += insn->instr;
- WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
- sec, insn->offset);
- return 1;
- }
-
- insn->state = save_insn->state;
- }
-
- state = insn->state;
-
- } else
- insn->state = state;
+ if (insn->hint)
+ state.cfi = insn->cfi;
+ else
+ insn->cfi = state.cfi;
insn->visited |= visited;
- if (!insn->ignore_alts) {
+ if (!insn->ignore_alts && !list_empty(&insn->alts)) {
bool skip_orig = false;
list_for_each_entry(alt, &insn->alts, list) {
@@ -2043,41 +2334,20 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
}
}
+ if (insn->alt_group)
+ fill_alternative_cfi(file, insn);
+
if (skip_orig)
return 0;
}
+ if (handle_insn_ops(insn, &state))
+ return 1;
+
switch (insn->type) {
case INSN_RETURN:
- if (state.uaccess && !func_uaccess_safe(func)) {
- WARN_FUNC("return with UACCESS enabled", sec, insn->offset);
- return 1;
- }
-
- if (!state.uaccess && func_uaccess_safe(func)) {
- WARN_FUNC("return with UACCESS disabled from a UACCESS-safe function", sec, insn->offset);
- return 1;
- }
-
- if (state.df) {
- WARN_FUNC("return with DF set", sec, insn->offset);
- return 1;
- }
-
- if (func && has_modified_stack_frame(&state)) {
- WARN_FUNC("return with modified stack frame",
- sec, insn->offset);
- return 1;
- }
-
- if (state.bp_scratch) {
- WARN("%s uses BP as a scratch register",
- func->name);
- return 1;
- }
-
- return 0;
+ return validate_return(func, insn, &state);
case INSN_CALL:
case INSN_CALL_DYNAMIC:
@@ -2140,32 +2410,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
}
return 0;
- case INSN_STACK:
- if (update_insn_state(insn, &state))
- return 1;
-
- if (insn->stack_op.dest.type == OP_DEST_PUSHF) {
- if (!state.uaccess_stack) {
- state.uaccess_stack = 1;
- } else if (state.uaccess_stack >> 31) {
- WARN_FUNC("PUSHF stack exhausted", sec, insn->offset);
- return 1;
- }
- state.uaccess_stack <<= 1;
- state.uaccess_stack |= state.uaccess;
- }
-
- if (insn->stack_op.src.type == OP_SRC_POPF) {
- if (state.uaccess_stack) {
- state.uaccess = state.uaccess_stack & 1;
- state.uaccess_stack >>= 1;
- if (state.uaccess_stack == 1)
- state.uaccess_stack = 0;
- }
- }
-
- break;
-
case INSN_STAC:
if (state.uaccess) {
WARN_FUNC("recursive UACCESS enable", sec, insn->offset);
@@ -2211,7 +2455,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 0;
if (!next_insn) {
- if (state.cfa.base == CFI_UNDEFINED)
+ if (state.cfi.cfa.base == CFI_UNDEFINED)
return 0;
WARN("%s: unexpected end of section", sec->name);
return 1;
@@ -2223,24 +2467,34 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 0;
}
-static int validate_unwind_hints(struct objtool_file *file)
+static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
{
struct instruction *insn;
- int ret, warnings = 0;
struct insn_state state;
+ int ret, warnings = 0;
if (!file->hints)
return 0;
- clear_insn_state(&state);
+ init_insn_state(&state, sec);
- for_each_insn(file, insn) {
+ if (sec) {
+ insn = find_insn(file, sec, 0);
+ if (!insn)
+ return 0;
+ } else {
+ insn = list_first_entry(&file->insn_list, typeof(*insn), list);
+ }
+
+ while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
if (insn->hint && !insn->visited) {
ret = validate_branch(file, insn->func, insn, state);
if (ret && backtrace)
BT_FUNC("<=== (hint)", insn);
warnings += ret;
}
+
+ insn = list_next_entry(insn, list);
}
return warnings;
@@ -2310,14 +2564,27 @@ static bool ignore_unreachable_insn(struct instruction *insn)
!strcmp(insn->sec->name, ".altinstr_aux"))
return true;
+ if (!insn->func)
+ return false;
+
+ /*
+ * CONFIG_UBSAN_TRAP inserts a UD2 when it sees
+ * __builtin_unreachable(). The BUG() macro has an unreachable() after
+ * the UD2, which causes GCC's undefined trap logic to emit another UD2
+ * (or occasionally a JMP to UD2).
+ */
+ if (list_prev_entry(insn, list)->dead_end &&
+ (insn->type == INSN_BUG ||
+ (insn->type == INSN_JUMP_UNCONDITIONAL &&
+ insn->jump_dest && insn->jump_dest->type == INSN_BUG)))
+ return true;
+
/*
* Check if this (or a subsequent) instruction is related to
* CONFIG_UBSAN or CONFIG_KASAN.
*
* End the search at 5 instructions to avoid going into the weeds.
*/
- if (!insn->func)
- return false;
for (i = 0; i < 5; i++) {
if (is_kasan_insn(insn) || is_ubsan_insn(insn))
@@ -2342,46 +2609,84 @@ static bool ignore_unreachable_insn(struct instruction *insn)
return false;
}
-static int validate_functions(struct objtool_file *file)
+static int validate_symbol(struct objtool_file *file, struct section *sec,
+ struct symbol *sym, struct insn_state *state)
{
- struct section *sec;
- struct symbol *func;
struct instruction *insn;
+ int ret;
+
+ if (!sym->len) {
+ WARN("%s() is missing an ELF size annotation", sym->name);
+ return 1;
+ }
+
+ if (sym->pfunc != sym || sym->alias != sym)
+ return 0;
+
+ insn = find_insn(file, sec, sym->offset);
+ if (!insn || insn->ignore || insn->visited)
+ return 0;
+
+ state->uaccess = sym->uaccess_safe;
+
+ ret = validate_branch(file, insn->func, insn, *state);
+ if (ret && backtrace)
+ BT_FUNC("<=== (sym)", insn);
+ return ret;
+}
+
+static int validate_section(struct objtool_file *file, struct section *sec)
+{
struct insn_state state;
- int ret, warnings = 0;
+ struct symbol *func;
+ int warnings = 0;
- clear_insn_state(&state);
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
- state.cfa = initial_func_cfi.cfa;
- memcpy(&state.regs, &initial_func_cfi.regs,
- CFI_NUM_REGS * sizeof(struct cfi_reg));
- state.stack_size = initial_func_cfi.cfa.offset;
+ init_insn_state(&state, sec);
+ state.cfi.cfa = initial_func_cfi.cfa;
+ memcpy(&state.cfi.regs, &initial_func_cfi.regs,
+ CFI_NUM_REGS * sizeof(struct cfi_reg));
+ state.cfi.stack_size = initial_func_cfi.cfa.offset;
- for_each_sec(file, sec) {
- list_for_each_entry(func, &sec->symbol_list, list) {
- if (func->type != STT_FUNC)
- continue;
+ warnings += validate_symbol(file, sec, func, &state);
+ }
- if (!func->len) {
- WARN("%s() is missing an ELF size annotation",
- func->name);
- warnings++;
- }
+ return warnings;
+}
- if (func->pfunc != func || func->alias != func)
- continue;
+static int validate_vmlinux_functions(struct objtool_file *file)
+{
+ struct section *sec;
+ int warnings = 0;
- insn = find_insn(file, sec, func->offset);
- if (!insn || insn->ignore || insn->visited)
- continue;
+ sec = find_section_by_name(file->elf, ".noinstr.text");
+ if (sec) {
+ warnings += validate_section(file, sec);
+ warnings += validate_unwind_hints(file, sec);
+ }
- state.uaccess = func->uaccess_safe;
+ sec = find_section_by_name(file->elf, ".entry.text");
+ if (sec) {
+ warnings += validate_section(file, sec);
+ warnings += validate_unwind_hints(file, sec);
+ }
- ret = validate_branch(file, func, insn, state);
- if (ret && backtrace)
- BT_FUNC("<=== (func)", insn);
- warnings += ret;
- }
+ return warnings;
+}
+
+static int validate_functions(struct objtool_file *file)
+{
+ struct section *sec;
+ int warnings = 0;
+
+ for_each_sec(file, sec) {
+ if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ continue;
+
+ warnings += validate_section(file, sec);
}
return warnings;
@@ -2405,23 +2710,6 @@ static int validate_reachable_instructions(struct objtool_file *file)
return 0;
}
-static void cleanup(struct objtool_file *file)
-{
- struct instruction *insn, *tmpinsn;
- struct alternative *alt, *tmpalt;
-
- list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
- list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
- list_del(&alt->list);
- free(alt);
- }
- list_del(&insn->list);
- hash_del(&insn->hash);
- free(insn);
- }
- elf_close(file->elf);
-}
-
static struct objtool_file file;
int check(const char *_objname, bool orc)
@@ -2430,7 +2718,7 @@ int check(const char *_objname, bool orc)
objname = _objname;
- file.elf = elf_read(objname, orc ? O_RDWR : O_RDONLY);
+ file.elf = elf_open_read(objname, orc ? O_RDWR : O_RDONLY);
if (!file.elf)
return 1;
@@ -2450,6 +2738,15 @@ int check(const char *_objname, bool orc)
if (list_empty(&file.insn_list))
goto out;
+ if (vmlinux && !validate_dup) {
+ ret = validate_vmlinux_functions(&file);
+ if (ret < 0)
+ goto out;
+
+ warnings += ret;
+ goto out;
+ }
+
if (retpoline) {
ret = validate_retpoline(&file);
if (ret < 0)
@@ -2462,7 +2759,7 @@ int check(const char *_objname, bool orc)
goto out;
warnings += ret;
- ret = validate_unwind_hints(&file);
+ ret = validate_unwind_hints(&file, NULL);
if (ret < 0)
goto out;
warnings += ret;
@@ -2489,10 +2786,14 @@ int check(const char *_objname, bool orc)
}
out:
- cleanup(&file);
+ if (ret < 0) {
+ /*
+ * Fatal error. The binary is corrupt or otherwise broken in
+ * some way, or objtool itself is broken. Fail the kernel
+ * build.
+ */
+ return ret;
+ }
- /* ignore warnings for now until we get all the code cleaned up */
- if (ret || warnings)
- return 0;
return 0;
}
diff --git a/tools/objtool/check.h b/tools/objtool/check.h
index 6d875ca6fce0..906b5210f7ca 100644
--- a/tools/objtool/check.h
+++ b/tools/objtool/check.h
@@ -7,22 +7,16 @@
#define _CHECK_H
#include <stdbool.h>
-#include "elf.h"
#include "cfi.h"
#include "arch.h"
-#include "orc.h"
-#include <linux/hashtable.h>
struct insn_state {
- struct cfi_reg cfa;
- struct cfi_reg regs[CFI_NUM_REGS];
- int stack_size;
- unsigned char type;
- bool bp_scratch;
- bool drap, end, uaccess, df;
+ struct cfi_state cfi;
unsigned int uaccess_stack;
- int drap_reg, drap_offset;
- struct cfi_reg vals[CFI_NUM_REGS];
+ bool uaccess;
+ bool df;
+ bool noinstr;
+ s8 instr;
};
struct instruction {
@@ -33,29 +27,24 @@ struct instruction {
unsigned int len;
enum insn_type type;
unsigned long immediate;
- bool alt_group, dead_end, ignore, hint, save, restore, ignore_alts;
+ bool dead_end, ignore, ignore_alts;
+ bool hint;
bool retpoline_safe;
+ s8 instr;
u8 visited;
+ u8 ret_offset;
+ int alt_group;
struct symbol *call_dest;
struct instruction *jump_dest;
struct instruction *first_jump_src;
struct rela *jump_table;
struct list_head alts;
struct symbol *func;
- struct stack_op stack_op;
- struct insn_state state;
+ struct list_head stack_ops;
+ struct cfi_state cfi;
struct orc_entry orc;
};
-struct objtool_file {
- struct elf *elf;
- struct list_head insn_list;
- DECLARE_HASHTABLE(insn_hash, 16);
- bool ignore_unreachables, c_file, hints, rodata;
-};
-
-int check(const char *objname, bool orc);
-
struct instruction *find_insn(struct objtool_file *file,
struct section *sec, unsigned long offset);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index edba4745f25a..84225679f96d 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -15,17 +15,123 @@
#include <string.h>
#include <unistd.h>
#include <errno.h>
+#include "builtin.h"
#include "elf.h"
#include "warn.h"
#define MAX_NAME_LEN 128
-struct section *find_section_by_name(struct elf *elf, const char *name)
+static inline u32 str_hash(const char *str)
+{
+ return jhash(str, strlen(str), 0);
+}
+
+static inline int elf_hash_bits(void)
+{
+ return vmlinux ? ELF_HASH_BITS : 16;
+}
+
+#define elf_hash_add(hashtable, node, key) \
+ hlist_add_head(node, &hashtable[hash_min(key, elf_hash_bits())])
+
+static void elf_hash_init(struct hlist_head *table)
+{
+ __hash_init(table, 1U << elf_hash_bits());
+}
+
+#define elf_hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, &name[hash_min(key, elf_hash_bits())], member)
+
+static void rb_add(struct rb_root *tree, struct rb_node *node,
+ int (*cmp)(struct rb_node *, const struct rb_node *))
+{
+ struct rb_node **link = &tree->rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*link) {
+ parent = *link;
+ if (cmp(node, parent) < 0)
+ link = &parent->rb_left;
+ else
+ link = &parent->rb_right;
+ }
+
+ rb_link_node(node, parent, link);
+ rb_insert_color(node, tree);
+}
+
+static struct rb_node *rb_find_first(const struct rb_root *tree, const void *key,
+ int (*cmp)(const void *key, const struct rb_node *))
+{
+ struct rb_node *node = tree->rb_node;
+ struct rb_node *match = NULL;
+
+ while (node) {
+ int c = cmp(key, node);
+ if (c <= 0) {
+ if (!c)
+ match = node;
+ node = node->rb_left;
+ } else if (c > 0) {
+ node = node->rb_right;
+ }
+ }
+
+ return match;
+}
+
+static struct rb_node *rb_next_match(struct rb_node *node, const void *key,
+ int (*cmp)(const void *key, const struct rb_node *))
+{
+ node = rb_next(node);
+ if (node && cmp(key, node))
+ node = NULL;
+ return node;
+}
+
+#define rb_for_each(tree, node, key, cmp) \
+ for ((node) = rb_find_first((tree), (key), (cmp)); \
+ (node); (node) = rb_next_match((node), (key), (cmp)))
+
+static int symbol_to_offset(struct rb_node *a, const struct rb_node *b)
+{
+ struct symbol *sa = rb_entry(a, struct symbol, node);
+ struct symbol *sb = rb_entry(b, struct symbol, node);
+
+ if (sa->offset < sb->offset)
+ return -1;
+ if (sa->offset > sb->offset)
+ return 1;
+
+ if (sa->len < sb->len)
+ return -1;
+ if (sa->len > sb->len)
+ return 1;
+
+ sa->alias = sb;
+
+ return 0;
+}
+
+static int symbol_by_offset(const void *key, const struct rb_node *node)
+{
+ const struct symbol *s = rb_entry(node, struct symbol, node);
+ const unsigned long *o = key;
+
+ if (*o < s->offset)
+ return -1;
+ if (*o >= s->offset + s->len)
+ return 1;
+
+ return 0;
+}
+
+struct section *find_section_by_name(const struct elf *elf, const char *name)
{
struct section *sec;
- list_for_each_entry(sec, &elf->sections, list)
+ elf_hash_for_each_possible(elf->section_name_hash, sec, name_hash, str_hash(name))
if (!strcmp(sec->name, name))
return sec;
@@ -37,7 +143,7 @@ static struct section *find_section_by_index(struct elf *elf,
{
struct section *sec;
- list_for_each_entry(sec, &elf->sections, list)
+ elf_hash_for_each_possible(elf->section_hash, sec, hash, idx)
if (sec->idx == idx)
return sec;
@@ -46,88 +152,116 @@ static struct section *find_section_by_index(struct elf *elf,
static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
{
- struct section *sec;
struct symbol *sym;
- list_for_each_entry(sec, &elf->sections, list)
- hash_for_each_possible(sec->symbol_hash, sym, hash, idx)
- if (sym->idx == idx)
- return sym;
+ elf_hash_for_each_possible(elf->symbol_hash, sym, hash, idx)
+ if (sym->idx == idx)
+ return sym;
return NULL;
}
struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
{
- struct symbol *sym;
+ struct rb_node *node;
- list_for_each_entry(sym, &sec->symbol_list, list)
- if (sym->type != STT_SECTION &&
- sym->offset == offset)
- return sym;
+ rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) {
+ struct symbol *s = rb_entry(node, struct symbol, node);
+
+ if (s->offset == offset && s->type != STT_SECTION)
+ return s;
+ }
return NULL;
}
-struct symbol *find_symbol_by_name(struct elf *elf, const char *name)
+struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
{
- struct section *sec;
- struct symbol *sym;
+ struct rb_node *node;
- list_for_each_entry(sec, &elf->sections, list)
- list_for_each_entry(sym, &sec->symbol_list, list)
- if (!strcmp(sym->name, name))
- return sym;
+ rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) {
+ struct symbol *s = rb_entry(node, struct symbol, node);
+
+ if (s->offset == offset && s->type == STT_FUNC)
+ return s;
+ }
return NULL;
}
-struct symbol *find_symbol_containing(struct section *sec, unsigned long offset)
+struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset)
{
- struct symbol *sym;
+ struct rb_node *node;
- list_for_each_entry(sym, &sec->symbol_list, list)
- if (sym->type != STT_SECTION &&
- offset >= sym->offset && offset < sym->offset + sym->len)
- return sym;
+ rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) {
+ struct symbol *s = rb_entry(node, struct symbol, node);
+
+ if (s->type != STT_SECTION)
+ return s;
+ }
return NULL;
}
-struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
- unsigned int len)
+struct symbol *find_func_containing(struct section *sec, unsigned long offset)
{
- struct rela *rela;
- unsigned long o;
+ struct rb_node *node;
- if (!sec->rela)
- return NULL;
+ rb_for_each(&sec->symbol_tree, node, &offset, symbol_by_offset) {
+ struct symbol *s = rb_entry(node, struct symbol, node);
- for (o = offset; o < offset + len; o++)
- hash_for_each_possible(sec->rela->rela_hash, rela, hash, o)
- if (rela->offset == o)
- return rela;
+ if (s->type == STT_FUNC)
+ return s;
+ }
return NULL;
}
-struct rela *find_rela_by_dest(struct section *sec, unsigned long offset)
+struct symbol *find_symbol_by_name(const struct elf *elf, const char *name)
{
- return find_rela_by_dest_range(sec, offset, 1);
+ struct symbol *sym;
+
+ elf_hash_for_each_possible(elf->symbol_name_hash, sym, name_hash, str_hash(name))
+ if (!strcmp(sym->name, name))
+ return sym;
+
+ return NULL;
}
-struct symbol *find_containing_func(struct section *sec, unsigned long offset)
+struct rela *find_rela_by_dest_range(const struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int len)
{
- struct symbol *func;
+ struct rela *rela, *r = NULL;
+ unsigned long o;
- list_for_each_entry(func, &sec->symbol_list, list)
- if (func->type == STT_FUNC && offset >= func->offset &&
- offset < func->offset + func->len)
- return func;
+ if (!sec->rela)
+ return NULL;
+
+ sec = sec->rela;
+
+ for_offset_range(o, offset, offset + len) {
+ elf_hash_for_each_possible(elf->rela_hash, rela, hash,
+ sec_offset_hash(sec, o)) {
+ if (rela->sec != sec)
+ continue;
+
+ if (rela->offset >= offset && rela->offset < offset + len) {
+ if (!r || rela->offset < r->offset)
+ r = rela;
+ }
+ }
+ if (r)
+ return r;
+ }
return NULL;
}
+struct rela *find_rela_by_dest(const struct elf *elf, struct section *sec, unsigned long offset)
+{
+ return find_rela_by_dest_range(elf, sec, offset, 1);
+}
+
static int read_sections(struct elf *elf)
{
Elf_Scn *s = NULL;
@@ -155,10 +289,6 @@ static int read_sections(struct elf *elf)
INIT_LIST_HEAD(&sec->symbol_list);
INIT_LIST_HEAD(&sec->rela_list);
- hash_init(sec->rela_hash);
- hash_init(sec->symbol_hash);
-
- list_add_tail(&sec->list, &elf->sections);
s = elf_getscn(elf->elf, i);
if (!s) {
@@ -193,8 +323,15 @@ static int read_sections(struct elf *elf)
}
}
sec->len = sec->sh.sh_size;
+
+ list_add_tail(&sec->list, &elf->sections);
+ elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
+ elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
}
+ if (stats)
+ printf("nr_sections: %lu\n", (unsigned long)sections_nr);
+
/* sanity check, one more call to elf_nextscn() should return NULL */
if (elf_nextscn(elf->elf, s)) {
WARN("section entry mismatch");
@@ -206,11 +343,14 @@ static int read_sections(struct elf *elf)
static int read_symbols(struct elf *elf)
{
- struct section *symtab, *sec;
- struct symbol *sym, *pfunc, *alias;
- struct list_head *entry, *tmp;
+ struct section *symtab, *symtab_shndx, *sec;
+ struct symbol *sym, *pfunc;
+ struct list_head *entry;
+ struct rb_node *pnode;
int symbols_nr, i;
char *coldstr;
+ Elf_Data *shndx_data = NULL;
+ Elf32_Word shndx;
symtab = find_section_by_name(elf, ".symtab");
if (!symtab) {
@@ -218,6 +358,10 @@ static int read_symbols(struct elf *elf)
return -1;
}
+ symtab_shndx = find_section_by_name(elf, ".symtab_shndx");
+ if (symtab_shndx)
+ shndx_data = symtab_shndx->data;
+
symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
for (i = 0; i < symbols_nr; i++) {
@@ -227,12 +371,13 @@ static int read_symbols(struct elf *elf)
return -1;
}
memset(sym, 0, sizeof(*sym));
- alias = sym;
+ sym->alias = sym;
sym->idx = i;
- if (!gelf_getsym(symtab->data, i, &sym->sym)) {
- WARN_ELF("gelf_getsym");
+ if (!gelf_getsymshndx(symtab->data, shndx_data, i, &sym->sym,
+ &shndx)) {
+ WARN_ELF("gelf_getsymshndx");
goto err;
}
@@ -246,10 +391,13 @@ static int read_symbols(struct elf *elf)
sym->type = GELF_ST_TYPE(sym->sym.st_info);
sym->bind = GELF_ST_BIND(sym->sym.st_info);
- if (sym->sym.st_shndx > SHN_UNDEF &&
- sym->sym.st_shndx < SHN_LORESERVE) {
- sym->sec = find_section_by_index(elf,
- sym->sym.st_shndx);
+ if ((sym->sym.st_shndx > SHN_UNDEF &&
+ sym->sym.st_shndx < SHN_LORESERVE) ||
+ (shndx_data && sym->sym.st_shndx == SHN_XINDEX)) {
+ if (sym->sym.st_shndx != SHN_XINDEX)
+ shndx = sym->sym.st_shndx;
+
+ sym->sec = find_section_by_index(elf, shndx);
if (!sym->sec) {
WARN("couldn't find section for symbol %s",
sym->name);
@@ -265,33 +413,20 @@ static int read_symbols(struct elf *elf)
sym->offset = sym->sym.st_value;
sym->len = sym->sym.st_size;
- /* sorted insert into a per-section list */
- entry = &sym->sec->symbol_list;
- list_for_each_prev(tmp, &sym->sec->symbol_list) {
- struct symbol *s;
-
- s = list_entry(tmp, struct symbol, list);
-
- if (sym->offset > s->offset) {
- entry = tmp;
- break;
- }
-
- if (sym->offset == s->offset) {
- if (sym->len && sym->len == s->len && alias == sym)
- alias = s;
-
- if (sym->len >= s->len) {
- entry = tmp;
- break;
- }
- }
- }
- sym->alias = alias;
+ rb_add(&sym->sec->symbol_tree, &sym->node, symbol_to_offset);
+ pnode = rb_prev(&sym->node);
+ if (pnode)
+ entry = &rb_entry(pnode, struct symbol, node)->list;
+ else
+ entry = &sym->sec->symbol_list;
list_add(&sym->list, entry);
- hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx);
+ elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+ elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
}
+ if (stats)
+ printf("nr_symbols: %lu\n", (unsigned long)symbols_nr);
+
/* Create parent/child links for any cold subfunctions */
list_for_each_entry(sec, &elf->sections, list) {
list_for_each_entry(sym, &sec->symbol_list, list) {
@@ -347,12 +482,21 @@ err:
return -1;
}
+void elf_add_rela(struct elf *elf, struct rela *rela)
+{
+ struct section *sec = rela->sec;
+
+ list_add_tail(&rela->list, &sec->rela_list);
+ elf_hash_add(elf->rela_hash, &rela->hash, rela_hash(rela));
+}
+
static int read_relas(struct elf *elf)
{
struct section *sec;
struct rela *rela;
int i;
unsigned int symndx;
+ unsigned long nr_rela, max_rela = 0, tot_rela = 0;
list_for_each_entry(sec, &elf->sections, list) {
if (sec->sh.sh_type != SHT_RELA)
@@ -367,6 +511,7 @@ static int read_relas(struct elf *elf)
sec->base->rela = sec;
+ nr_rela = 0;
for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) {
rela = malloc(sizeof(*rela));
if (!rela) {
@@ -392,16 +537,22 @@ static int read_relas(struct elf *elf)
return -1;
}
- list_add_tail(&rela->list, &sec->rela_list);
- hash_add(sec->rela_hash, &rela->hash, rela->offset);
-
+ elf_add_rela(elf, rela);
+ nr_rela++;
}
+ max_rela = max(max_rela, nr_rela);
+ tot_rela += nr_rela;
+ }
+
+ if (stats) {
+ printf("max_rela: %lu\n", max_rela);
+ printf("tot_rela: %lu\n", tot_rela);
}
return 0;
}
-struct elf *elf_read(const char *name, int flags)
+struct elf *elf_open_read(const char *name, int flags)
{
struct elf *elf;
Elf_Cmd cmd;
@@ -413,10 +564,16 @@ struct elf *elf_read(const char *name, int flags)
perror("malloc");
return NULL;
}
- memset(elf, 0, sizeof(*elf));
+ memset(elf, 0, offsetof(struct elf, sections));
INIT_LIST_HEAD(&elf->sections);
+ elf_hash_init(elf->symbol_hash);
+ elf_hash_init(elf->symbol_name_hash);
+ elf_hash_init(elf->section_hash);
+ elf_hash_init(elf->section_name_hash);
+ elf_hash_init(elf->rela_hash);
+
elf->fd = open(name, flags);
if (elf->fd == -1) {
fprintf(stderr, "objtool: Can't open '%s': %s\n",
@@ -475,10 +632,6 @@ struct section *elf_create_section(struct elf *elf, const char *name,
INIT_LIST_HEAD(&sec->symbol_list);
INIT_LIST_HEAD(&sec->rela_list);
- hash_init(sec->rela_hash);
- hash_init(sec->symbol_hash);
-
- list_add_tail(&sec->list, &elf->sections);
s = elf_newscn(elf->elf);
if (!s) {
@@ -556,6 +709,10 @@ struct section *elf_create_section(struct elf *elf, const char *name,
shstrtab->len += strlen(name) + 1;
shstrtab->changed = true;
+ list_add_tail(&sec->list, &elf->sections);
+ elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
+ elf_hash_add(elf->section_name_hash, &sec->name_hash, str_hash(sec->name));
+
return sec;
}
@@ -622,7 +779,7 @@ int elf_rebuild_rela_section(struct section *sec)
return 0;
}
-int elf_write(struct elf *elf)
+int elf_write(const struct elf *elf)
{
struct section *sec;
Elf_Scn *s;
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
index 44150204db4d..f4fe1d6ea392 100644
--- a/tools/objtool/elf.h
+++ b/tools/objtool/elf.h
@@ -10,6 +10,8 @@
#include <gelf.h>
#include <linux/list.h>
#include <linux/hashtable.h>
+#include <linux/rbtree.h>
+#include <linux/jhash.h>
#ifdef LIBELF_USE_DEPRECATED
# define elf_getshdrnum elf_getshnum
@@ -25,23 +27,26 @@
struct section {
struct list_head list;
+ struct hlist_node hash;
+ struct hlist_node name_hash;
GElf_Shdr sh;
+ struct rb_root symbol_tree;
struct list_head symbol_list;
- DECLARE_HASHTABLE(symbol_hash, 8);
struct list_head rela_list;
- DECLARE_HASHTABLE(rela_hash, 16);
struct section *base, *rela;
struct symbol *sym;
Elf_Data *data;
char *name;
int idx;
unsigned int len;
- bool changed, text, rodata;
+ bool changed, text, rodata, noinstr;
};
struct symbol {
struct list_head list;
+ struct rb_node node;
struct hlist_node hash;
+ struct hlist_node name_hash;
GElf_Sym sym;
struct section *sec;
char *name;
@@ -65,32 +70,68 @@ struct rela {
bool jump_table_start;
};
+#define ELF_HASH_BITS 20
+
struct elf {
Elf *elf;
GElf_Ehdr ehdr;
int fd;
char *name;
struct list_head sections;
- DECLARE_HASHTABLE(rela_hash, 16);
+ DECLARE_HASHTABLE(symbol_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(symbol_name_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(section_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(section_name_hash, ELF_HASH_BITS);
+ DECLARE_HASHTABLE(rela_hash, ELF_HASH_BITS);
};
+#define OFFSET_STRIDE_BITS 4
+#define OFFSET_STRIDE (1UL << OFFSET_STRIDE_BITS)
+#define OFFSET_STRIDE_MASK (~(OFFSET_STRIDE - 1))
-struct elf *elf_read(const char *name, int flags);
-struct section *find_section_by_name(struct elf *elf, const char *name);
-struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
-struct symbol *find_symbol_by_name(struct elf *elf, const char *name);
-struct symbol *find_symbol_containing(struct section *sec, unsigned long offset);
-struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
-struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
- unsigned int len);
-struct symbol *find_containing_func(struct section *sec, unsigned long offset);
-struct section *elf_create_section(struct elf *elf, const char *name, size_t
- entsize, int nr);
+#define for_offset_range(_offset, _start, _end) \
+ for (_offset = ((_start) & OFFSET_STRIDE_MASK); \
+ _offset >= ((_start) & OFFSET_STRIDE_MASK) && \
+ _offset <= ((_end) & OFFSET_STRIDE_MASK); \
+ _offset += OFFSET_STRIDE)
+
+static inline u32 sec_offset_hash(struct section *sec, unsigned long offset)
+{
+ u32 ol, oh, idx = sec->idx;
+
+ offset &= OFFSET_STRIDE_MASK;
+
+ ol = offset;
+ oh = (offset >> 16) >> 16;
+
+ __jhash_mix(ol, oh, idx);
+
+ return ol;
+}
+
+static inline u32 rela_hash(struct rela *rela)
+{
+ return sec_offset_hash(rela->sec, rela->offset);
+}
+
+struct elf *elf_open_read(const char *name, int flags);
+struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr);
struct section *elf_create_rela_section(struct elf *elf, struct section *base);
-int elf_rebuild_rela_section(struct section *sec);
-int elf_write(struct elf *elf);
+void elf_add_rela(struct elf *elf, struct rela *rela);
+int elf_write(const struct elf *elf);
void elf_close(struct elf *elf);
+struct section *find_section_by_name(const struct elf *elf, const char *name);
+struct symbol *find_func_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+struct symbol *find_symbol_by_name(const struct elf *elf, const char *name);
+struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset);
+struct rela *find_rela_by_dest(const struct elf *elf, struct section *sec, unsigned long offset);
+struct rela *find_rela_by_dest_range(const struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int len);
+struct symbol *find_func_containing(struct section *sec, unsigned long offset);
+int elf_rebuild_rela_section(struct section *sec);
+
#define for_each_sec(file, sec) \
list_for_each_entry(sec, &file->elf->sections, list)
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 0b3528f05053..58fdda510653 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -58,7 +58,9 @@ static void cmd_usage(void)
printf("\n");
- exit(129);
+ if (!help)
+ exit(129);
+ exit(0);
}
static void handle_options(int *argc, const char ***argv)
diff --git a/tools/objtool/objtool.h b/tools/objtool/objtool.h
new file mode 100644
index 000000000000..528028a66816
--- /dev/null
+++ b/tools/objtool/objtool.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2020 Matt Helsley <mhelsley@vmware.com>
+ */
+
+#ifndef _OBJTOOL_H
+#define _OBJTOOL_H
+
+#include <stdbool.h>
+#include <linux/list.h>
+#include <linux/hashtable.h>
+
+#include "elf.h"
+
+struct objtool_file {
+ struct elf *elf;
+ struct list_head insn_list;
+ DECLARE_HASHTABLE(insn_hash, 20);
+ bool ignore_unreachables, c_file, hints, rodata;
+};
+
+int check(const char *objname, bool orc);
+int orc_dump(const char *objname);
+int create_orc(struct objtool_file *file);
+int create_orc_sections(struct objtool_file *file);
+
+#endif /* _OBJTOOL_H */
diff --git a/tools/objtool/orc.h b/tools/objtool/orc.h
deleted file mode 100644
index ee2832221e62..000000000000
--- a/tools/objtool/orc.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2017 Josh Poimboeuf <jpoimboe@redhat.com>
- */
-
-#ifndef _ORC_H
-#define _ORC_H
-
-#include <asm/orc_types.h>
-
-struct objtool_file;
-
-int create_orc(struct objtool_file *file);
-int create_orc_sections(struct objtool_file *file);
-
-int orc_dump(const char *objname);
-
-#endif /* _ORC_H */
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index 13ccf775a83a..fca46e006fc2 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -4,7 +4,8 @@
*/
#include <unistd.h>
-#include "orc.h"
+#include <asm/orc_types.h>
+#include "objtool.h"
#include "warn.h"
static const char *reg_name(unsigned int reg)
@@ -66,7 +67,7 @@ int orc_dump(const char *_objname)
char *name;
size_t nr_sections;
Elf64_Addr orc_ip_addr = 0;
- size_t shstrtab_idx;
+ size_t shstrtab_idx, strtab_idx = 0;
Elf *elf;
Elf_Scn *scn;
GElf_Shdr sh;
@@ -127,6 +128,8 @@ int orc_dump(const char *_objname)
if (!strcmp(name, ".symtab")) {
symtab = data;
+ } else if (!strcmp(name, ".strtab")) {
+ strtab_idx = i;
} else if (!strcmp(name, ".orc_unwind")) {
orc = data->d_buf;
orc_size = sh.sh_size;
@@ -138,7 +141,7 @@ int orc_dump(const char *_objname)
}
}
- if (!symtab || !orc || !orc_ip)
+ if (!symtab || !strtab_idx || !orc || !orc_ip)
return 0;
if (orc_size % sizeof(*orc) != 0) {
@@ -159,21 +162,29 @@ int orc_dump(const char *_objname)
return -1;
}
- scn = elf_getscn(elf, sym.st_shndx);
- if (!scn) {
- WARN_ELF("elf_getscn");
- return -1;
- }
-
- if (!gelf_getshdr(scn, &sh)) {
- WARN_ELF("gelf_getshdr");
- return -1;
- }
-
- name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
- if (!name || !*name) {
- WARN_ELF("elf_strptr");
- return -1;
+ if (GELF_ST_TYPE(sym.st_info) == STT_SECTION) {
+ scn = elf_getscn(elf, sym.st_shndx);
+ if (!scn) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ if (!gelf_getshdr(scn, &sh)) {
+ WARN_ELF("gelf_getshdr");
+ return -1;
+ }
+
+ name = elf_strptr(elf, shstrtab_idx, sh.sh_name);
+ if (!name) {
+ WARN_ELF("elf_strptr");
+ return -1;
+ }
+ } else {
+ name = elf_strptr(elf, strtab_idx, sym.st_name);
+ if (!name) {
+ WARN_ELF("elf_strptr");
+ return -1;
+ }
}
printf("%s+%llx:", name, (unsigned long long)rela.r_addend);
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 27a4112848c2..c9549988121a 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -6,7 +6,6 @@
#include <stdlib.h>
#include <string.h>
-#include "orc.h"
#include "check.h"
#include "warn.h"
@@ -16,10 +15,10 @@ int create_orc(struct objtool_file *file)
for_each_insn(file, insn) {
struct orc_entry *orc = &insn->orc;
- struct cfi_reg *cfa = &insn->state.cfa;
- struct cfi_reg *bp = &insn->state.regs[CFI_BP];
+ struct cfi_reg *cfa = &insn->cfi.cfa;
+ struct cfi_reg *bp = &insn->cfi.regs[CFI_BP];
- orc->end = insn->state.end;
+ orc->end = insn->cfi.end;
if (cfa->base == CFI_UNDEFINED) {
orc->sp_reg = ORC_REG_UNDEFINED;
@@ -75,24 +74,19 @@ int create_orc(struct objtool_file *file)
orc->sp_offset = cfa->offset;
orc->bp_offset = bp->offset;
- orc->type = insn->state.type;
+ orc->type = insn->cfi.type;
}
return 0;
}
-static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
+static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relasec,
unsigned int idx, struct section *insn_sec,
unsigned long insn_off, struct orc_entry *o)
{
struct orc_entry *orc;
struct rela *rela;
- if (!insn_sec->sym) {
- WARN("missing symbol for section %s", insn_sec->name);
- return -1;
- }
-
/* populate ORC data */
orc = (struct orc_entry *)u_sec->data->d_buf + idx;
memcpy(orc, o, sizeof(*orc));
@@ -105,13 +99,37 @@ static int create_orc_entry(struct section *u_sec, struct section *ip_relasec,
}
memset(rela, 0, sizeof(*rela));
- rela->sym = insn_sec->sym;
- rela->addend = insn_off;
+ if (insn_sec->sym) {
+ rela->sym = insn_sec->sym;
+ rela->addend = insn_off;
+ } else {
+ /*
+ * The Clang assembler doesn't produce section symbols, so we
+ * have to reference the function symbol instead:
+ */
+ rela->sym = find_symbol_containing(insn_sec, insn_off);
+ if (!rela->sym) {
+ /*
+ * Hack alert. This happens when we need to reference
+ * the NOP pad insn immediately after the function.
+ */
+ rela->sym = find_symbol_containing(insn_sec,
+ insn_off - 1);
+ }
+ if (!rela->sym) {
+ WARN("missing symbol for insn at offset 0x%lx\n",
+ insn_off);
+ return -1;
+ }
+
+ rela->addend = insn_off - rela->sym->offset;
+ }
+
rela->type = R_X86_64_PC32;
rela->offset = idx * sizeof(int);
+ rela->sec = ip_relasec;
- list_add_tail(&rela->list, &ip_relasec->rela_list);
- hash_add(ip_relasec->rela_hash, &rela->hash, rela->offset);
+ elf_add_rela(elf, rela);
return 0;
}
@@ -182,7 +200,7 @@ int create_orc_sections(struct objtool_file *file)
if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
sizeof(struct orc_entry))) {
- if (create_orc_entry(u_sec, ip_relasec, idx,
+ if (create_orc_entry(file->elf, u_sec, ip_relasec, idx,
insn->sec, insn->offset,
&insn->orc))
return -1;
@@ -194,7 +212,7 @@ int create_orc_sections(struct objtool_file *file)
/* section terminator */
if (prev_insn) {
- if (create_orc_entry(u_sec, ip_relasec, idx,
+ if (create_orc_entry(file->elf, u_sec, ip_relasec, idx,
prev_insn->sec,
prev_insn->offset + prev_insn->len,
&empty))
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index fdbaa611146d..e74e0189de22 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -118,7 +118,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
}
}
- orig_rela = find_rela_by_dest(sec, offset + entry->orig);
+ orig_rela = find_rela_by_dest(elf, sec, offset + entry->orig);
if (!orig_rela) {
WARN_FUNC("can't find orig rela", sec, offset + entry->orig);
return -1;
@@ -133,7 +133,7 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
alt->orig_off = orig_rela->addend;
if (!entry->group || alt->new_len) {
- new_rela = find_rela_by_dest(sec, offset + entry->new);
+ new_rela = find_rela_by_dest(elf, sec, offset + entry->new);
if (!new_rela) {
WARN_FUNC("can't find new rela",
sec, offset + entry->new);
diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h
index cbb0a02b7480..7799f60de80a 100644
--- a/tools/objtool/warn.h
+++ b/tools/objtool/warn.h
@@ -21,7 +21,7 @@ static inline char *offstr(struct section *sec, unsigned long offset)
char *name, *str;
unsigned long name_off;
- func = find_containing_func(sec, offset);
+ func = find_func_containing(sec, offset);
if (func) {
name = func->name;
name_off = offset - func->offset;
diff --git a/tools/objtool/weak.c b/tools/objtool/weak.c
new file mode 100644
index 000000000000..942ea5e8ac36
--- /dev/null
+++ b/tools/objtool/weak.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Matt Helsley <mhelsley@vmware.com>
+ * Weak definitions necessary to compile objtool without
+ * some subcommands (e.g. check, orc).
+ */
+
+#include <stdbool.h>
+#include <errno.h>
+#include "objtool.h"
+
+#define __weak __attribute__((weak))
+
+#define UNSUPPORTED(name) \
+({ \
+ fprintf(stderr, "error: objtool: " name " not implemented\n"); \
+ return ENOSYS; \
+})
+
+const char __weak *objname;
+
+int __weak check(const char *_objname, bool orc)
+{
+ UNSUPPORTED("check subcommand");
+}
+
+int __weak orc_dump(const char *_objname)
+{
+ UNSUPPORTED("orc");
+}
+
+int __weak create_orc(struct objtool_file *file)
+{
+ UNSUPPORTED("orc");
+}
+
+int __weak create_orc_sections(struct objtool_file *file)
+{
+ UNSUPPORTED("orc");
+}
diff --git a/tools/pci/pcitest.c b/tools/pci/pcitest.c
index 32b7c6f9043d..0a1344c45213 100644
--- a/tools/pci/pcitest.c
+++ b/tools/pci/pcitest.c
@@ -30,14 +30,17 @@ struct pci_test {
int irqtype;
bool set_irqtype;
bool get_irqtype;
+ bool clear_irq;
bool read;
bool write;
bool copy;
unsigned long size;
+ bool use_dma;
};
static int run_test(struct pci_test *test)
{
+ struct pci_endpoint_test_xfer_param param;
int ret = -EINVAL;
int fd;
@@ -74,6 +77,15 @@ static int run_test(struct pci_test *test)
fprintf(stdout, "%s\n", irq[ret]);
}
+ if (test->clear_irq) {
+ ret = ioctl(fd, PCITEST_CLEAR_IRQ);
+ fprintf(stdout, "CLEAR IRQ:\t\t");
+ if (ret < 0)
+ fprintf(stdout, "FAILED\n");
+ else
+ fprintf(stdout, "%s\n", result[ret]);
+ }
+
if (test->legacyirq) {
ret = ioctl(fd, PCITEST_LEGACY_IRQ, 0);
fprintf(stdout, "LEGACY IRQ:\t");
@@ -102,7 +114,10 @@ static int run_test(struct pci_test *test)
}
if (test->write) {
- ret = ioctl(fd, PCITEST_WRITE, test->size);
+ param.size = test->size;
+ if (test->use_dma)
+ param.flags = PCITEST_FLAGS_USE_DMA;
+ ret = ioctl(fd, PCITEST_WRITE, &param);
fprintf(stdout, "WRITE (%7ld bytes):\t\t", test->size);
if (ret < 0)
fprintf(stdout, "TEST FAILED\n");
@@ -111,7 +126,10 @@ static int run_test(struct pci_test *test)
}
if (test->read) {
- ret = ioctl(fd, PCITEST_READ, test->size);
+ param.size = test->size;
+ if (test->use_dma)
+ param.flags = PCITEST_FLAGS_USE_DMA;
+ ret = ioctl(fd, PCITEST_READ, &param);
fprintf(stdout, "READ (%7ld bytes):\t\t", test->size);
if (ret < 0)
fprintf(stdout, "TEST FAILED\n");
@@ -120,7 +138,10 @@ static int run_test(struct pci_test *test)
}
if (test->copy) {
- ret = ioctl(fd, PCITEST_COPY, test->size);
+ param.size = test->size;
+ if (test->use_dma)
+ param.flags = PCITEST_FLAGS_USE_DMA;
+ ret = ioctl(fd, PCITEST_COPY, &param);
fprintf(stdout, "COPY (%7ld bytes):\t\t", test->size);
if (ret < 0)
fprintf(stdout, "TEST FAILED\n");
@@ -153,7 +174,7 @@ int main(int argc, char **argv)
/* set default endpoint device */
test->device = "/dev/pci-endpoint-test.0";
- while ((c = getopt(argc, argv, "D:b:m:x:i:Ilhrwcs:")) != EOF)
+ while ((c = getopt(argc, argv, "D:b:m:x:i:deIlhrwcs:")) != EOF)
switch (c) {
case 'D':
test->device = optarg;
@@ -194,9 +215,15 @@ int main(int argc, char **argv)
case 'c':
test->copy = true;
continue;
+ case 'e':
+ test->clear_irq = true;
+ continue;
case 's':
test->size = strtoul(optarg, NULL, 0);
continue;
+ case 'd':
+ test->use_dma = true;
+ continue;
case 'h':
default:
usage:
@@ -208,7 +235,9 @@ usage:
"\t-m <msi num> MSI test (msi number between 1..32)\n"
"\t-x <msix num> \tMSI-X test (msix number between 1..2048)\n"
"\t-i <irq type> \tSet IRQ type (0 - Legacy, 1 - MSI, 2 - MSI-X)\n"
+ "\t-e Clear IRQ\n"
"\t-I Get current IRQ type configured\n"
+ "\t-d Use DMA\n"
"\t-l Legacy IRQ test\n"
"\t-r Read buffer test\n"
"\t-w Write buffer test\n"
diff --git a/tools/pcmcia/.gitignore b/tools/pcmcia/.gitignore
index 53d081336757..94cb97b77f06 100644
--- a/tools/pcmcia/.gitignore
+++ b/tools/pcmcia/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
crc32hash
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index bf1252dc2cb0..f3f84781fd74 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
PERF-CFLAGS
PERF-GUI-VARS
PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index adc5a7e44b98..6e54979c2124 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -48,7 +48,7 @@ man5dir=$(mandir)/man5
man7dir=$(mandir)/man7
ASCIIDOC=asciidoc
-ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
+ASCIIDOC_EXTRA += --unsafe -f asciidoc.conf
ASCIIDOC_HTML = xhtml11
MANPAGE_XSL = manpage-normal.xsl
XMLTO_EXTRA =
@@ -59,7 +59,7 @@ HTML_REF = origin/html
ifdef USE_ASCIIDOCTOR
ASCIIDOC = asciidoctor
-ASCIIDOC_EXTRA = -a compat-mode
+ASCIIDOC_EXTRA += -a compat-mode
ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
ASCIIDOC_EXTRA += -a mansource="perf" -a manmanual="perf Manual"
ASCIIDOC_HTML = xhtml5
@@ -295,7 +295,10 @@ $(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
$(OUTPUT)%.xml : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
$(ASCIIDOC) -b docbook -d manpage \
- $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) \
+ -aperf_date=$(shell git log -1 --pretty="format:%cd" \
+ --date=short $<) \
+ -o $@+ $< && \
mv $@+ $@
XSLT = docbook.xsl
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 2cf2d9e9d0da..fd9241a1b987 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -1,991 +1 @@
-Intel Processor Trace
-=====================
-
-Overview
-========
-
-Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
-collects information about software execution such as control flow, execution
-modes and timings and formats it into highly compressed binary packets.
-Technical details are documented in the Intel 64 and IA-32 Architectures
-Software Developer Manuals, Chapter 36 Intel Processor Trace.
-
-Intel PT is first supported in Intel Core M and 5th generation Intel Core
-processors that are based on the Intel micro-architecture code name Broadwell.
-
-Trace data is collected by 'perf record' and stored within the perf.data file.
-See below for options to 'perf record'.
-
-Trace data must be 'decoded' which involves walking the object code and matching
-the trace data packets. For example a TNT packet only tells whether a
-conditional branch was taken or not taken, so to make use of that packet the
-decoder must know precisely which instruction was being executed.
-
-Decoding is done on-the-fly. The decoder outputs samples in the same format as
-samples output by perf hardware events, for example as though the "instructions"
-or "branches" events had been recorded. Presently 3 tools support this:
-'perf script', 'perf report' and 'perf inject'. See below for more information
-on using those tools.
-
-The main distinguishing feature of Intel PT is that the decoder can determine
-the exact flow of software execution. Intel PT can be used to understand why
-and how did software get to a certain point, or behave a certain way. The
-software does not have to be recompiled, so Intel PT works with debug or release
-builds, however the executed images are needed - which makes use in JIT-compiled
-environments, or with self-modified code, a challenge. Also symbols need to be
-provided to make sense of addresses.
-
-A limitation of Intel PT is that it produces huge amounts of trace data
-(hundreds of megabytes per second per core) which takes a long time to decode,
-for example two or three orders of magnitude longer than it took to collect.
-Another limitation is the performance impact of tracing, something that will
-vary depending on the use-case and architecture.
-
-
-Quickstart
-==========
-
-It is important to start small. That is because it is easy to capture vastly
-more data than can possibly be processed.
-
-The simplest thing to do with Intel PT is userspace profiling of small programs.
-Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
-
- perf record -e intel_pt//u ls
-
-And profiled with 'perf report' e.g.
-
- perf report
-
-To also trace kernel space presents a problem, namely kernel self-modifying
-code. A fairly good kernel image is available in /proc/kcore but to get an
-accurate image a copy of /proc/kcore needs to be made under the same conditions
-as the data capture. A script perf-with-kcore can do that, but beware that the
-script makes use of 'sudo' to copy /proc/kcore. If you have perf installed
-locally from the source tree you can do:
-
- ~/libexec/perf-core/perf-with-kcore record pt_ls -e intel_pt// -- ls
-
-which will create a directory named 'pt_ls' and put the perf.data file and
-copies of /proc/kcore, /proc/kallsyms and /proc/modules into it. Then to use
-'perf report' becomes:
-
- ~/libexec/perf-core/perf-with-kcore report pt_ls
-
-Because samples are synthesized after-the-fact, the sampling period can be
-selected for reporting. e.g. sample every microsecond
-
- ~/libexec/perf-core/perf-with-kcore report pt_ls --itrace=i1usge
-
-See the sections below for more information about the --itrace option.
-
-Beware the smaller the period, the more samples that are produced, and the
-longer it takes to process them.
-
-Also note that the coarseness of Intel PT timing information will start to
-distort the statistical value of the sampling as the sampling period becomes
-smaller.
-
-To represent software control flow, "branches" samples are produced. By default
-a branch sample is synthesized for every single branch. To get an idea what
-data is available you can use the 'perf script' tool with all itrace sampling
-options, which will list all the samples.
-
- perf record -e intel_pt//u ls
- perf script --itrace=ibxwpe
-
-An interesting field that is not printed by default is 'flags' which can be
-displayed as follows:
-
- perf script --itrace=ibxwpe -F+flags
-
-The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
-system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
-in transaction, respectively.
-
-Another interesting field that is not printed by default is 'ipc' which can be
-displayed as follows:
-
- perf script --itrace=be -F+ipc
-
-There are two ways that instructions-per-cycle (IPC) can be calculated depending
-on the recording.
-
-If the 'cyc' config term (see config terms section below) was used, then IPC is
-calculated using the cycle count from CYC packets, otherwise MTC packets are
-used - refer to the 'mtc' config term. When MTC is used, however, the values
-are less accurate because the timing is less accurate.
-
-Because Intel PT does not update the cycle count on every branch or instruction,
-the values will often be zero. When there are values, they will be the number
-of instructions and number of cycles since the last update, and thus represent
-the average IPC since the last IPC for that event type. Note IPC for "branches"
-events is calculated separately from IPC for "instructions" events.
-
-Also note that the IPC instruction count may or may not include the current
-instruction. If the cycle count is associated with an asynchronous branch
-(e.g. page fault or interrupt), then the instruction count does not include the
-current instruction, otherwise it does. That is consistent with whether or not
-that instruction has retired when the cycle count is updated.
-
-Another note, in the case of "branches" events, non-taken branches are not
-presently sampled, so IPC values for them do not appear e.g. a CYC packet with a
-TNT packet that starts with a non-taken branch. To see every possible IPC
-value, "instructions" events can be used e.g. --itrace=i0ns
-
-While it is possible to create scripts to analyze the data, an alternative
-approach is available to export the data to a sqlite or postgresql database.
-Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
-and to script exported-sql-viewer.py for an example of using the database.
-
-There is also script intel-pt-events.py which provides an example of how to
-unpack the raw data for power events and PTWRITE.
-
-As mentioned above, it is easy to capture too much data. One way to limit the
-data captured is to use 'snapshot' mode which is explained further below.
-Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
-
-Another problem that will be experienced is decoder errors. They can be caused
-by inability to access the executed image, self-modified or JIT-ed code, or the
-inability to match side-band information (such as context switches and mmaps)
-which results in the decoder not knowing what code was executed.
-
-There is also the problem of perf not being able to copy the data fast enough,
-resulting in data lost because the buffer was full. See 'Buffer handling' below
-for more details.
-
-
-perf record
-===========
-
-new event
----------
-
-The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are
-selected by providing the PMU name followed by the "config" separated by slashes.
-An enhancement has been made to allow default "config" e.g. the option
-
- -e intel_pt//
-
-will use a default config value. Currently that is the same as
-
- -e intel_pt/tsc,noretcomp=0/
-
-which is the same as
-
- -e intel_pt/tsc=1,noretcomp=0/
-
-Note there are now new config terms - see section 'config terms' further below.
-
-The config terms are listed in /sys/devices/intel_pt/format. They are bit
-fields within the config member of the struct perf_event_attr which is
-passed to the kernel by the perf_event_open system call. They correspond to bit
-fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions:
-
- $ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
- /sys/bus/event_source/devices/intel_pt/format/cyc:config:1
- /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
- /sys/bus/event_source/devices/intel_pt/format/mtc:config:9
- /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
- /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
- /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
- /sys/bus/event_source/devices/intel_pt/format/tsc:config:10
-
-Note that the default config must be overridden for each term i.e.
-
- -e intel_pt/noretcomp=0/
-
-is the same as:
-
- -e intel_pt/tsc=1,noretcomp=0/
-
-So, to disable TSC packets use:
-
- -e intel_pt/tsc=0/
-
-It is also possible to specify the config value explicitly:
-
- -e intel_pt/config=0x400/
-
-Note that, as with all events, the event is suffixed with event modifiers:
-
- u userspace
- k kernel
- h hypervisor
- G guest
- H host
- p precise ip
-
-'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
-'p' is also not relevant to Intel PT. So only options 'u' and 'k' are
-meaningful for Intel PT.
-
-perf_event_attr is displayed if the -vv option is used e.g.
-
- ------------------------------------------------------------
- perf_event_attr:
- type 6
- size 112
- config 0x400
- { sample_period, sample_freq } 1
- sample_type IP|TID|TIME|CPU|IDENTIFIER
- read_format ID
- disabled 1
- inherit 1
- exclude_kernel 1
- exclude_hv 1
- enable_on_exec 1
- sample_id_all 1
- ------------------------------------------------------------
- sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
- ------------------------------------------------------------
-
-
-config terms
-------------
-
-The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
-Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
-Some of the features are reflect in new config terms. All the config terms are
-described below.
-
-tsc Always supported. Produces TSC timestamp packets to provide
- timing information. In some cases it is possible to decode
- without timing information, for example a per-thread context
- that does not overlap executable memory maps.
-
- The default config selects tsc (i.e. tsc=1).
-
-noretcomp Always supported. Disables "return compression" so a TIP packet
- is produced when a function returns. Causes more packets to be
- produced but might make decoding more reliable.
-
- The default config does not select noretcomp (i.e. noretcomp=0).
-
-psb_period Allows the frequency of PSB packets to be specified.
-
- The PSB packet is a synchronization packet that provides a
- starting point for decoding or recovery from errors.
-
- Support for psb_period is indicated by:
-
- /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
-
- which contains "1" if the feature is supported and "0"
- otherwise.
-
- Valid values are given by:
-
- /sys/bus/event_source/devices/intel_pt/caps/psb_periods
-
- which contains a hexadecimal value, the bits of which represent
- valid values e.g. bit 2 set means value 2 is valid.
-
- The psb_period value is converted to the approximate number of
- trace bytes between PSB packets as:
-
- 2 ^ (value + 11)
-
- e.g. value 3 means 16KiB bytes between PSBs
-
- If an invalid value is entered, the error message
- will give a list of valid values e.g.
-
- $ perf record -e intel_pt/psb_period=15/u uname
- Invalid psb_period for intel_pt. Valid values are: 0-5
-
- If MTC packets are selected, the default config selects a value
- of 3 (i.e. psb_period=3) or the nearest lower value that is
- supported (0 is always supported). Otherwise the default is 0.
-
- If decoding is expected to be reliable and the buffer is large
- then a large PSB period can be used.
-
- Because a TSC packet is produced with PSB, the PSB period can
- also affect the granularity to timing information in the absence
- of MTC or CYC.
-
-mtc Produces MTC timing packets.
-
- MTC packets provide finer grain timestamp information than TSC
- packets. MTC packets record time using the hardware crystal
- clock (CTC) which is related to TSC packets using a TMA packet.
-
- Support for this feature is indicated by:
-
- /sys/bus/event_source/devices/intel_pt/caps/mtc
-
- which contains "1" if the feature is supported and
- "0" otherwise.
-
- The frequency of MTC packets can also be specified - see
- mtc_period below.
-
-mtc_period Specifies how frequently MTC packets are produced - see mtc
- above for how to determine if MTC packets are supported.
-
- Valid values are given by:
-
- /sys/bus/event_source/devices/intel_pt/caps/mtc_periods
-
- which contains a hexadecimal value, the bits of which represent
- valid values e.g. bit 2 set means value 2 is valid.
-
- The mtc_period value is converted to the MTC frequency as:
-
- CTC-frequency / (2 ^ value)
-
- e.g. value 3 means one eighth of CTC-frequency
-
- Where CTC is the hardware crystal clock, the frequency of which
- can be related to TSC via values provided in cpuid leaf 0x15.
-
- If an invalid value is entered, the error message
- will give a list of valid values e.g.
-
- $ perf record -e intel_pt/mtc_period=15/u uname
- Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
-
- The default value is 3 or the nearest lower value
- that is supported (0 is always supported).
-
-cyc Produces CYC timing packets.
-
- CYC packets provide even finer grain timestamp information than
- MTC and TSC packets. A CYC packet contains the number of CPU
- cycles since the last CYC packet. Unlike MTC and TSC packets,
- CYC packets are only sent when another packet is also sent.
-
- Support for this feature is indicated by:
-
- /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
-
- which contains "1" if the feature is supported and
- "0" otherwise.
-
- The number of CYC packets produced can be reduced by specifying
- a threshold - see cyc_thresh below.
-
-cyc_thresh Specifies how frequently CYC packets are produced - see cyc
- above for how to determine if CYC packets are supported.
-
- Valid cyc_thresh values are given by:
-
- /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
-
- which contains a hexadecimal value, the bits of which represent
- valid values e.g. bit 2 set means value 2 is valid.
-
- The cyc_thresh value represents the minimum number of CPU cycles
- that must have passed before a CYC packet can be sent. The
- number of CPU cycles is:
-
- 2 ^ (value - 1)
-
- e.g. value 4 means 8 CPU cycles must pass before a CYC packet
- can be sent. Note a CYC packet is still only sent when another
- packet is sent, not at, e.g. every 8 CPU cycles.
-
- If an invalid value is entered, the error message
- will give a list of valid values e.g.
-
- $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
- Invalid cyc_thresh for intel_pt. Valid values are: 0-12
-
- CYC packets are not requested by default.
-
-pt Specifies pass-through which enables the 'branch' config term.
-
- The default config selects 'pt' if it is available, so a user will
- never need to specify this term.
-
-branch Enable branch tracing. Branch tracing is enabled by default so to
- disable branch tracing use 'branch=0'.
-
- The default config selects 'branch' if it is available.
-
-ptw Enable PTWRITE packets which are produced when a ptwrite instruction
- is executed.
-
- Support for this feature is indicated by:
-
- /sys/bus/event_source/devices/intel_pt/caps/ptwrite
-
- which contains "1" if the feature is supported and
- "0" otherwise.
-
-fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
- provides the address of the ptwrite instruction. In the absence of
- fup_on_ptw, the decoder will use the address of the previous branch
- if branch tracing is enabled, otherwise the address will be zero.
- Note that fup_on_ptw will work even when branch tracing is disabled.
-
-pwr_evt Enable power events. The power events provide information about
- changes to the CPU C-state.
-
- Support for this feature is indicated by:
-
- /sys/bus/event_source/devices/intel_pt/caps/power_event_trace
-
- which contains "1" if the feature is supported and
- "0" otherwise.
-
-
-AUX area sampling option
-------------------------
-
-To select Intel PT "sampling" the AUX area sampling option can be used:
-
- --aux-sample
-
-Optionally it can be followed by the sample size in bytes e.g.
-
- --aux-sample=8192
-
-In addition, the Intel PT event to sample must be defined e.g.
-
- -e intel_pt//u
-
-Samples on other events will be created containing Intel PT data e.g. the
-following will create Intel PT samples on the branch-misses event, note the
-events must be grouped using {}:
-
- perf record --aux-sample -e '{intel_pt//u,branch-misses:u}'
-
-An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to
-events. In this case, the grouping is implied e.g.
-
- perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u
-
-is the same as:
-
- perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}'
-
-but allows for also using an address filter e.g.:
-
- perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls
-
-It is important to select a sample size that is big enough to contain at least
-one PSB packet. If not a warning will be displayed:
-
- Intel PT sample size (%zu) may be too small for PSB period (%zu)
-
-The calculation used for that is: if sample_size <= psb_period + 256 display the
-warning. When sampling is used, psb_period defaults to 0 (2KiB).
-
-The default sample size is 4KiB.
-
-The sample size is passed in aux_sample_size in struct perf_event_attr. The
-sample size is limited by the maximum event size which is 64KiB. It is
-difficult to know how big the event might be without the trace sample attached,
-but the tool validates that the sample size is not greater than 60KiB.
-
-
-new snapshot option
--------------------
-
-The difference between full trace and snapshot from the kernel's perspective is
-that in full trace we don't overwrite trace data that the user hasn't collected
-yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
-the trace run and overwrite older data in the buffer so that whenever something
-interesting happens, we can stop it and grab a snapshot of what was going on
-around that interesting moment.
-
-To select snapshot mode a new option has been added:
-
- -S
-
-Optionally it can be followed by the snapshot size e.g.
-
- -S0x100000
-
-The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size
-nor snapshot size is specified, then the default is 4MiB for privileged users
-(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
-If an unprivileged user does not specify mmap pages, the mmap pages will be
-reduced as described in the 'new auxtrace mmap size option' section below.
-
-The snapshot size is displayed if the option -vv is used e.g.
-
- Intel PT snapshot size: %zu
-
-
-new auxtrace mmap size option
----------------------------
-
-Intel PT buffer size is specified by an addition to the -m option e.g.
-
- -m,16
-
-selects a buffer size of 16 pages i.e. 64KiB.
-
-Note that the existing functionality of -m is unchanged. The auxtrace mmap size
-is specified by the optional addition of a comma and the value.
-
-The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
-(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
-If an unprivileged user does not specify mmap pages, the mmap pages will be
-reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
-user is likely to get an error as they exceed their mlock limit (Max locked
-memory as shown in /proc/self/limits). Note that perf does not count the first
-512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
-against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
-their mlock limit (which defaults to 64KiB but is not multiplied by the number
-of cpus).
-
-In full-trace mode, powers of two are allowed for buffer size, with a minimum
-size of 2 pages. In snapshot mode or sampling mode, it is the same but the
-minimum size is 1 page.
-
-The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
-
- mmap length 528384
- auxtrace mmap length 4198400
-
-
-Intel PT modes of operation
----------------------------
-
-Intel PT can be used in 2 modes:
- full-trace mode
- sample mode
- snapshot mode
-
-Full-trace mode traces continuously e.g.
-
- perf record -e intel_pt//u uname
-
-Sample mode attaches a Intel PT sample to other events e.g.
-
- perf record --aux-sample -e intel_pt//u -e branch-misses:u
-
-Snapshot mode captures the available data when a signal is sent e.g.
-
- perf record -v -e intel_pt//u -S ./loopy 1000000000 &
- [1] 11435
- kill -USR2 11435
- Recording AUX area tracing snapshot
-
-Note that the signal sent is SIGUSR2.
-Note that "Recording AUX area tracing snapshot" is displayed because the -v
-option is used.
-
-The 2 modes cannot be used together.
-
-
-Buffer handling
----------------
-
-There may be buffer limitations (i.e. single ToPa entry) which means that actual
-buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
-provide other sizes, and in particular an arbitrarily large size, multiple
-buffers are logically concatenated. However an interrupt must be used to switch
-between buffers. That has two potential problems:
- a) the interrupt may not be handled in time so that the current buffer
- becomes full and some trace data is lost.
- b) the interrupts may slow the system and affect the performance
- results.
-
-If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
-which the tools report as an error.
-
-In full-trace mode, the driver waits for data to be copied out before allowing
-the (logical) buffer to wrap-around. If data is not copied out quickly enough,
-again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to
-wait, the intel_pt event gets disabled. Because it is difficult to know when
-that happens, perf tools always re-enable the intel_pt event after copying out
-data.
-
-
-Intel PT and build ids
-----------------------
-
-By default "perf record" post-processes the event stream to find all build ids
-for executables for all addresses sampled. Deliberately, Intel PT is not
-decoded for that purpose (it would take too long). Instead the build ids for
-all executables encountered (due to mmap, comm or task events) are included
-in the perf.data file.
-
-To see buildids included in the perf.data file use the command:
-
- perf buildid-list
-
-If the perf.data file contains Intel PT data, that is the same as:
-
- perf buildid-list --with-hits
-
-
-Snapshot mode and event disabling
----------------------------------
-
-In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
-namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the
-collection of side-band information. In order to prevent that, a dummy
-software event has been introduced that permits tracking events (like mmaps) to
-continue to be recorded while intel_pt is disabled. That is important to ensure
-there is complete side-band information to allow the decoding of subsequent
-snapshots.
-
-A test has been created for that. To find the test:
-
- perf test list
- ...
- 23: Test using a dummy software event to keep tracking
-
-To run the test:
-
- perf test 23
- 23: Test using a dummy software event to keep tracking : Ok
-
-
-perf record modes (nothing new here)
-------------------------------------
-
-perf record essentially operates in one of three modes:
- per thread
- per cpu
- workload only
-
-"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
-workload).
-"per cpu" is selected by -C or -a.
-"workload only" mode is selected by not using the other options but providing a
-command to run (i.e. the workload).
-
-In per-thread mode an exact list of threads is traced. There is no inheritance.
-Each thread has its own event buffer.
-
-In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
-option, or processes selected with -p or -u) are traced. Each cpu has its own
-buffer. Inheritance is allowed.
-
-In workload-only mode, the workload is traced but with per-cpu buffers.
-Inheritance is allowed. Note that you can now trace a workload in per-thread
-mode by using the --per-thread option.
-
-
-Privileged vs non-privileged users
-----------------------------------
-
-Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
-have memory limits imposed upon them. That affects what buffer sizes they can
-have as outlined above.
-
-The v4.2 kernel introduced support for a context switch metadata event,
-PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
-are scheduled out and in, just not by whom, which is left for the
-PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
-which in turn requires CAP_SYS_ADMIN.
-
-Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
-switches") commit, that introduces these metadata events for further info.
-
-When working with kernels < v4.2, the following considerations must be taken,
-as the sched:sched_switch tracepoints will be used to receive such information:
-
-Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
-not permitted to use tracepoints which means there is insufficient side-band
-information to decode Intel PT in per-cpu mode, and potentially workload-only
-mode too if the workload creates new processes.
-
-Note also, that to use tracepoints, read-access to debugfs is required. So if
-debugfs is not mounted or the user does not have read-access, it will again not
-be possible to decode Intel PT in per-cpu mode.
-
-
-sched_switch tracepoint
------------------------
-
-The sched_switch tracepoint is used to provide side-band data for Intel PT
-decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
-available.
-
-The sched_switch events are automatically added. e.g. the second event shown
-below:
-
- $ perf record -vv -e intel_pt//u uname
- ------------------------------------------------------------
- perf_event_attr:
- type 6
- size 112
- config 0x400
- { sample_period, sample_freq } 1
- sample_type IP|TID|TIME|CPU|IDENTIFIER
- read_format ID
- disabled 1
- inherit 1
- exclude_kernel 1
- exclude_hv 1
- enable_on_exec 1
- sample_id_all 1
- ------------------------------------------------------------
- sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
- ------------------------------------------------------------
- perf_event_attr:
- type 2
- size 112
- config 0x108
- { sample_period, sample_freq } 1
- sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
- read_format ID
- inherit 1
- sample_id_all 1
- exclude_guest 1
- ------------------------------------------------------------
- sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
- sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8
- sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8
- sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8
- ------------------------------------------------------------
- perf_event_attr:
- type 1
- size 112
- config 0x9
- { sample_period, sample_freq } 1
- sample_type IP|TID|TIME|IDENTIFIER
- read_format ID
- disabled 1
- inherit 1
- exclude_kernel 1
- exclude_hv 1
- mmap 1
- comm 1
- enable_on_exec 1
- task 1
- sample_id_all 1
- mmap2 1
- comm_exec 1
- ------------------------------------------------------------
- sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
- sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
- mmap size 528384B
- AUX area mmap length 4194304
- perf event ring buffer mmapped per cpu
- Synthesizing auxtrace information
- Linux
- [ perf record: Woken up 1 times to write data ]
- [ perf record: Captured and wrote 0.042 MB perf.data ]
-
-Note, the sched_switch event is only added if the user is permitted to use it
-and only in per-cpu mode.
-
-Note also, the sched_switch event is only added if TSC packets are requested.
-That is because, in the absence of timing information, the sched_switch events
-cannot be matched against the Intel PT trace.
-
-
-perf script
-===========
-
-By default, perf script will decode trace data found in the perf.data file.
-This can be further controlled by new option --itrace.
-
-
-New --itrace option
--------------------
-
-Having no option is the same as
-
- --itrace
-
-which, in turn, is the same as
-
- --itrace=cepwx
-
-The letters are:
-
- i synthesize "instructions" events
- b synthesize "branches" events
- x synthesize "transactions" events
- w synthesize "ptwrite" events
- p synthesize "power" events
- c synthesize branches events (calls only)
- r synthesize branches events (returns only)
- e synthesize tracing error events
- d create a debug log
- g synthesize a call chain (use with i or x)
- l synthesize last branch entries (use with i or x)
- s skip initial number of events
-
-"Instructions" events look like they were recorded by "perf record -e
-instructions".
-
-"Branches" events look like they were recorded by "perf record -e branches". "c"
-and "r" can be combined to get calls and returns.
-
-"Transactions" events correspond to the start or end of transactions. The
-'flags' field can be used in perf script to determine whether the event is a
-tranasaction start, commit or abort.
-
-Note that "instructions", "branches" and "transactions" events depend on code
-flow packets which can be disabled by using the config term "branch=0". Refer
-to the config terms section above.
-
-"ptwrite" events record the payload of the ptwrite instruction and whether
-"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are
-recorded only if the "ptw" config term was used. Refer to the config terms
-section above. perf script "synth" field displays "ptwrite" information like
-this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was
-used.
-
-"Power" events correspond to power event packets and CBR (core-to-bus ratio)
-packets. While CBR packets are always recorded when tracing is enabled, power
-event packets are recorded only if the "pwr_evt" config term was used. Refer to
-the config terms section above. The power events record information about
-C-state changes, whereas CBR is indicative of CPU frequency. perf script
-"event,synth" fields display information like this:
- cbr: cbr: 22 freq: 2189 MHz (200%)
- mwait: hints: 0x60 extensions: 0x1
- pwre: hw: 0 cstate: 2 sub-cstate: 0
- exstop: ip: 1
- pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4
-Where:
- "cbr" includes the frequency and the percentage of maximum non-turbo
- "mwait" shows mwait hints and extensions
- "pwre" shows C-state transitions (to a C-state deeper than C0) and
- whether initiated by hardware
- "exstop" indicates execution stopped and whether the IP was recorded
- exactly,
- "pwrx" indicates return to C0
-For more details refer to the Intel 64 and IA-32 Architectures Software
-Developer Manuals.
-
-Error events show where the decoder lost the trace. Error events
-are quite important. Users must know if what they are seeing is a complete
-picture or not.
-
-The "d" option will cause the creation of a file "intel_pt.log" containing all
-decoded packets and instructions. Note that this option slows down the decoder
-and that the resulting file may be very large.
-
-In addition, the period of the "instructions" event can be specified. e.g.
-
- --itrace=i10us
-
-sets the period to 10us i.e. one instruction sample is synthesized for each 10
-microseconds of trace. Alternatives to "us" are "ms" (milliseconds),
-"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
-
-"ms", "us" and "ns" are converted to TSC ticks.
-
-The timing information included with Intel PT does not give the time of every
-instruction. Consequently, for the purpose of sampling, the decoder estimates
-the time since the last timing packet based on 1 tick per instruction. The time
-on the sample is *not* adjusted and reflects the last known value of TSC.
-
-For Intel PT, the default period is 100us.
-
-Setting it to a zero period means "as often as possible".
-
-In the case of Intel PT that is the same as a period of 1 and a unit of
-'instructions' (i.e. --itrace=i1i).
-
-Also the call chain size (default 16, max. 1024) for instructions or
-transactions events can be specified. e.g.
-
- --itrace=ig32
- --itrace=xg32
-
-Also the number of last branch entries (default 64, max. 1024) for instructions or
-transactions events can be specified. e.g.
-
- --itrace=il10
- --itrace=xl10
-
-Note that last branch entries are cleared for each sample, so there is no overlap
-from one sample to the next.
-
-To disable trace decoding entirely, use the option --no-itrace.
-
-It is also possible to skip events generated (instructions, branches, transactions)
-at the beginning. This is useful to ignore initialization code.
-
- --itrace=i0nss1000000
-
-skips the first million instructions.
-
-dump option
------------
-
-perf script has an option (-D) to "dump" the events i.e. display the binary
-data.
-
-When -D is used, Intel PT packets are displayed. The packet decoder does not
-pay attention to PSB packets, but just decodes the bytes - so the packets seen
-by the actual decoder may not be identical in places where the data is corrupt.
-One example of that would be when the buffer-switching interrupt has been too
-slow, and the buffer has been filled completely. In that case, the last packet
-in the buffer might be truncated and immediately followed by a PSB as the trace
-continues in the next buffer.
-
-To disable the display of Intel PT packets, combine the -D option with
---no-itrace.
-
-
-perf report
-===========
-
-By default, perf report will decode trace data found in the perf.data file.
-This can be further controlled by new option --itrace exactly the same as
-perf script, with the exception that the default is --itrace=igxe.
-
-
-perf inject
-===========
-
-perf inject also accepts the --itrace option in which case tracing data is
-removed and replaced with the synthesized events. e.g.
-
- perf inject --itrace -i perf.data -o perf.data.new
-
-Below is an example of using Intel PT with autofdo. It requires autofdo
-(https://github.com/google/autofdo) and gcc version 5. The bubble
-sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
-amended to take the number of elements as a parameter.
-
- $ gcc-5 -O3 sort.c -o sort_optimized
- $ ./sort_optimized 30000
- Bubble sorting array of 30000 elements
- 2254 ms
-
- $ cat ~/.perfconfig
- [intel-pt]
- mispred-all = on
-
- $ perf record -e intel_pt//u ./sort 3000
- Bubble sorting array of 3000 elements
- 58 ms
- [ perf record: Woken up 2 times to write data ]
- [ perf record: Captured and wrote 3.939 MB perf.data ]
- $ perf inject -i perf.data -o inj --itrace=i100usle --strip
- $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
- $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
- $ ./sort_autofdo 30000
- Bubble sorting array of 30000 elements
- 2155 ms
-
-Note there is currently no advantage to using Intel PT instead of LBR, but
-that may change in the future if greater use is made of the data.
-
-
-PEBS via Intel PT
-=================
-
-Some hardware has the feature to redirect PEBS records to the Intel PT trace.
-Recording is selected by using the aux-output config term e.g.
-
- perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname
-
-Note that currently, software only supports redirecting at most one PEBS event.
-
-To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g.
-
- perf script --itrace=oe
+Documentation for support for Intel Processor Trace within perf tools' has moved to file perf-intel-pt.txt
diff --git a/tools/perf/Documentation/itrace.txt b/tools/perf/Documentation/itrace.txt
index 82ff7dad40c2..e817179c5027 100644
--- a/tools/perf/Documentation/itrace.txt
+++ b/tools/perf/Documentation/itrace.txt
@@ -1,5 +1,5 @@
i synthesize instructions events
- b synthesize branches events
+ b synthesize branches events (branch misses for Arm SPE)
c synthesize branches events (calls only)
r synthesize branches events (returns only)
x synthesize transactions events
@@ -9,8 +9,14 @@
of aux-output (refer to perf record)
e synthesize error events
d create a debug log
+ f synthesize first level cache events
+ m synthesize last level cache events
+ t synthesize TLB events
+ a synthesize remote access events
g synthesize a call chain (use with i or x)
+ G synthesize a call chain on existing event records
l synthesize last branch entries (use with i or x)
+ L synthesize last branch entries on existing event records
s skip initial number of events
The default is all events i.e. the same as --itrace=ibxwpe,
@@ -31,6 +37,10 @@
Also the number of last branch entries (default 64, max. 1024) for
instructions or transactions events can be specified.
+ Similar to options g and l, size may also be specified for options G and L.
+ On x86, note that G and L work poorly when data has been recorded with
+ large PEBS. Refer linkperf:perf-intel-pt[1] man page for details.
+
It is also possible to skip events generated (instructions, branches, transactions,
ptwrite, power) at the beginning. This is useful to ignore initialization code.
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 0921a3c67381..bad16512c48d 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -61,6 +61,9 @@ SUBSYSTEM
'epoll'::
Eventpoll (epoll) stressing benchmarks.
+'internals'::
+ Benchmark internal perf functionality.
+
'all'::
All benchmark subsystems.
@@ -214,6 +217,11 @@ Suite for evaluating concurrent epoll_wait calls.
*ctl*::
Suite for evaluating multiple epoll_ctl calls.
+SUITES FOR 'internals'
+~~~~~~~~~~~~~~~~~~~~~~
+*synthesize*::
+Suite for evaluating perf's event synthesis performance.
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt
index e6150f21267d..98efdab5fbd4 100644
--- a/tools/perf/Documentation/perf-c2c.txt
+++ b/tools/perf/Documentation/perf-c2c.txt
@@ -40,7 +40,7 @@ RECORD OPTIONS
--------------
-e::
--event=::
- Select the PMU event. Use 'perf mem record -e list'
+ Select the PMU event. Use 'perf c2c record -e list'
to list available events.
-v::
@@ -111,6 +111,17 @@ REPORT OPTIONS
--display::
Switch to HITM type (rmt, lcl) to display and sort on. Total HITMs as default.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The perf.data file must have been obtained using
+ perf c2c record --call-graph lbr.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+ output. But this approach is not full proof. There can be cases
+ where it creates incorrect call stacks from incorrect matches.
+ The known limitations include exception handing such as
+ setjmp/longjmp will have calls/returns not match.
+
C2C RECORD
----------
The perf c2c record command setup options related to HITM cacheline analysis
diff --git a/tools/perf/Documentation/perf-config.txt b/tools/perf/Documentation/perf-config.txt
index 8ead55593984..c7d3df5798e2 100644
--- a/tools/perf/Documentation/perf-config.txt
+++ b/tools/perf/Documentation/perf-config.txt
@@ -405,14 +405,16 @@ ui.*::
This option is only applied to TUI.
call-graph.*::
- When sub-commands 'top' and 'report' work with -g/—-children
- there're options in control of call-graph.
+ The following controls the handling of call-graphs (obtained via the
+ -g/--call-graph options).
call-graph.record-mode::
- The record-mode can be 'fp' (frame pointer), 'dwarf' and 'lbr'.
- The value of 'dwarf' is effective only if perf detect needed library
- (libunwind or a recent version of libdw).
- 'lbr' only work for cpus that support it.
+ The mode for user space can be 'fp' (frame pointer), 'dwarf'
+ and 'lbr'. The value 'dwarf' is effective only if libunwind
+ (or a recent version of libdw) is present on the system;
+ the value 'lbr' only works for certain cpus. The method for
+ kernel space is controlled not by this option but by the
+ kernel config (CONFIG_UNWINDER_*).
call-graph.dump-size::
The size of stack to dump in order to do post-unwinding. Default is 8192 (byte).
@@ -665,6 +667,11 @@ convert.*::
Limit the size of ordered_events queue, so we could control
allocation size of perf data files without proper finished
round events.
+stat.*::
+
+ stat.big-num::
+ (boolean) Change the default for "--big-num". To make
+ "--no-big-num" the default, set "stat.big-num=false".
intel-pt.*::
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index a64d6588470e..70969ea73e01 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -66,4 +66,5 @@ include::itrace.txt[]
SEE ALSO
--------
-linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
+linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1],
+linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
new file mode 100644
index 000000000000..f4cd49a7fcdb
--- /dev/null
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -0,0 +1,1042 @@
+perf-intel-pt(1)
+================
+
+NAME
+----
+perf-intel-pt - Support for Intel Processor Trace within perf tools
+
+SYNOPSIS
+--------
+[verse]
+'perf record' -e intel_pt//
+
+DESCRIPTION
+-----------
+
+Intel Processor Trace (Intel PT) is an extension of Intel Architecture that
+collects information about software execution such as control flow, execution
+modes and timings and formats it into highly compressed binary packets.
+Technical details are documented in the Intel 64 and IA-32 Architectures
+Software Developer Manuals, Chapter 36 Intel Processor Trace.
+
+Intel PT is first supported in Intel Core M and 5th generation Intel Core
+processors that are based on the Intel micro-architecture code name Broadwell.
+
+Trace data is collected by 'perf record' and stored within the perf.data file.
+See below for options to 'perf record'.
+
+Trace data must be 'decoded' which involves walking the object code and matching
+the trace data packets. For example a TNT packet only tells whether a
+conditional branch was taken or not taken, so to make use of that packet the
+decoder must know precisely which instruction was being executed.
+
+Decoding is done on-the-fly. The decoder outputs samples in the same format as
+samples output by perf hardware events, for example as though the "instructions"
+or "branches" events had been recorded. Presently 3 tools support this:
+'perf script', 'perf report' and 'perf inject'. See below for more information
+on using those tools.
+
+The main distinguishing feature of Intel PT is that the decoder can determine
+the exact flow of software execution. Intel PT can be used to understand why
+and how did software get to a certain point, or behave a certain way. The
+software does not have to be recompiled, so Intel PT works with debug or release
+builds, however the executed images are needed - which makes use in JIT-compiled
+environments, or with self-modified code, a challenge. Also symbols need to be
+provided to make sense of addresses.
+
+A limitation of Intel PT is that it produces huge amounts of trace data
+(hundreds of megabytes per second per core) which takes a long time to decode,
+for example two or three orders of magnitude longer than it took to collect.
+Another limitation is the performance impact of tracing, something that will
+vary depending on the use-case and architecture.
+
+
+Quickstart
+----------
+
+It is important to start small. That is because it is easy to capture vastly
+more data than can possibly be processed.
+
+The simplest thing to do with Intel PT is userspace profiling of small programs.
+Data is captured with 'perf record' e.g. to trace 'ls' userspace-only:
+
+ perf record -e intel_pt//u ls
+
+And profiled with 'perf report' e.g.
+
+ perf report
+
+To also trace kernel space presents a problem, namely kernel self-modifying
+code. A fairly good kernel image is available in /proc/kcore but to get an
+accurate image a copy of /proc/kcore needs to be made under the same conditions
+as the data capture. 'perf record' can make a copy of /proc/kcore if the option
+--kcore is used, but access to /proc/kcore is restricted e.g.
+
+ sudo perf record -o pt_ls --kcore -e intel_pt// -- ls
+
+which will create a directory named 'pt_ls' and put the perf.data file (named
+simply 'data') and copies of /proc/kcore, /proc/kallsyms and /proc/modules into
+it. The other tools understand the directory format, so to use 'perf report'
+becomes:
+
+ sudo perf report -i pt_ls
+
+Because samples are synthesized after-the-fact, the sampling period can be
+selected for reporting. e.g. sample every microsecond
+
+ sudo perf report pt_ls --itrace=i1usge
+
+See the sections below for more information about the --itrace option.
+
+Beware the smaller the period, the more samples that are produced, and the
+longer it takes to process them.
+
+Also note that the coarseness of Intel PT timing information will start to
+distort the statistical value of the sampling as the sampling period becomes
+smaller.
+
+To represent software control flow, "branches" samples are produced. By default
+a branch sample is synthesized for every single branch. To get an idea what
+data is available you can use the 'perf script' tool with all itrace sampling
+options, which will list all the samples.
+
+ perf record -e intel_pt//u ls
+ perf script --itrace=ibxwpe
+
+An interesting field that is not printed by default is 'flags' which can be
+displayed as follows:
+
+ perf script --itrace=ibxwpe -F+flags
+
+The flags are "bcrosyiABEx" which stand for branch, call, return, conditional,
+system, asynchronous, interrupt, transaction abort, trace begin, trace end, and
+in transaction, respectively.
+
+Another interesting field that is not printed by default is 'ipc' which can be
+displayed as follows:
+
+ perf script --itrace=be -F+ipc
+
+There are two ways that instructions-per-cycle (IPC) can be calculated depending
+on the recording.
+
+If the 'cyc' config term (see config terms section below) was used, then IPC is
+calculated using the cycle count from CYC packets, otherwise MTC packets are
+used - refer to the 'mtc' config term. When MTC is used, however, the values
+are less accurate because the timing is less accurate.
+
+Because Intel PT does not update the cycle count on every branch or instruction,
+the values will often be zero. When there are values, they will be the number
+of instructions and number of cycles since the last update, and thus represent
+the average IPC since the last IPC for that event type. Note IPC for "branches"
+events is calculated separately from IPC for "instructions" events.
+
+Also note that the IPC instruction count may or may not include the current
+instruction. If the cycle count is associated with an asynchronous branch
+(e.g. page fault or interrupt), then the instruction count does not include the
+current instruction, otherwise it does. That is consistent with whether or not
+that instruction has retired when the cycle count is updated.
+
+Another note, in the case of "branches" events, non-taken branches are not
+presently sampled, so IPC values for them do not appear e.g. a CYC packet with a
+TNT packet that starts with a non-taken branch. To see every possible IPC
+value, "instructions" events can be used e.g. --itrace=i0ns
+
+While it is possible to create scripts to analyze the data, an alternative
+approach is available to export the data to a sqlite or postgresql database.
+Refer to script export-to-sqlite.py or export-to-postgresql.py for more details,
+and to script exported-sql-viewer.py for an example of using the database.
+
+There is also script intel-pt-events.py which provides an example of how to
+unpack the raw data for power events and PTWRITE.
+
+As mentioned above, it is easy to capture too much data. One way to limit the
+data captured is to use 'snapshot' mode which is explained further below.
+Refer to 'new snapshot option' and 'Intel PT modes of operation' further below.
+
+Another problem that will be experienced is decoder errors. They can be caused
+by inability to access the executed image, self-modified or JIT-ed code, or the
+inability to match side-band information (such as context switches and mmaps)
+which results in the decoder not knowing what code was executed.
+
+There is also the problem of perf not being able to copy the data fast enough,
+resulting in data lost because the buffer was full. See 'Buffer handling' below
+for more details.
+
+
+perf record
+-----------
+
+new event
+~~~~~~~~~
+
+The Intel PT kernel driver creates a new PMU for Intel PT. PMU events are
+selected by providing the PMU name followed by the "config" separated by slashes.
+An enhancement has been made to allow default "config" e.g. the option
+
+ -e intel_pt//
+
+will use a default config value. Currently that is the same as
+
+ -e intel_pt/tsc,noretcomp=0/
+
+which is the same as
+
+ -e intel_pt/tsc=1,noretcomp=0/
+
+Note there are now new config terms - see section 'config terms' further below.
+
+The config terms are listed in /sys/devices/intel_pt/format. They are bit
+fields within the config member of the struct perf_event_attr which is
+passed to the kernel by the perf_event_open system call. They correspond to bit
+fields in the IA32_RTIT_CTL MSR. Here is a list of them and their definitions:
+
+ $ grep -H . /sys/bus/event_source/devices/intel_pt/format/*
+ /sys/bus/event_source/devices/intel_pt/format/cyc:config:1
+ /sys/bus/event_source/devices/intel_pt/format/cyc_thresh:config:19-22
+ /sys/bus/event_source/devices/intel_pt/format/mtc:config:9
+ /sys/bus/event_source/devices/intel_pt/format/mtc_period:config:14-17
+ /sys/bus/event_source/devices/intel_pt/format/noretcomp:config:11
+ /sys/bus/event_source/devices/intel_pt/format/psb_period:config:24-27
+ /sys/bus/event_source/devices/intel_pt/format/tsc:config:10
+
+Note that the default config must be overridden for each term i.e.
+
+ -e intel_pt/noretcomp=0/
+
+is the same as:
+
+ -e intel_pt/tsc=1,noretcomp=0/
+
+So, to disable TSC packets use:
+
+ -e intel_pt/tsc=0/
+
+It is also possible to specify the config value explicitly:
+
+ -e intel_pt/config=0x400/
+
+Note that, as with all events, the event is suffixed with event modifiers:
+
+ u userspace
+ k kernel
+ h hypervisor
+ G guest
+ H host
+ p precise ip
+
+'h', 'G' and 'H' are for virtualization which is not supported by Intel PT.
+'p' is also not relevant to Intel PT. So only options 'u' and 'k' are
+meaningful for Intel PT.
+
+perf_event_attr is displayed if the -vv option is used e.g.
+
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 6
+ size 112
+ config 0x400
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ enable_on_exec 1
+ sample_id_all 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+
+
+config terms
+~~~~~~~~~~~~
+
+The June 2015 version of Intel 64 and IA-32 Architectures Software Developer
+Manuals, Chapter 36 Intel Processor Trace, defined new Intel PT features.
+Some of the features are reflect in new config terms. All the config terms are
+described below.
+
+tsc Always supported. Produces TSC timestamp packets to provide
+ timing information. In some cases it is possible to decode
+ without timing information, for example a per-thread context
+ that does not overlap executable memory maps.
+
+ The default config selects tsc (i.e. tsc=1).
+
+noretcomp Always supported. Disables "return compression" so a TIP packet
+ is produced when a function returns. Causes more packets to be
+ produced but might make decoding more reliable.
+
+ The default config does not select noretcomp (i.e. noretcomp=0).
+
+psb_period Allows the frequency of PSB packets to be specified.
+
+ The PSB packet is a synchronization packet that provides a
+ starting point for decoding or recovery from errors.
+
+ Support for psb_period is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+ which contains "1" if the feature is supported and "0"
+ otherwise.
+
+ Valid values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_periods
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The psb_period value is converted to the approximate number of
+ trace bytes between PSB packets as:
+
+ 2 ^ (value + 11)
+
+ e.g. value 3 means 16KiB bytes between PSBs
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/psb_period=15/u uname
+ Invalid psb_period for intel_pt. Valid values are: 0-5
+
+ If MTC packets are selected, the default config selects a value
+ of 3 (i.e. psb_period=3) or the nearest lower value that is
+ supported (0 is always supported). Otherwise the default is 0.
+
+ If decoding is expected to be reliable and the buffer is large
+ then a large PSB period can be used.
+
+ Because a TSC packet is produced with PSB, the PSB period can
+ also affect the granularity to timing information in the absence
+ of MTC or CYC.
+
+mtc Produces MTC timing packets.
+
+ MTC packets provide finer grain timestamp information than TSC
+ packets. MTC packets record time using the hardware crystal
+ clock (CTC) which is related to TSC packets using a TMA packet.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/mtc
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+ The frequency of MTC packets can also be specified - see
+ mtc_period below.
+
+mtc_period Specifies how frequently MTC packets are produced - see mtc
+ above for how to determine if MTC packets are supported.
+
+ Valid values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/mtc_periods
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The mtc_period value is converted to the MTC frequency as:
+
+ CTC-frequency / (2 ^ value)
+
+ e.g. value 3 means one eighth of CTC-frequency
+
+ Where CTC is the hardware crystal clock, the frequency of which
+ can be related to TSC via values provided in cpuid leaf 0x15.
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/mtc_period=15/u uname
+ Invalid mtc_period for intel_pt. Valid values are: 0,3,6,9
+
+ The default value is 3 or the nearest lower value
+ that is supported (0 is always supported).
+
+cyc Produces CYC timing packets.
+
+ CYC packets provide even finer grain timestamp information than
+ MTC and TSC packets. A CYC packet contains the number of CPU
+ cycles since the last CYC packet. Unlike MTC and TSC packets,
+ CYC packets are only sent when another packet is also sent.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/psb_cyc
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+ The number of CYC packets produced can be reduced by specifying
+ a threshold - see cyc_thresh below.
+
+cyc_thresh Specifies how frequently CYC packets are produced - see cyc
+ above for how to determine if CYC packets are supported.
+
+ Valid cyc_thresh values are given by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/cycle_thresholds
+
+ which contains a hexadecimal value, the bits of which represent
+ valid values e.g. bit 2 set means value 2 is valid.
+
+ The cyc_thresh value represents the minimum number of CPU cycles
+ that must have passed before a CYC packet can be sent. The
+ number of CPU cycles is:
+
+ 2 ^ (value - 1)
+
+ e.g. value 4 means 8 CPU cycles must pass before a CYC packet
+ can be sent. Note a CYC packet is still only sent when another
+ packet is sent, not at, e.g. every 8 CPU cycles.
+
+ If an invalid value is entered, the error message
+ will give a list of valid values e.g.
+
+ $ perf record -e intel_pt/cyc,cyc_thresh=15/u uname
+ Invalid cyc_thresh for intel_pt. Valid values are: 0-12
+
+ CYC packets are not requested by default.
+
+pt Specifies pass-through which enables the 'branch' config term.
+
+ The default config selects 'pt' if it is available, so a user will
+ never need to specify this term.
+
+branch Enable branch tracing. Branch tracing is enabled by default so to
+ disable branch tracing use 'branch=0'.
+
+ The default config selects 'branch' if it is available.
+
+ptw Enable PTWRITE packets which are produced when a ptwrite instruction
+ is executed.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/ptwrite
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+fup_on_ptw Enable a FUP packet to follow the PTWRITE packet. The FUP packet
+ provides the address of the ptwrite instruction. In the absence of
+ fup_on_ptw, the decoder will use the address of the previous branch
+ if branch tracing is enabled, otherwise the address will be zero.
+ Note that fup_on_ptw will work even when branch tracing is disabled.
+
+pwr_evt Enable power events. The power events provide information about
+ changes to the CPU C-state.
+
+ Support for this feature is indicated by:
+
+ /sys/bus/event_source/devices/intel_pt/caps/power_event_trace
+
+ which contains "1" if the feature is supported and
+ "0" otherwise.
+
+
+AUX area sampling option
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+To select Intel PT "sampling" the AUX area sampling option can be used:
+
+ --aux-sample
+
+Optionally it can be followed by the sample size in bytes e.g.
+
+ --aux-sample=8192
+
+In addition, the Intel PT event to sample must be defined e.g.
+
+ -e intel_pt//u
+
+Samples on other events will be created containing Intel PT data e.g. the
+following will create Intel PT samples on the branch-misses event, note the
+events must be grouped using {}:
+
+ perf record --aux-sample -e '{intel_pt//u,branch-misses:u}'
+
+An alternative to '--aux-sample' is to add the config term 'aux-sample-size' to
+events. In this case, the grouping is implied e.g.
+
+ perf record -e intel_pt//u -e branch-misses/aux-sample-size=8192/u
+
+is the same as:
+
+ perf record -e '{intel_pt//u,branch-misses/aux-sample-size=8192/u}'
+
+but allows for also using an address filter e.g.:
+
+ perf record -e intel_pt//u --filter 'filter * @/bin/ls' -e branch-misses/aux-sample-size=8192/u -- ls
+
+It is important to select a sample size that is big enough to contain at least
+one PSB packet. If not a warning will be displayed:
+
+ Intel PT sample size (%zu) may be too small for PSB period (%zu)
+
+The calculation used for that is: if sample_size <= psb_period + 256 display the
+warning. When sampling is used, psb_period defaults to 0 (2KiB).
+
+The default sample size is 4KiB.
+
+The sample size is passed in aux_sample_size in struct perf_event_attr. The
+sample size is limited by the maximum event size which is 64KiB. It is
+difficult to know how big the event might be without the trace sample attached,
+but the tool validates that the sample size is not greater than 60KiB.
+
+
+new snapshot option
+~~~~~~~~~~~~~~~~~~~
+
+The difference between full trace and snapshot from the kernel's perspective is
+that in full trace we don't overwrite trace data that the user hasn't collected
+yet (and indicated that by advancing aux_tail), whereas in snapshot mode we let
+the trace run and overwrite older data in the buffer so that whenever something
+interesting happens, we can stop it and grab a snapshot of what was going on
+around that interesting moment.
+
+To select snapshot mode a new option has been added:
+
+ -S
+
+Optionally it can be followed by the snapshot size e.g.
+
+ -S0x100000
+
+The default snapshot size is the auxtrace mmap size. If neither auxtrace mmap size
+nor snapshot size is specified, then the default is 4MiB for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced as described in the 'new auxtrace mmap size option' section below.
+
+The snapshot size is displayed if the option -vv is used e.g.
+
+ Intel PT snapshot size: %zu
+
+
+new auxtrace mmap size option
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Intel PT buffer size is specified by an addition to the -m option e.g.
+
+ -m,16
+
+selects a buffer size of 16 pages i.e. 64KiB.
+
+Note that the existing functionality of -m is unchanged. The auxtrace mmap size
+is specified by the optional addition of a comma and the value.
+
+The default auxtrace mmap size for Intel PT is 4MiB/page_size for privileged users
+(or if /proc/sys/kernel/perf_event_paranoid < 0), 128KiB for unprivileged users.
+If an unprivileged user does not specify mmap pages, the mmap pages will be
+reduced from the default 512KiB/page_size to 256KiB/page_size, otherwise the
+user is likely to get an error as they exceed their mlock limit (Max locked
+memory as shown in /proc/self/limits). Note that perf does not count the first
+512KiB (actually /proc/sys/kernel/perf_event_mlock_kb minus 1 page) per cpu
+against the mlock limit so an unprivileged user is allowed 512KiB per cpu plus
+their mlock limit (which defaults to 64KiB but is not multiplied by the number
+of cpus).
+
+In full-trace mode, powers of two are allowed for buffer size, with a minimum
+size of 2 pages. In snapshot mode or sampling mode, it is the same but the
+minimum size is 1 page.
+
+The mmap size and auxtrace mmap size are displayed if the -vv option is used e.g.
+
+ mmap length 528384
+ auxtrace mmap length 4198400
+
+
+Intel PT modes of operation
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Intel PT can be used in 2 modes:
+ full-trace mode
+ sample mode
+ snapshot mode
+
+Full-trace mode traces continuously e.g.
+
+ perf record -e intel_pt//u uname
+
+Sample mode attaches a Intel PT sample to other events e.g.
+
+ perf record --aux-sample -e intel_pt//u -e branch-misses:u
+
+Snapshot mode captures the available data when a signal is sent e.g.
+
+ perf record -v -e intel_pt//u -S ./loopy 1000000000 &
+ [1] 11435
+ kill -USR2 11435
+ Recording AUX area tracing snapshot
+
+Note that the signal sent is SIGUSR2.
+Note that "Recording AUX area tracing snapshot" is displayed because the -v
+option is used.
+
+The 2 modes cannot be used together.
+
+
+Buffer handling
+~~~~~~~~~~~~~~~
+
+There may be buffer limitations (i.e. single ToPa entry) which means that actual
+buffer sizes are limited to powers of 2 up to 4MiB (MAX_ORDER). In order to
+provide other sizes, and in particular an arbitrarily large size, multiple
+buffers are logically concatenated. However an interrupt must be used to switch
+between buffers. That has two potential problems:
+ a) the interrupt may not be handled in time so that the current buffer
+ becomes full and some trace data is lost.
+ b) the interrupts may slow the system and affect the performance
+ results.
+
+If trace data is lost, the driver sets 'truncated' in the PERF_RECORD_AUX event
+which the tools report as an error.
+
+In full-trace mode, the driver waits for data to be copied out before allowing
+the (logical) buffer to wrap-around. If data is not copied out quickly enough,
+again 'truncated' is set in the PERF_RECORD_AUX event. If the driver has to
+wait, the intel_pt event gets disabled. Because it is difficult to know when
+that happens, perf tools always re-enable the intel_pt event after copying out
+data.
+
+
+Intel PT and build ids
+~~~~~~~~~~~~~~~~~~~~~~
+
+By default "perf record" post-processes the event stream to find all build ids
+for executables for all addresses sampled. Deliberately, Intel PT is not
+decoded for that purpose (it would take too long). Instead the build ids for
+all executables encountered (due to mmap, comm or task events) are included
+in the perf.data file.
+
+To see buildids included in the perf.data file use the command:
+
+ perf buildid-list
+
+If the perf.data file contains Intel PT data, that is the same as:
+
+ perf buildid-list --with-hits
+
+
+Snapshot mode and event disabling
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+In order to make a snapshot, the intel_pt event is disabled using an IOCTL,
+namely PERF_EVENT_IOC_DISABLE. However doing that can also disable the
+collection of side-band information. In order to prevent that, a dummy
+software event has been introduced that permits tracking events (like mmaps) to
+continue to be recorded while intel_pt is disabled. That is important to ensure
+there is complete side-band information to allow the decoding of subsequent
+snapshots.
+
+A test has been created for that. To find the test:
+
+ perf test list
+ ...
+ 23: Test using a dummy software event to keep tracking
+
+To run the test:
+
+ perf test 23
+ 23: Test using a dummy software event to keep tracking : Ok
+
+
+perf record modes (nothing new here)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+perf record essentially operates in one of three modes:
+ per thread
+ per cpu
+ workload only
+
+"per thread" mode is selected by -t or by --per-thread (with -p or -u or just a
+workload).
+"per cpu" is selected by -C or -a.
+"workload only" mode is selected by not using the other options but providing a
+command to run (i.e. the workload).
+
+In per-thread mode an exact list of threads is traced. There is no inheritance.
+Each thread has its own event buffer.
+
+In per-cpu mode all processes (or processes from the selected cgroup i.e. -G
+option, or processes selected with -p or -u) are traced. Each cpu has its own
+buffer. Inheritance is allowed.
+
+In workload-only mode, the workload is traced but with per-cpu buffers.
+Inheritance is allowed. Note that you can now trace a workload in per-thread
+mode by using the --per-thread option.
+
+
+Privileged vs non-privileged users
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users
+have memory limits imposed upon them. That affects what buffer sizes they can
+have as outlined above.
+
+The v4.2 kernel introduced support for a context switch metadata event,
+PERF_RECORD_SWITCH, which allows unprivileged users to see when their processes
+are scheduled out and in, just not by whom, which is left for the
+PERF_RECORD_SWITCH_CPU_WIDE, that is only accessible in system wide context,
+which in turn requires CAP_PERFMON or CAP_SYS_ADMIN.
+
+Please see the 45ac1403f564 ("perf: Add PERF_RECORD_SWITCH to indicate context
+switches") commit, that introduces these metadata events for further info.
+
+When working with kernels < v4.2, the following considerations must be taken,
+as the sched:sched_switch tracepoints will be used to receive such information:
+
+Unless /proc/sys/kernel/perf_event_paranoid is set to -1, unprivileged users are
+not permitted to use tracepoints which means there is insufficient side-band
+information to decode Intel PT in per-cpu mode, and potentially workload-only
+mode too if the workload creates new processes.
+
+Note also, that to use tracepoints, read-access to debugfs is required. So if
+debugfs is not mounted or the user does not have read-access, it will again not
+be possible to decode Intel PT in per-cpu mode.
+
+
+sched_switch tracepoint
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The sched_switch tracepoint is used to provide side-band data for Intel PT
+decoding in kernels where the PERF_RECORD_SWITCH metadata event isn't
+available.
+
+The sched_switch events are automatically added. e.g. the second event shown
+below:
+
+ $ perf record -vv -e intel_pt//u uname
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 6
+ size 112
+ config 0x400
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ enable_on_exec 1
+ sample_id_all 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 2
+ size 112
+ config 0x108
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|CPU|PERIOD|RAW|IDENTIFIER
+ read_format ID
+ inherit 1
+ sample_id_all 1
+ exclude_guest 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid -1 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid -1 cpu 3 group_fd -1 flags 0x8
+ ------------------------------------------------------------
+ perf_event_attr:
+ type 1
+ size 112
+ config 0x9
+ { sample_period, sample_freq } 1
+ sample_type IP|TID|TIME|IDENTIFIER
+ read_format ID
+ disabled 1
+ inherit 1
+ exclude_kernel 1
+ exclude_hv 1
+ mmap 1
+ comm 1
+ enable_on_exec 1
+ task 1
+ sample_id_all 1
+ mmap2 1
+ comm_exec 1
+ ------------------------------------------------------------
+ sys_perf_event_open: pid 31104 cpu 0 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 1 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 2 group_fd -1 flags 0x8
+ sys_perf_event_open: pid 31104 cpu 3 group_fd -1 flags 0x8
+ mmap size 528384B
+ AUX area mmap length 4194304
+ perf event ring buffer mmapped per cpu
+ Synthesizing auxtrace information
+ Linux
+ [ perf record: Woken up 1 times to write data ]
+ [ perf record: Captured and wrote 0.042 MB perf.data ]
+
+Note, the sched_switch event is only added if the user is permitted to use it
+and only in per-cpu mode.
+
+Note also, the sched_switch event is only added if TSC packets are requested.
+That is because, in the absence of timing information, the sched_switch events
+cannot be matched against the Intel PT trace.
+
+
+perf script
+-----------
+
+By default, perf script will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace.
+
+
+New --itrace option
+~~~~~~~~~~~~~~~~~~~
+
+Having no option is the same as
+
+ --itrace
+
+which, in turn, is the same as
+
+ --itrace=cepwx
+
+The letters are:
+
+ i synthesize "instructions" events
+ b synthesize "branches" events
+ x synthesize "transactions" events
+ w synthesize "ptwrite" events
+ p synthesize "power" events
+ c synthesize branches events (calls only)
+ r synthesize branches events (returns only)
+ e synthesize tracing error events
+ d create a debug log
+ g synthesize a call chain (use with i or x)
+ G synthesize a call chain on existing event records
+ l synthesize last branch entries (use with i or x)
+ L synthesize last branch entries on existing event records
+ s skip initial number of events
+
+"Instructions" events look like they were recorded by "perf record -e
+instructions".
+
+"Branches" events look like they were recorded by "perf record -e branches". "c"
+and "r" can be combined to get calls and returns.
+
+"Transactions" events correspond to the start or end of transactions. The
+'flags' field can be used in perf script to determine whether the event is a
+tranasaction start, commit or abort.
+
+Note that "instructions", "branches" and "transactions" events depend on code
+flow packets which can be disabled by using the config term "branch=0". Refer
+to the config terms section above.
+
+"ptwrite" events record the payload of the ptwrite instruction and whether
+"fup_on_ptw" was used. "ptwrite" events depend on PTWRITE packets which are
+recorded only if the "ptw" config term was used. Refer to the config terms
+section above. perf script "synth" field displays "ptwrite" information like
+this: "ip: 0 payload: 0x123456789abcdef0" where "ip" is 1 if "fup_on_ptw" was
+used.
+
+"Power" events correspond to power event packets and CBR (core-to-bus ratio)
+packets. While CBR packets are always recorded when tracing is enabled, power
+event packets are recorded only if the "pwr_evt" config term was used. Refer to
+the config terms section above. The power events record information about
+C-state changes, whereas CBR is indicative of CPU frequency. perf script
+"event,synth" fields display information like this:
+ cbr: cbr: 22 freq: 2189 MHz (200%)
+ mwait: hints: 0x60 extensions: 0x1
+ pwre: hw: 0 cstate: 2 sub-cstate: 0
+ exstop: ip: 1
+ pwrx: deepest cstate: 2 last cstate: 2 wake reason: 0x4
+Where:
+ "cbr" includes the frequency and the percentage of maximum non-turbo
+ "mwait" shows mwait hints and extensions
+ "pwre" shows C-state transitions (to a C-state deeper than C0) and
+ whether initiated by hardware
+ "exstop" indicates execution stopped and whether the IP was recorded
+ exactly,
+ "pwrx" indicates return to C0
+For more details refer to the Intel 64 and IA-32 Architectures Software
+Developer Manuals.
+
+Error events show where the decoder lost the trace. Error events
+are quite important. Users must know if what they are seeing is a complete
+picture or not.
+
+The "d" option will cause the creation of a file "intel_pt.log" containing all
+decoded packets and instructions. Note that this option slows down the decoder
+and that the resulting file may be very large.
+
+In addition, the period of the "instructions" event can be specified. e.g.
+
+ --itrace=i10us
+
+sets the period to 10us i.e. one instruction sample is synthesized for each 10
+microseconds of trace. Alternatives to "us" are "ms" (milliseconds),
+"ns" (nanoseconds), "t" (TSC ticks) or "i" (instructions).
+
+"ms", "us" and "ns" are converted to TSC ticks.
+
+The timing information included with Intel PT does not give the time of every
+instruction. Consequently, for the purpose of sampling, the decoder estimates
+the time since the last timing packet based on 1 tick per instruction. The time
+on the sample is *not* adjusted and reflects the last known value of TSC.
+
+For Intel PT, the default period is 100us.
+
+Setting it to a zero period means "as often as possible".
+
+In the case of Intel PT that is the same as a period of 1 and a unit of
+'instructions' (i.e. --itrace=i1i).
+
+Also the call chain size (default 16, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+ --itrace=ig32
+ --itrace=xg32
+
+Also the number of last branch entries (default 64, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+ --itrace=il10
+ --itrace=xl10
+
+Note that last branch entries are cleared for each sample, so there is no overlap
+from one sample to the next.
+
+The G and L options are designed in particular for sample mode, and work much
+like g and l but add call chain and branch stack to the other selected events
+instead of synthesized events. For example, to record branch-misses events for
+'ls' and then add a call chain derived from the Intel PT trace:
+
+ perf record --aux-sample -e '{intel_pt//u,branch-misses:u}' -- ls
+ perf report --itrace=Ge
+
+Although in fact G is a default for perf report, so that is the same as just:
+
+ perf report
+
+One caveat with the G and L options is that they work poorly with "Large PEBS".
+Large PEBS means PEBS records will be accumulated by hardware and the written
+into the event buffer in one go. That reduces interrupts, but can give very
+late timestamps. Because the Intel PT trace is synchronized by timestamps,
+the PEBS events do not match the trace. Currently, Large PEBS is used only in
+certain circumstances:
+ - hardware supports it
+ - PEBS is used
+ - event period is specified, instead of frequency
+ - the sample type is limited to the following flags:
+ PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR |
+ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID |
+ PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER |
+ PERF_SAMPLE_TRANSACTION | PERF_SAMPLE_PHYS_ADDR |
+ PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_REGS_USER |
+ PERF_SAMPLE_PERIOD (and sometimes) | PERF_SAMPLE_TIME
+Because Intel PT sample mode uses a different sample type to the list above,
+Large PEBS is not used with Intel PT sample mode. To avoid Large PEBS in other
+cases, avoid specifying the event period i.e. avoid the 'perf record' -c option,
+--count option, or 'period' config term.
+
+To disable trace decoding entirely, use the option --no-itrace.
+
+It is also possible to skip events generated (instructions, branches, transactions)
+at the beginning. This is useful to ignore initialization code.
+
+ --itrace=i0nss1000000
+
+skips the first million instructions.
+
+dump option
+~~~~~~~~~~~
+
+perf script has an option (-D) to "dump" the events i.e. display the binary
+data.
+
+When -D is used, Intel PT packets are displayed. The packet decoder does not
+pay attention to PSB packets, but just decodes the bytes - so the packets seen
+by the actual decoder may not be identical in places where the data is corrupt.
+One example of that would be when the buffer-switching interrupt has been too
+slow, and the buffer has been filled completely. In that case, the last packet
+in the buffer might be truncated and immediately followed by a PSB as the trace
+continues in the next buffer.
+
+To disable the display of Intel PT packets, combine the -D option with
+--no-itrace.
+
+
+perf report
+-----------
+
+By default, perf report will decode trace data found in the perf.data file.
+This can be further controlled by new option --itrace exactly the same as
+perf script, with the exception that the default is --itrace=igxe.
+
+
+perf inject
+-----------
+
+perf inject also accepts the --itrace option in which case tracing data is
+removed and replaced with the synthesized events. e.g.
+
+ perf inject --itrace -i perf.data -o perf.data.new
+
+Below is an example of using Intel PT with autofdo. It requires autofdo
+(https://github.com/google/autofdo) and gcc version 5. The bubble
+sort example is from the AutoFDO tutorial (https://gcc.gnu.org/wiki/AutoFDO/Tutorial)
+amended to take the number of elements as a parameter.
+
+ $ gcc-5 -O3 sort.c -o sort_optimized
+ $ ./sort_optimized 30000
+ Bubble sorting array of 30000 elements
+ 2254 ms
+
+ $ cat ~/.perfconfig
+ [intel-pt]
+ mispred-all = on
+
+ $ perf record -e intel_pt//u ./sort 3000
+ Bubble sorting array of 3000 elements
+ 58 ms
+ [ perf record: Woken up 2 times to write data ]
+ [ perf record: Captured and wrote 3.939 MB perf.data ]
+ $ perf inject -i perf.data -o inj --itrace=i100usle --strip
+ $ ./create_gcov --binary=./sort --profile=inj --gcov=sort.gcov -gcov_version=1
+ $ gcc-5 -O3 -fauto-profile=sort.gcov sort.c -o sort_autofdo
+ $ ./sort_autofdo 30000
+ Bubble sorting array of 30000 elements
+ 2155 ms
+
+Note there is currently no advantage to using Intel PT instead of LBR, but
+that may change in the future if greater use is made of the data.
+
+
+PEBS via Intel PT
+-----------------
+
+Some hardware has the feature to redirect PEBS records to the Intel PT trace.
+Recording is selected by using the aux-output config term e.g.
+
+ perf record -c 10000 -e '{intel_pt/branch=0/,cycles/aux-output/ppp}' uname
+
+Note that currently, software only supports redirecting at most one PEBS event.
+
+To display PEBS events from the Intel PT trace, use the itrace 'o' option e.g.
+
+ perf script --itrace=oe
+
+
+SEE ALSO
+--------
+
+linkperf:perf-record[1], linkperf:perf-script[1], linkperf:perf-report[1],
+linkperf:perf-inject[1]
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 6345db33c533..376a50b3452d 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -115,6 +115,11 @@ raw encoding of 0x1A8 can be used:
perf stat -e r1a8 -a sleep 1
perf record -e r1a8 ...
+It's also possible to use pmu syntax:
+
+ perf record -e r1a8 -a sleep 1
+ perf record -e cpu/r1a8/ ...
+
You should refer to the processor specific documentation for getting these
details. Some of them are referenced in the SEE ALSO section below.
@@ -258,6 +263,9 @@ Normally all events in an event group sample, but with :S only
the first event (the leader) samples, and it only reads the values of the
other events in the group.
+However, in the case AUX area events (e.g. Intel PT or CoreSight), the AUX
+area event must be the leader, so then the second event samples, not the first.
+
OPTIONS
-------
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index b23a4012a606..fa8a5fcd27ab 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -237,16 +237,22 @@ OPTIONS
option and remains only for backward compatibility. See --event.
-g::
- Enables call-graph (stack chain/backtrace) recording.
+ Enables call-graph (stack chain/backtrace) recording for both
+ kernel space and user space.
--call-graph::
Setup and enable call-graph (stack chain/backtrace) recording,
- implies -g. Default is "fp".
+ implies -g. Default is "fp" (for user space).
- Allows specifying "fp" (frame pointer) or "dwarf"
- (DWARF's CFI - Call Frame Information) or "lbr"
- (Hardware Last Branch Record facility) as the method to collect
- the information used to show the call graphs.
+ The unwinding method used for kernel space is dependent on the
+ unwinder used by the active kernel configuration, i.e
+ CONFIG_UNWINDER_FRAME_POINTER (fp) or CONFIG_UNWINDER_ORC (orc)
+
+ Any option specified here controls the method used for user space.
+
+ Valid options are "fp" (frame pointer), "dwarf" (DWARF's CFI -
+ Call Frame Information) or "lbr" (Hardware Last Branch Record
+ facility).
In some systems, where binaries are build with gcc
--fomit-frame-pointer, using the "fp" method will produce bogus
@@ -385,7 +391,10 @@ displayed with the weight and local_weight sort keys. This currently works for
abort events and some memory events in precise mode on modern Intel CPUs.
--namespaces::
-Record events of type PERF_RECORD_NAMESPACES.
+Record events of type PERF_RECORD_NAMESPACES. This enables 'cgroup_id' sort key.
+
+--all-cgroups::
+Record events of type PERF_RECORD_CGROUP. This enables 'cgroup' sort key.
--transaction::
Record transaction flags for transaction related events.
@@ -449,7 +458,9 @@ This option sets the time out limit. The default value is 500 ms.
--switch-events::
Record context switch events i.e. events of type PERF_RECORD_SWITCH or
-PERF_RECORD_SWITCH_CPU_WIDE.
+PERF_RECORD_SWITCH_CPU_WIDE. In some cases (e.g. Intel PT or CoreSight)
+switch events will be enabled automatically, which can be suppressed by
+by the option --no-switch-events.
--clang-path=PATH::
Path to clang binary to use for compiling BPF scriptlets.
@@ -547,6 +558,19 @@ overhead. You can still switch them on with:
--switch-output --no-no-buildid --no-no-buildid-cache
+--switch-output-event::
+Events that will cause the switch of the perf.data file, auto-selecting
+--switch-output=signal, the results are similar as internally the side band
+thread will also send a SIGUSR2 to the main one.
+
+Uses the same syntax as --event, it will just not be recorded, serving only to
+switch the perf.data file as soon as the --switch-output event is processed by
+a separate sideband thread.
+
+This sideband thread is also used to other purposes, like processing the
+PERF_RECORD_BPF_EVENT records as they happen, asking the kernel for extra BPF
+information, etc.
+
--switch-max-files=N::
When rotating perf.data with --switch-output, only keep N files.
@@ -587,6 +611,21 @@ Make a copy of /proc/kcore and place it into a directory with the perf data file
Limit the sample data max size, <size> is expected to be a number with
appended unit character - B/K/M/G
+--num-thread-synthesize::
+ The number of threads to run when synthesizing events for existing processes.
+ By default, the number of threads equals 1.
+
+ifdef::HAVE_LIBPFM[]
+--pfm-events events::
+Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
+including support for event filters. For example '--pfm-events
+inst_retired:any_p:u:c=1:i'. More than one event can be passed to the
+option using the comma separator. Hardware events and generic hardware
+events cannot be mixed together. The latter must be used with the -e
+option. The -e option and this one can be mixed and matched. Events
+can be grouped using the {} notation.
+endif::HAVE_LIBPFM[]
+
SEE ALSO
--------
-linkperf:perf-stat[1], linkperf:perf-list[1]
+linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index db61f16ffa56..d068103690cc 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -95,6 +95,7 @@ OPTIONS
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
- cgroup_id: ID derived from cgroup namespace device and inode numbers.
+ - cgroup: cgroup pathname in the cgroupfs.
- transaction: Transaction abort flags.
- overhead: Overhead percentage of sample
- overhead_sys: Overhead percentage of sample running in system mode
@@ -377,6 +378,11 @@ OPTIONS
Show event group information together. It forces group output also
if there are no groups defined in data file.
+--group-sort-idx::
+ Sort the output by the event at the index n in group. If n is invalid,
+ sort by the first event. It can support multiple groups with different
+ amount of events. WARNING: This should be used on grouped events.
+
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
@@ -482,6 +488,17 @@ include::itrace.txt[]
This option extends the perf report to show reference callgraphs,
which collected by reference event, in no callgraph event.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The perf.data file must have been obtained using
+ perf record --call-graph lbr.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+ output. But this approach is not full proof. There can be cases
+ where it creates incorrect call stacks from incorrect matches.
+ The known limitations include exception handing such as
+ setjmp/longjmp will have calls/returns not match.
+
--socket-filter::
Only report the samples on the processor socket that match with this filter
@@ -546,4 +563,5 @@ include::callchain-overhead-calculation.txt[]
SEE ALSO
--------
-linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1]
+linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1],
+linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 2599b057e47b..372dfd110e6d 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -319,6 +319,9 @@ OPTIONS
--show-bpf-events
Display bpf events i.e. events of type PERF_RECORD_KSYMBOL and PERF_RECORD_BPF_EVENT.
+--show-cgroup-events
+ Display cgroup events i.e. events of type PERF_RECORD_CGROUP.
+
--demangle::
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
@@ -390,6 +393,9 @@ include::itrace.txt[]
--reltime::
Print time stamps relative to trace start.
+--deltatime::
+ Print time stamps relative to previous event.
+
--per-event-dump::
Create per event files with a "perf.data.EVENT.dump" name instead of
printing to stdout, useful, for instance, for generating flamegraphs.
@@ -406,6 +412,14 @@ include::itrace.txt[]
--xed::
Run xed disassembler on output. Requires installing the xed disassembler.
+-S::
+--symbols=symbol[,symbol...]::
+ Only consider the listed symbols. Symbols are typically a name
+ but they may also be hexadecimal address.
+
+ For example, to select the symbol noploop or the address 0x4007a0:
+ perf script --symbols=noploop,0x4007a0
+
--call-trace::
Show call stream for intel_pt traces. The CPUs are interleaved, but
can be filtered with -C.
@@ -426,7 +440,18 @@ include::itrace.txt[]
--show-on-off-events::
Show the --switch-on/off events too.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The perf.data file must have been obtained using
+ perf record --call-graph lbr.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+ output. But this approach is not full proof. There can be cases
+ where it creates incorrect call stacks from incorrect matches.
+ The known limitations include exception handing such as
+ setjmp/longjmp will have calls/returns not match.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
-linkperf:perf-script-python[1]
+linkperf:perf-script-python[1], linkperf:perf-intel-pt[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 9431b8066fb4..b029ee728a0b 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -71,6 +71,16 @@ report::
--tid=<tid>::
stat events on existing thread id (comma separated list)
+ifdef::HAVE_LIBPFM[]
+--pfm-events events::
+Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
+including support for event filters. For example '--pfm-events
+inst_retired:any_p:u:c=1:i'. More than one event can be passed to the
+option using the comma separator. Hardware events and generic hardware
+events cannot be mixed together. The latter must be used with the -e
+option. The -e option and this one can be mixed and matched. Events
+can be grouped using the {} notation.
+endif::HAVE_LIBPFM[]
-a::
--all-cpus::
@@ -93,7 +103,9 @@ report::
-B::
--big-num::
- print large numbers with thousands' separators according to locale
+ print large numbers with thousands' separators according to locale.
+ Enabled by default. Use "--no-big-num" to disable.
+ Default setting can be changed with "perf config stat.big-num=false".
-C::
--cpu=::
@@ -176,6 +188,8 @@ Print count deltas every N milliseconds (minimum: 1ms)
The overhead percentage could be high in some cases, for instance with small, sub 100ms intervals. Use with caution.
example: 'perf stat -I 1000 -e cycles -a sleep 5'
+If the metric exists, it is calculated by the counts generated in this interval and the metric is printed after #.
+
--interval-count times::
Print count deltas for fixed number of times.
This option should be used together with "-I" option.
@@ -232,6 +246,25 @@ filter out the startup phase of the program, which is often very different.
Print statistics of transactional execution if supported.
+--metric-no-group::
+By default, events to compute a metric are placed in weak groups. The
+group tries to enforce scheduling all or none of the events. The
+--metric-no-group option places events outside of groups and may
+increase the chance of the event being scheduled - leading to more
+accuracy. However, as events may not be scheduled together accuracy
+for metrics like instructions per cycle can be lower - as both metrics
+may no longer be being measured at the same time.
+
+--metric-no-merge::
+By default metric events in different weak groups can be shared if one
+group contains all the events needed by another. In such cases one
+group will be eliminated reducing event multiplexing and making it so
+that certain groups of metrics sum to 100%. A downside to sharing a
+group is that the group may require multiplexing and so accuracy for a
+small group that need not have multiplexing is lowered. This option
+forbids the event merging logic from sharing events between groups and
+may be used to increase accuracy in this case.
+
STAT RECORD
-----------
Stores stat data into perf data file.
@@ -334,6 +367,15 @@ Configure all used events to run in kernel space.
--all-user::
Configure all used events to run in user space.
+--percore-show-thread::
+The event modifier "percore" has supported to sum up the event counts
+for all hardware threads in a core and show the counts per core.
+
+This option with event modifier "percore" enabled also sums up the event
+counts for all hardware threads in a core but show the sum counts per
+hardware thread. This is essentially a replacement for the any bit and
+convenient for post processing.
+
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 324b6b53c86b..ee2024691d46 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -53,6 +53,11 @@ Default is to monitor all CPUS.
--group::
Put the counters into a counter group.
+--group-sort-idx::
+ Sort the output by the event at the index n in group. If n is invalid,
+ sort by the first event. It can support multiple groups with different
+ amount of events. WARNING: This should be used on grouped events.
+
-F <freq>::
--freq=<freq>::
Profile at this frequency. Use 'max' to use the currently maximum
@@ -272,6 +277,10 @@ Default is to monitor all CPUS.
Record events of type PERF_RECORD_NAMESPACES and display it with the
'cgroup_id' sort key.
+--all-cgroups::
+ Record events of type PERF_RECORD_CGROUP and display it with the
+ 'cgroup' sort key.
+
--switch-on EVENT_NAME::
Only consider events after this event is found.
@@ -310,6 +319,26 @@ Default is to monitor all CPUS.
go straight to the histogram browser, just like 'perf top' with no events
explicitely specified does.
+--stitch-lbr::
+ Show callgraph with stitched LBRs, which may have more complete
+ callgraph. The option must be used with --call-graph lbr recording.
+ Disabled by default. In common cases with call stack overflows,
+ it can recreate better call stacks than the default lbr call stack
+ output. But this approach is not full proof. There can be cases
+ where it creates incorrect call stacks from incorrect matches.
+ The known limitations include exception handing such as
+ setjmp/longjmp will have calls/returns not match.
+
+ifdef::HAVE_LIBPFM[]
+--pfm-events events::
+Select a PMU event using libpfm4 syntax (see http://perfmon2.sf.net)
+including support for event filters. For example '--pfm-events
+inst_retired:any_p:u:c=1:i'. More than one event can be passed to the
+option using the comma separator. Hardware events and generic hardware
+events cannot be mixed together. The latter must be used with the -e
+option. The -e option and this one can be mixed and matched. Events
+can be grouped using the {} notation.
+endif::HAVE_LIBPFM[]
INTERACTIVE PROMPTING KEYS
--------------------------
diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt
index b0152e1095c5..b6472e463284 100644
--- a/tools/perf/Documentation/perf.data-file-format.txt
+++ b/tools/perf/Documentation/perf.data-file-format.txt
@@ -373,6 +373,22 @@ struct {
Indicates that trace contains records of PERF_RECORD_COMPRESSED type
that have perf_events records in compressed form.
+ HEADER_CPU_PMU_CAPS = 28,
+
+ A list of cpu PMU capabilities. The format of data is as below.
+
+struct {
+ u32 nr_cpu_pmu_caps;
+ {
+ char name[];
+ char value[];
+ } [nr_cpu_pmu_caps]
+};
+
+
+Example:
+ cpu pmu capabilities: branches=32, max_precise=3, pmu_name=icelake
+
other bits are reserved and should ignored for now
HEADER_FEAT_BITS = 256,
diff --git a/tools/perf/Documentation/security.txt b/tools/perf/Documentation/security.txt
new file mode 100644
index 000000000000..4fe3b8b1958f
--- /dev/null
+++ b/tools/perf/Documentation/security.txt
@@ -0,0 +1,237 @@
+Overview
+========
+
+For general security related questions of perf_event_open() syscall usage,
+performance monitoring and observability operations by Perf see here:
+https://www.kernel.org/doc/html/latest/admin-guide/perf-security.html
+
+Enabling LSM based mandatory access control (MAC) to perf_event_open() syscall
+==============================================================================
+
+LSM hooks for mandatory access control for perf_event_open() syscall can be
+used starting from Linux v5.3. Below are the steps to extend Fedora (v31) with
+Targeted policy with perf_event_open() access control capabilities:
+
+1. Download selinux-policy SRPM package (e.g. selinux-policy-3.14.4-48.fc31.src.rpm on FC31)
+ and install it so rpmbuild directory would exist in the current working directory:
+
+ # rpm -Uhv selinux-policy-3.14.4-48.fc31.src.rpm
+
+2. Get into rpmbuild/SPECS directory and unpack the source code:
+
+ # rpmbuild -bp selinux-policy.spec
+
+3. Place patch below at rpmbuild/BUILD/selinux-policy-b86eaaf4dbcf2d51dd4432df7185c0eaf3cbcc02
+ directory and apply it:
+
+ # patch -p1 < selinux-policy-perf-events-perfmon.patch
+ patching file policy/flask/access_vectors
+ patching file policy/flask/security_classes
+ # cat selinux-policy-perf-events-perfmon.patch
+diff -Nura a/policy/flask/access_vectors b/policy/flask/access_vectors
+--- a/policy/flask/access_vectors 2020-02-04 18:19:53.000000000 +0300
++++ b/policy/flask/access_vectors 2020-02-28 23:37:25.000000000 +0300
+@@ -174,6 +174,7 @@
+ wake_alarm
+ block_suspend
+ audit_read
++ perfmon
+ }
+
+ #
+@@ -1099,3 +1100,15 @@
+
+ class xdp_socket
+ inherits socket
++
++class perf_event
++{
++ open
++ cpu
++ kernel
++ tracepoint
++ read
++ write
++}
++
++
+diff -Nura a/policy/flask/security_classes b/policy/flask/security_classes
+--- a/policy/flask/security_classes 2020-02-04 18:19:53.000000000 +0300
++++ b/policy/flask/security_classes 2020-02-28 21:35:17.000000000 +0300
+@@ -200,4 +200,6 @@
+
+ class xdp_socket
+
++class perf_event
++
+ # FLASK
+
+4. Get into rpmbuild/SPECS directory and build policy packages from patched sources:
+
+ # rpmbuild --noclean --noprep -ba selinux-policy.spec
+
+ so you have this:
+
+ # ls -alh rpmbuild/RPMS/noarch/
+ total 33M
+ drwxr-xr-x. 2 root root 4.0K Mar 20 12:16 .
+ drwxr-xr-x. 3 root root 4.0K Mar 20 12:16 ..
+ -rw-r--r--. 1 root root 112K Mar 20 12:16 selinux-policy-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 1.2M Mar 20 12:17 selinux-policy-devel-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 2.3M Mar 20 12:17 selinux-policy-doc-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 12M Mar 20 12:17 selinux-policy-minimum-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 4.5M Mar 20 12:16 selinux-policy-mls-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 111K Mar 20 12:16 selinux-policy-sandbox-3.14.4-48.fc31.noarch.rpm
+ -rw-r--r--. 1 root root 14M Mar 20 12:17 selinux-policy-targeted-3.14.4-48.fc31.noarch.rpm
+
+5. Install SELinux packages from Fedora repo, if not already done so, and
+ update with the patched rpms above:
+
+ # rpm -Uhv rpmbuild/RPMS/noarch/selinux-policy-*
+
+6. Enable SELinux Permissive mode for Targeted policy, if not already done so:
+
+ # cat /etc/selinux/config
+
+ # This file controls the state of SELinux on the system.
+ # SELINUX= can take one of these three values:
+ # enforcing - SELinux security policy is enforced.
+ # permissive - SELinux prints warnings instead of enforcing.
+ # disabled - No SELinux policy is loaded.
+ SELINUX=permissive
+ # SELINUXTYPE= can take one of these three values:
+ # targeted - Targeted processes are protected,
+ # minimum - Modification of targeted policy. Only selected processes are protected.
+ # mls - Multi Level Security protection.
+ SELINUXTYPE=targeted
+
+7. Enable filesystem SELinux labeling at the next reboot:
+
+ # touch /.autorelabel
+
+8. Reboot machine and it will label filesystems and load Targeted policy into the kernel;
+
+9. Login and check that dmesg output doesn't mention that perf_event class is unknown to SELinux subsystem;
+
+10. Check that SELinux is enabled and in Permissive mode
+
+ # getenforce
+ Permissive
+
+11. Turn SELinux into Enforcing mode:
+
+ # setenforce 1
+ # getenforce
+ Enforcing
+
+Opening access to perf_event_open() syscall on Fedora with SELinux
+==================================================================
+
+Access to performance monitoring and observability operations by Perf
+can be limited for superuser or CAP_PERFMON or CAP_SYS_ADMIN privileged
+processes. MAC policy settings (e.g. SELinux) can be loaded into the kernel
+and prevent unauthorized access to perf_event_open() syscall. In such case
+Perf tool provides a message similar to the one below:
+
+ # perf stat
+ Error:
+ Access to performance monitoring and observability operations is limited.
+ Enforced MAC policy settings (SELinux) can limit access to performance
+ monitoring and observability operations. Inspect system audit records for
+ more perf_event access control information and adjusting the policy.
+ Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open
+ access to performance monitoring and observability operations for users
+ without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.
+ perf_event_paranoid setting is -1:
+ -1: Allow use of (almost) all events by all users
+ Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK
+ >= 0: Disallow raw and ftrace function tracepoint access
+ >= 1: Disallow CPU event access
+ >= 2: Disallow kernel profiling
+ To make the adjusted perf_event_paranoid setting permanent preserve it
+ in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)
+
+To make sure that access is limited by MAC policy settings inspect system
+audit records using journalctl command or /var/log/audit/audit.log so the
+output would contain AVC denied records related to perf_event:
+
+ # journalctl --reverse --no-pager | grep perf_event
+
+ python3[1318099]: SELinux is preventing perf from open access on the perf_event labeled unconfined_t.
+ If you believe that perf should be allowed open access on perf_event labeled unconfined_t by default.
+ setroubleshoot[1318099]: SELinux is preventing perf from open access on the perf_event labeled unconfined_t. For complete SELinux messages run: sealert -l 4595ce5b-e58f-462c-9d86-3bc2074935de
+ audit[1318098]: AVC avc: denied { open } for pid=1318098 comm="perf" scontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tcontext=unconfined_u:unconfined_r:unconfined_t:s0-s0:c0.c1023 tclass=perf_event permissive=0
+
+In order to open access to perf_event_open() syscall MAC policy settings can
+require to be extended. On SELinux system this can be done by loading a special
+policy module extending base policy settings. Perf related policy module can
+be generated using the system audit records about blocking perf_event access.
+Run the command below to generate my-perf.te policy extension file with
+perf_event related rules:
+
+ # ausearch -c 'perf' --raw | audit2allow -M my-perf && cat my-perf.te
+
+ module my-perf 1.0;
+
+ require {
+ type unconfined_t;
+ class perf_event { cpu kernel open read tracepoint write };
+ }
+
+ #============= unconfined_t ==============
+ allow unconfined_t self:perf_event { cpu kernel open read tracepoint write };
+
+Now compile, pack and load my-perf.pp extension module into the kernel:
+
+ # checkmodule -M -m -o my-perf.mod my-perf.te
+ # semodule_package -o my-perf.pp -m my-perf.mod
+ # semodule -X 300 -i my-perf.pp
+
+After all those taken steps above access to perf_event_open() syscall should
+now be allowed by the policy settings. Check access running Perf like this:
+
+ # perf stat
+ ^C
+ Performance counter stats for 'system wide':
+
+ 36,387.41 msec cpu-clock # 7.999 CPUs utilized
+ 2,629 context-switches # 0.072 K/sec
+ 57 cpu-migrations # 0.002 K/sec
+ 1 page-faults # 0.000 K/sec
+ 263,721,559 cycles # 0.007 GHz
+ 175,746,713 instructions # 0.67 insn per cycle
+ 19,628,798 branches # 0.539 M/sec
+ 1,259,201 branch-misses # 6.42% of all branches
+
+ 4.549061439 seconds time elapsed
+
+The generated perf-event.pp related policy extension module can be removed
+from the kernel using this command:
+
+ # semodule -X 300 -r my-perf
+
+Alternatively the module can be temporarily disabled and enabled back using
+these two commands:
+
+ # semodule -d my-perf
+ # semodule -e my-perf
+
+If something went wrong
+=======================
+
+To turn SELinux into Permissive mode:
+ # setenforce 0
+
+To fully disable SELinux during kernel boot [3] set kernel command line parameter selinux=0
+
+To remove SELinux labeling from local filesystems:
+ # find / -mount -print0 | xargs -0 setfattr -h -x security.selinux
+
+To fully turn SELinux off a machine set SELINUX=disabled at /etc/selinux/config file and reboot;
+
+Links
+=====
+
+[1] https://download-ib01.fedoraproject.org/pub/fedora/linux/updates/31/Everything/SRPMS/Packages/s/selinux-policy-3.14.4-49.fc31.src.rpm
+[2] https://docs.fedoraproject.org/en-US/Fedora/11/html/Security-Enhanced_Linux/sect-Security-Enhanced_Linux-Working_with_SELinux-Enabling_and_Disabling_SELinux.html
+[3] https://danwalsh.livejournal.com/10972.html
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 80e55e796be9..877ca6be0ed0 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -23,12 +23,28 @@ include $(srctree)/tools/scripts/Makefile.arch
$(call detected_var,SRCARCH)
NO_PERF_REGS := 1
-NO_SYSCALL_TABLE := 1
+
+ifneq ($(NO_SYSCALL_TABLE),1)
+ NO_SYSCALL_TABLE := 1
+
+ ifeq ($(SRCARCH),x86)
+ ifeq (${IS_64_BIT}, 1)
+ NO_SYSCALL_TABLE := 0
+ endif
+ else
+ ifeq ($(SRCARCH),$(filter $(SRCARCH),powerpc arm64 s390))
+ NO_SYSCALL_TABLE := 0
+ endif
+ endif
+
+ ifneq ($(NO_SYSCALL_TABLE),1)
+ CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT
+ endif
+endif
# Additional ARCH settings for ppc
ifeq ($(SRCARCH),powerpc)
NO_PERF_REGS := 0
- NO_SYSCALL_TABLE := 0
CFLAGS += -I$(OUTPUT)arch/powerpc/include/generated
LIBUNWIND_LIBS := -lunwind -lunwind-ppc64
endif
@@ -37,7 +53,6 @@ endif
ifeq ($(SRCARCH),x86)
$(call detected,CONFIG_X86)
ifeq (${IS_64_BIT}, 1)
- NO_SYSCALL_TABLE := 0
CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated
ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma
@@ -55,7 +70,6 @@ endif
ifeq ($(SRCARCH),arm64)
NO_PERF_REGS := 0
- NO_SYSCALL_TABLE := 0
CFLAGS += -I$(OUTPUT)arch/arm64/include/generated
LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
endif
@@ -70,7 +84,6 @@ endif
ifeq ($(ARCH),s390)
NO_PERF_REGS := 0
- NO_SYSCALL_TABLE := 0
CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated
endif
@@ -78,10 +91,6 @@ ifeq ($(NO_PERF_REGS),0)
$(call detected,CONFIG_PERF_REGS)
endif
-ifneq ($(NO_SYSCALL_TABLE),1)
- CFLAGS += -DHAVE_SYSCALL_TABLE_SUPPORT
-endif
-
# So far there's only x86 and arm libdw unwind support merged in perf.
# Disable it on all other architectures in case libdw unwind
# support is detected in system. Add supported architectures
@@ -228,8 +237,17 @@ strip-libs = $(filter-out -l%,$(1))
PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
+# Python 3.8 changed the output of `python-config --ldflags` to not include the
+# '-lpythonX.Y' flag unless '--embed' is also passed. The feature check for
+# libpython fails if that flag is not included in LDFLAGS
+ifeq ($(shell $(PYTHON_CONFIG_SQ) --ldflags --embed 2>&1 1>/dev/null; echo $$?), 0)
+ PYTHON_CONFIG_LDFLAGS := --ldflags --embed
+else
+ PYTHON_CONFIG_LDFLAGS := --ldflags
+endif
+
ifdef PYTHON_CONFIG
- PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
+ PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) $(PYTHON_CONFIG_LDFLAGS) 2>/dev/null)
PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil
PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --includes 2>/dev/null)
@@ -337,7 +355,7 @@ ifndef NO_BIONIC
endif
ifeq ($(feature-eventfd), 1)
- CFLAGS += -DHAVE_EVENTFD
+ CFLAGS += -DHAVE_EVENTFD_SUPPORT
endif
ifeq ($(feature-get_current_dir_name), 1)
@@ -348,6 +366,10 @@ ifeq ($(feature-gettid), 1)
CFLAGS += -DHAVE_GETTID
endif
+ifeq ($(feature-file-handle), 1)
+ CFLAGS += -DHAVE_FILE_HANDLE
+endif
+
ifdef NO_LIBELF
NO_DWARF := 1
NO_DEMANGLE := 1
@@ -638,6 +660,7 @@ ifeq ($(NO_SYSCALL_TABLE),0)
$(call detected,CONFIG_TRACE)
else
ifndef NO_LIBAUDIT
+ $(call feature_check,libaudit)
ifneq ($(feature-libaudit), 1)
msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
NO_LIBAUDIT := 1
@@ -999,6 +1022,19 @@ ifdef LIBCLANGLLVM
endif
endif
+ifdef LIBPFM4
+ $(call feature_check,libpfm4)
+ ifeq ($(feature-libpfm4), 1)
+ CFLAGS += -DHAVE_LIBPFM
+ EXTLIBS += -lpfm
+ ASCIIDOC_EXTRA = -aHAVE_LIBPFM=1
+ $(call detected,CONFIG_LIBPFM4)
+ else
+ msg := $(warning libpfm4 not found, disables libpfm4 support. Please install libpfm4-dev);
+ NO_LIBPFM4 := 1
+ endif
+endif
+
# Among the variables below, these:
# perfexecdir
# perf_include_dir
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 3eda9d4b88e7..86dbb51bb272 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -118,6 +118,12 @@ include ../scripts/utilities.mak
#
# Define LIBBPF_DYNAMIC to enable libbpf dynamic linking.
#
+# Define NO_SYSCALL_TABLE=1 to disable the use of syscall id to/from name tables
+# generated from the kernel .tbl or unistd.h files and use, if available, libaudit
+# for doing the conversions to/from strings/id.
+#
+# Define LIBPFM4 to enable libpfm4 events extension.
+#
# As per kernel Makefile, avoid funny character set dependencies
unexport LC_ALL
@@ -188,7 +194,7 @@ AWK = awk
# non-config cases
config := 1
-NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help install-doc install-man install-html install-info install-pdf doc man html info pdf
+NON_CONFIG_TARGETS := clean python-clean TAGS tags cscope help
ifdef MAKECMDGOALS
ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
@@ -231,6 +237,7 @@ TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
BPF_DIR = $(srctree)/tools/lib/bpf/
SUBCMD_DIR = $(srctree)/tools/lib/subcmd/
LIBPERF_DIR = $(srctree)/tools/lib/perf/
+DOC_DIR = $(srctree)/tools/perf/Documentation/
# Set FEATURE_TESTS to 'all' so all possible feature checkers are executed.
# Without this setting the output feature dump file misses some features, for
@@ -277,6 +284,7 @@ strip-libs = $(filter-out -l%,$(1))
ifneq ($(OUTPUT),)
TE_PATH=$(OUTPUT)
+ PLUGINS_PATH=$(OUTPUT)
BPF_PATH=$(OUTPUT)
SUBCMD_PATH=$(OUTPUT)
LIBPERF_PATH=$(OUTPUT)
@@ -287,6 +295,7 @@ else
endif
else
TE_PATH=$(TRACE_EVENT_DIR)
+ PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/
API_PATH=$(LIB_DIR)
BPF_PATH=$(BPF_DIR)
SUBCMD_PATH=$(SUBCMD_DIR)
@@ -296,7 +305,7 @@ endif
LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
export LIBTRACEEVENT
-LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)plugins/libtraceevent-dynamic-list
+LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list
#
# The static build has no dynsym table, so this does not work for
@@ -573,7 +582,7 @@ arch_errno_hdr_dir := $(srctree)/tools
arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
$(arch_errno_name_array): $(arch_errno_tbl)
- $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@
+ $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@
sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
@@ -755,10 +764,10 @@ $(LIBTRACEEVENT): FORCE
$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
libtraceevent_plugins: FORCE
- $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
- $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)plugins/libtraceevent-dynamic-list
+ $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
$(LIBTRACEEVENT)-clean:
$(call QUIET_CLEAN, libtraceevent)
@@ -792,7 +801,6 @@ $(LIBSUBCMD): FORCE
$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
$(LIBSUBCMD)-clean:
- $(call QUIET_CLEAN, libsubcmd)
$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean
help:
@@ -832,7 +840,7 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
# 'make doc' should call 'make -C Documentation all'
$(DOC_TARGETS):
- $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
+ $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:doc=all) ASCIIDOC_EXTRA=$(ASCIIDOC_EXTRA)
TAG_FOLDERS= . ../lib ../include
TAG_FILES= ../../include/uapi/linux/perf_event.h
@@ -959,7 +967,7 @@ install-python_ext:
# 'make install-doc' should call 'make -C Documentation install'
$(INSTALL_DOC_TARGETS):
- $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
+ $(Q)$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) $(@:-doc=) ASCIIDOC_EXTRA=$(ASCIIDOC_EXTRA)
### Cleaning rules
@@ -1008,7 +1016,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
$(OUTPUT)$(rename_flags_array) \
$(OUTPUT)$(arch_errno_name_array) \
$(OUTPUT)$(sync_file_range_arrays)
- $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
+ $(call QUIET_CLEAN, Documentation) \
+ $(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null
#
# To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index 941f814820b8..cea5e33d61d2 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -23,6 +23,7 @@
#include "../../util/event.h"
#include "../../util/evlist.h"
#include "../../util/evsel.h"
+#include "../../util/perf_api_probe.h"
#include "../../util/evsel_config.h"
#include "../../util/pmu.h"
#include "../../util/cs-etm.h"
@@ -215,7 +216,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
struct evsel *evsel)
{
char msg[BUFSIZ], path[PATH_MAX], *sink;
- struct perf_evsel_config_term *term;
+ struct evsel_config_term *term;
int ret = -EINVAL;
u32 hash;
@@ -223,7 +224,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
return 0;
list_for_each_entry(term, &evsel->config_terms, list) {
- if (term->type != PERF_EVSEL__CONFIG_TERM_DRV_CFG)
+ if (term->type != EVSEL__CONFIG_TERM_DRV_CFG)
continue;
sink = term->val.str;
@@ -232,7 +233,7 @@ static int cs_etm_set_sink_attr(struct perf_pmu *pmu,
ret = perf_pmu__scan_file(pmu, path, "%x", &hash);
if (ret != 1) {
pr_err("failed to set sink \"%s\" on event %s with %d (%s)\n",
- sink, perf_evsel__name(evsel), errno,
+ sink, evsel__name(evsel), errno,
str_error_r(errno, msg, sizeof(msg)));
return ret;
}
@@ -264,7 +265,8 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
ptr->evlist = evlist;
ptr->snapshot_mode = opts->auxtrace_snapshot_mode;
- if (perf_can_record_switch_events())
+ if (!record_opts__no_switch_events(opts) &&
+ perf_can_record_switch_events())
opts->record_switch_events = true;
evlist__for_each_entry(evlist, evsel) {
@@ -401,7 +403,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
* when a context switch happened.
*/
if (!perf_cpu_map__empty(cpus)) {
- perf_evsel__set_sample_bit(cs_etm_evsel, CPU);
+ evsel__set_sample_bit(cs_etm_evsel, CPU);
err = cs_etm_set_option(itr, cs_etm_evsel,
ETM_OPT_CTXTID | ETM_OPT_TS);
@@ -425,7 +427,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
/* In per-cpu case, always need the time of mmap events etc */
if (!perf_cpu_map__empty(cpus))
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, TIME);
}
out:
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 0a7782c61209..5c13438c7bd4 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -1,6 +1,6 @@
perf-y += header.o
+perf-y += machine.o
perf-y += perf_regs.o
-perf-y += sym-handling.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c
index 27653be24447..e3593063b3d1 100644
--- a/tools/perf/arch/arm64/util/arm-spe.c
+++ b/tools/perf/arch/arm64/util/arm-spe.c
@@ -120,9 +120,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
*/
perf_evlist__to_front(evlist, arm_spe_evsel);
- perf_evsel__set_sample_bit(arm_spe_evsel, CPU);
- perf_evsel__set_sample_bit(arm_spe_evsel, TIME);
- perf_evsel__set_sample_bit(arm_spe_evsel, TID);
+ evsel__set_sample_bit(arm_spe_evsel, CPU);
+ evsel__set_sample_bit(arm_spe_evsel, TIME);
+ evsel__set_sample_bit(arm_spe_evsel, TID);
/* Add dummy event to keep tracking */
err = parse_events(evlist, "dummy:u", NULL);
@@ -134,9 +134,9 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
tracking_evsel->core.attr.freq = 0;
tracking_evsel->core.attr.sample_period = 1;
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
- perf_evsel__set_sample_bit(tracking_evsel, CPU);
- perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+ evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, CPU);
+ evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
return 0;
}
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
new file mode 100644
index 000000000000..d41b27e781d3
--- /dev/null
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <string.h>
+#include "debug.h"
+#include "symbol.h"
+
+/* On arm64, kernel text segment start at high memory address,
+ * for example 0xffff 0000 8xxx xxxx. Modules start at a low memory
+ * address, like 0xffff 0000 00ax xxxx. When only samll amount of
+ * memory is used by modules, gap between end of module's text segment
+ * and start of kernel text segment may be reach 2G.
+ * Therefore do not fill this gap and do not assign it to the kernel dso map.
+ */
+
+#define SYMBOL_LIMIT (1 << 12) /* 4K */
+
+void arch__symbols__fixup_end(struct symbol *p, struct symbol *c)
+{
+ if ((strchr(p->name, '[') && strchr(c->name, '[') == NULL) ||
+ (strchr(p->name, '[') == NULL && strchr(c->name, '[')))
+ /* Limit range of last symbol in module and kernel */
+ p->end += SYMBOL_LIMIT;
+ else
+ p->end = c->start;
+ pr_debug4("%s sym:%s end:%#lx\n", __func__, p->name, p->end);
+}
diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c
deleted file mode 100644
index 8dfa3e5229f1..000000000000
--- a/tools/perf/arch/arm64/util/sym-handling.c
+++ /dev/null
@@ -1,19 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *
- * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
- */
-
-#include "symbol.h" // for the elf__needs_adjust_symbols() prototype
-#include <stdbool.h>
-
-#ifdef HAVE_LIBELF_SUPPORT
-#include <gelf.h>
-
-bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
-{
- return ehdr.e_type == ET_EXEC ||
- ehdr.e_type == ET_REL ||
- ehdr.e_type == ET_DYN;
-}
-#endif
diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c
index 7623d85e77f3..a50941629649 100644
--- a/tools/perf/arch/arm64/util/unwind-libdw.c
+++ b/tools/perf/arch/arm64/util/unwind-libdw.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <elfutils/libdwfl.h>
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 7cf0b8803097..e86e210bf514 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,8 +1,8 @@
perf-y += header.o
-perf-y += sym-handling.o
perf-y += kvm-stat.o
perf-y += perf_regs.o
perf-y += mem-events.o
+perf-y += sym-handling.o
perf-$(CONFIG_DWARF) += dwarf-regs.o
perf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index 3b4cdfc5efd6..d4870074f14c 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -7,6 +7,8 @@
#include <string.h>
#include <linux/stringify.h>
#include "header.h"
+#include "metricgroup.h"
+#include <api/fs/fs.h>
#define mfspr(rn) ({unsigned long rval; \
asm volatile("mfspr %0," __stringify(rn) \
@@ -44,3 +46,9 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
return bufp;
}
+
+int arch_get_runtimeparam(void)
+{
+ int count;
+ return sysfs__read_int("/devices/hv_24x7/interface/sockets", &count) < 0 ? 1 : count;
+}
diff --git a/tools/perf/arch/powerpc/util/kvm-stat.c b/tools/perf/arch/powerpc/util/kvm-stat.c
index 16807269317c..eed9e5a42935 100644
--- a/tools/perf/arch/powerpc/util/kvm-stat.c
+++ b/tools/perf/arch/powerpc/util/kvm-stat.c
@@ -39,7 +39,7 @@ static void hcall_event_get_key(struct evsel *evsel,
struct event_key *key)
{
key->info = 0;
- key->key = perf_evsel__intval(evsel, sample, "req");
+ key->key = evsel__intval(evsel, sample, "req");
}
static const char *get_hcall_exit_reason(u64 exit_code)
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index abb7a12d8f93..0856b32f9e08 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -10,16 +10,6 @@
#include "probe-event.h"
#include "probe-file.h"
-#ifdef HAVE_LIBELF_SUPPORT
-bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
-{
- return ehdr.e_type == ET_EXEC ||
- ehdr.e_type == ET_REL ||
- ehdr.e_type == ET_DYN;
-}
-
-#endif
-
int arch__choose_best_symbol(struct symbol *syma,
struct symbol *symb __maybe_unused)
{
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c
index abf2dbc7f829..7b2d96ec28e3 100644
--- a/tools/perf/arch/powerpc/util/unwind-libdw.c
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <elfutils/libdwfl.h>
#include <linux/kernel.h>
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
/* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. */
static const int special_regs[3][2] = {
diff --git a/tools/perf/arch/s390/util/kvm-stat.c b/tools/perf/arch/s390/util/kvm-stat.c
index 0fd4e9f49ed0..34da89ced29a 100644
--- a/tools/perf/arch/s390/util/kvm-stat.c
+++ b/tools/perf/arch/s390/util/kvm-stat.c
@@ -30,7 +30,7 @@ static void event_icpt_insn_get_key(struct evsel *evsel,
{
unsigned long insn;
- insn = perf_evsel__intval(evsel, sample, "instruction");
+ insn = evsel__intval(evsel, sample, "instruction");
key->key = icpt_insn_decoder(insn);
key->exit_reasons = sie_icpt_insn_codes;
}
@@ -39,7 +39,7 @@ static void event_sigp_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "order_code");
+ key->key = evsel__intval(evsel, sample, "order_code");
key->exit_reasons = sie_sigp_order_codes;
}
@@ -47,7 +47,7 @@ static void event_diag_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "code");
+ key->key = evsel__intval(evsel, sample, "code");
key->exit_reasons = sie_diagnose_codes;
}
@@ -55,7 +55,7 @@ static void event_icpt_prog_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "code");
+ key->key = evsel__intval(evsel, sample, "code");
key->exit_reasons = sie_icpt_prog_codes;
}
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
index 44d510bc9b78..37b844f839bc 100644
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -8,357 +8,357 @@
#
# The abi is "common", "64" or "x32" for this file.
#
-0 common read __x64_sys_read
-1 common write __x64_sys_write
-2 common open __x64_sys_open
-3 common close __x64_sys_close
-4 common stat __x64_sys_newstat
-5 common fstat __x64_sys_newfstat
-6 common lstat __x64_sys_newlstat
-7 common poll __x64_sys_poll
-8 common lseek __x64_sys_lseek
-9 common mmap __x64_sys_mmap
-10 common mprotect __x64_sys_mprotect
-11 common munmap __x64_sys_munmap
-12 common brk __x64_sys_brk
-13 64 rt_sigaction __x64_sys_rt_sigaction
-14 common rt_sigprocmask __x64_sys_rt_sigprocmask
-15 64 rt_sigreturn __x64_sys_rt_sigreturn/ptregs
-16 64 ioctl __x64_sys_ioctl
-17 common pread64 __x64_sys_pread64
-18 common pwrite64 __x64_sys_pwrite64
-19 64 readv __x64_sys_readv
-20 64 writev __x64_sys_writev
-21 common access __x64_sys_access
-22 common pipe __x64_sys_pipe
-23 common select __x64_sys_select
-24 common sched_yield __x64_sys_sched_yield
-25 common mremap __x64_sys_mremap
-26 common msync __x64_sys_msync
-27 common mincore __x64_sys_mincore
-28 common madvise __x64_sys_madvise
-29 common shmget __x64_sys_shmget
-30 common shmat __x64_sys_shmat
-31 common shmctl __x64_sys_shmctl
-32 common dup __x64_sys_dup
-33 common dup2 __x64_sys_dup2
-34 common pause __x64_sys_pause
-35 common nanosleep __x64_sys_nanosleep
-36 common getitimer __x64_sys_getitimer
-37 common alarm __x64_sys_alarm
-38 common setitimer __x64_sys_setitimer
-39 common getpid __x64_sys_getpid
-40 common sendfile __x64_sys_sendfile64
-41 common socket __x64_sys_socket
-42 common connect __x64_sys_connect
-43 common accept __x64_sys_accept
-44 common sendto __x64_sys_sendto
-45 64 recvfrom __x64_sys_recvfrom
-46 64 sendmsg __x64_sys_sendmsg
-47 64 recvmsg __x64_sys_recvmsg
-48 common shutdown __x64_sys_shutdown
-49 common bind __x64_sys_bind
-50 common listen __x64_sys_listen
-51 common getsockname __x64_sys_getsockname
-52 common getpeername __x64_sys_getpeername
-53 common socketpair __x64_sys_socketpair
-54 64 setsockopt __x64_sys_setsockopt
-55 64 getsockopt __x64_sys_getsockopt
-56 common clone __x64_sys_clone/ptregs
-57 common fork __x64_sys_fork/ptregs
-58 common vfork __x64_sys_vfork/ptregs
-59 64 execve __x64_sys_execve/ptregs
-60 common exit __x64_sys_exit
-61 common wait4 __x64_sys_wait4
-62 common kill __x64_sys_kill
-63 common uname __x64_sys_newuname
-64 common semget __x64_sys_semget
-65 common semop __x64_sys_semop
-66 common semctl __x64_sys_semctl
-67 common shmdt __x64_sys_shmdt
-68 common msgget __x64_sys_msgget
-69 common msgsnd __x64_sys_msgsnd
-70 common msgrcv __x64_sys_msgrcv
-71 common msgctl __x64_sys_msgctl
-72 common fcntl __x64_sys_fcntl
-73 common flock __x64_sys_flock
-74 common fsync __x64_sys_fsync
-75 common fdatasync __x64_sys_fdatasync
-76 common truncate __x64_sys_truncate
-77 common ftruncate __x64_sys_ftruncate
-78 common getdents __x64_sys_getdents
-79 common getcwd __x64_sys_getcwd
-80 common chdir __x64_sys_chdir
-81 common fchdir __x64_sys_fchdir
-82 common rename __x64_sys_rename
-83 common mkdir __x64_sys_mkdir
-84 common rmdir __x64_sys_rmdir
-85 common creat __x64_sys_creat
-86 common link __x64_sys_link
-87 common unlink __x64_sys_unlink
-88 common symlink __x64_sys_symlink
-89 common readlink __x64_sys_readlink
-90 common chmod __x64_sys_chmod
-91 common fchmod __x64_sys_fchmod
-92 common chown __x64_sys_chown
-93 common fchown __x64_sys_fchown
-94 common lchown __x64_sys_lchown
-95 common umask __x64_sys_umask
-96 common gettimeofday __x64_sys_gettimeofday
-97 common getrlimit __x64_sys_getrlimit
-98 common getrusage __x64_sys_getrusage
-99 common sysinfo __x64_sys_sysinfo
-100 common times __x64_sys_times
-101 64 ptrace __x64_sys_ptrace
-102 common getuid __x64_sys_getuid
-103 common syslog __x64_sys_syslog
-104 common getgid __x64_sys_getgid
-105 common setuid __x64_sys_setuid
-106 common setgid __x64_sys_setgid
-107 common geteuid __x64_sys_geteuid
-108 common getegid __x64_sys_getegid
-109 common setpgid __x64_sys_setpgid
-110 common getppid __x64_sys_getppid
-111 common getpgrp __x64_sys_getpgrp
-112 common setsid __x64_sys_setsid
-113 common setreuid __x64_sys_setreuid
-114 common setregid __x64_sys_setregid
-115 common getgroups __x64_sys_getgroups
-116 common setgroups __x64_sys_setgroups
-117 common setresuid __x64_sys_setresuid
-118 common getresuid __x64_sys_getresuid
-119 common setresgid __x64_sys_setresgid
-120 common getresgid __x64_sys_getresgid
-121 common getpgid __x64_sys_getpgid
-122 common setfsuid __x64_sys_setfsuid
-123 common setfsgid __x64_sys_setfsgid
-124 common getsid __x64_sys_getsid
-125 common capget __x64_sys_capget
-126 common capset __x64_sys_capset
-127 64 rt_sigpending __x64_sys_rt_sigpending
-128 64 rt_sigtimedwait __x64_sys_rt_sigtimedwait
-129 64 rt_sigqueueinfo __x64_sys_rt_sigqueueinfo
-130 common rt_sigsuspend __x64_sys_rt_sigsuspend
-131 64 sigaltstack __x64_sys_sigaltstack
-132 common utime __x64_sys_utime
-133 common mknod __x64_sys_mknod
+0 common read sys_read
+1 common write sys_write
+2 common open sys_open
+3 common close sys_close
+4 common stat sys_newstat
+5 common fstat sys_newfstat
+6 common lstat sys_newlstat
+7 common poll sys_poll
+8 common lseek sys_lseek
+9 common mmap sys_mmap
+10 common mprotect sys_mprotect
+11 common munmap sys_munmap
+12 common brk sys_brk
+13 64 rt_sigaction sys_rt_sigaction
+14 common rt_sigprocmask sys_rt_sigprocmask
+15 64 rt_sigreturn sys_rt_sigreturn
+16 64 ioctl sys_ioctl
+17 common pread64 sys_pread64
+18 common pwrite64 sys_pwrite64
+19 64 readv sys_readv
+20 64 writev sys_writev
+21 common access sys_access
+22 common pipe sys_pipe
+23 common select sys_select
+24 common sched_yield sys_sched_yield
+25 common mremap sys_mremap
+26 common msync sys_msync
+27 common mincore sys_mincore
+28 common madvise sys_madvise
+29 common shmget sys_shmget
+30 common shmat sys_shmat
+31 common shmctl sys_shmctl
+32 common dup sys_dup
+33 common dup2 sys_dup2
+34 common pause sys_pause
+35 common nanosleep sys_nanosleep
+36 common getitimer sys_getitimer
+37 common alarm sys_alarm
+38 common setitimer sys_setitimer
+39 common getpid sys_getpid
+40 common sendfile sys_sendfile64
+41 common socket sys_socket
+42 common connect sys_connect
+43 common accept sys_accept
+44 common sendto sys_sendto
+45 64 recvfrom sys_recvfrom
+46 64 sendmsg sys_sendmsg
+47 64 recvmsg sys_recvmsg
+48 common shutdown sys_shutdown
+49 common bind sys_bind
+50 common listen sys_listen
+51 common getsockname sys_getsockname
+52 common getpeername sys_getpeername
+53 common socketpair sys_socketpair
+54 64 setsockopt sys_setsockopt
+55 64 getsockopt sys_getsockopt
+56 common clone sys_clone
+57 common fork sys_fork
+58 common vfork sys_vfork
+59 64 execve sys_execve
+60 common exit sys_exit
+61 common wait4 sys_wait4
+62 common kill sys_kill
+63 common uname sys_newuname
+64 common semget sys_semget
+65 common semop sys_semop
+66 common semctl sys_semctl
+67 common shmdt sys_shmdt
+68 common msgget sys_msgget
+69 common msgsnd sys_msgsnd
+70 common msgrcv sys_msgrcv
+71 common msgctl sys_msgctl
+72 common fcntl sys_fcntl
+73 common flock sys_flock
+74 common fsync sys_fsync
+75 common fdatasync sys_fdatasync
+76 common truncate sys_truncate
+77 common ftruncate sys_ftruncate
+78 common getdents sys_getdents
+79 common getcwd sys_getcwd
+80 common chdir sys_chdir
+81 common fchdir sys_fchdir
+82 common rename sys_rename
+83 common mkdir sys_mkdir
+84 common rmdir sys_rmdir
+85 common creat sys_creat
+86 common link sys_link
+87 common unlink sys_unlink
+88 common symlink sys_symlink
+89 common readlink sys_readlink
+90 common chmod sys_chmod
+91 common fchmod sys_fchmod
+92 common chown sys_chown
+93 common fchown sys_fchown
+94 common lchown sys_lchown
+95 common umask sys_umask
+96 common gettimeofday sys_gettimeofday
+97 common getrlimit sys_getrlimit
+98 common getrusage sys_getrusage
+99 common sysinfo sys_sysinfo
+100 common times sys_times
+101 64 ptrace sys_ptrace
+102 common getuid sys_getuid
+103 common syslog sys_syslog
+104 common getgid sys_getgid
+105 common setuid sys_setuid
+106 common setgid sys_setgid
+107 common geteuid sys_geteuid
+108 common getegid sys_getegid
+109 common setpgid sys_setpgid
+110 common getppid sys_getppid
+111 common getpgrp sys_getpgrp
+112 common setsid sys_setsid
+113 common setreuid sys_setreuid
+114 common setregid sys_setregid
+115 common getgroups sys_getgroups
+116 common setgroups sys_setgroups
+117 common setresuid sys_setresuid
+118 common getresuid sys_getresuid
+119 common setresgid sys_setresgid
+120 common getresgid sys_getresgid
+121 common getpgid sys_getpgid
+122 common setfsuid sys_setfsuid
+123 common setfsgid sys_setfsgid
+124 common getsid sys_getsid
+125 common capget sys_capget
+126 common capset sys_capset
+127 64 rt_sigpending sys_rt_sigpending
+128 64 rt_sigtimedwait sys_rt_sigtimedwait
+129 64 rt_sigqueueinfo sys_rt_sigqueueinfo
+130 common rt_sigsuspend sys_rt_sigsuspend
+131 64 sigaltstack sys_sigaltstack
+132 common utime sys_utime
+133 common mknod sys_mknod
134 64 uselib
-135 common personality __x64_sys_personality
-136 common ustat __x64_sys_ustat
-137 common statfs __x64_sys_statfs
-138 common fstatfs __x64_sys_fstatfs
-139 common sysfs __x64_sys_sysfs
-140 common getpriority __x64_sys_getpriority
-141 common setpriority __x64_sys_setpriority
-142 common sched_setparam __x64_sys_sched_setparam
-143 common sched_getparam __x64_sys_sched_getparam
-144 common sched_setscheduler __x64_sys_sched_setscheduler
-145 common sched_getscheduler __x64_sys_sched_getscheduler
-146 common sched_get_priority_max __x64_sys_sched_get_priority_max
-147 common sched_get_priority_min __x64_sys_sched_get_priority_min
-148 common sched_rr_get_interval __x64_sys_sched_rr_get_interval
-149 common mlock __x64_sys_mlock
-150 common munlock __x64_sys_munlock
-151 common mlockall __x64_sys_mlockall
-152 common munlockall __x64_sys_munlockall
-153 common vhangup __x64_sys_vhangup
-154 common modify_ldt __x64_sys_modify_ldt
-155 common pivot_root __x64_sys_pivot_root
-156 64 _sysctl __x64_sys_sysctl
-157 common prctl __x64_sys_prctl
-158 common arch_prctl __x64_sys_arch_prctl
-159 common adjtimex __x64_sys_adjtimex
-160 common setrlimit __x64_sys_setrlimit
-161 common chroot __x64_sys_chroot
-162 common sync __x64_sys_sync
-163 common acct __x64_sys_acct
-164 common settimeofday __x64_sys_settimeofday
-165 common mount __x64_sys_mount
-166 common umount2 __x64_sys_umount
-167 common swapon __x64_sys_swapon
-168 common swapoff __x64_sys_swapoff
-169 common reboot __x64_sys_reboot
-170 common sethostname __x64_sys_sethostname
-171 common setdomainname __x64_sys_setdomainname
-172 common iopl __x64_sys_iopl/ptregs
-173 common ioperm __x64_sys_ioperm
+135 common personality sys_personality
+136 common ustat sys_ustat
+137 common statfs sys_statfs
+138 common fstatfs sys_fstatfs
+139 common sysfs sys_sysfs
+140 common getpriority sys_getpriority
+141 common setpriority sys_setpriority
+142 common sched_setparam sys_sched_setparam
+143 common sched_getparam sys_sched_getparam
+144 common sched_setscheduler sys_sched_setscheduler
+145 common sched_getscheduler sys_sched_getscheduler
+146 common sched_get_priority_max sys_sched_get_priority_max
+147 common sched_get_priority_min sys_sched_get_priority_min
+148 common sched_rr_get_interval sys_sched_rr_get_interval
+149 common mlock sys_mlock
+150 common munlock sys_munlock
+151 common mlockall sys_mlockall
+152 common munlockall sys_munlockall
+153 common vhangup sys_vhangup
+154 common modify_ldt sys_modify_ldt
+155 common pivot_root sys_pivot_root
+156 64 _sysctl sys_sysctl
+157 common prctl sys_prctl
+158 common arch_prctl sys_arch_prctl
+159 common adjtimex sys_adjtimex
+160 common setrlimit sys_setrlimit
+161 common chroot sys_chroot
+162 common sync sys_sync
+163 common acct sys_acct
+164 common settimeofday sys_settimeofday
+165 common mount sys_mount
+166 common umount2 sys_umount
+167 common swapon sys_swapon
+168 common swapoff sys_swapoff
+169 common reboot sys_reboot
+170 common sethostname sys_sethostname
+171 common setdomainname sys_setdomainname
+172 common iopl sys_iopl
+173 common ioperm sys_ioperm
174 64 create_module
-175 common init_module __x64_sys_init_module
-176 common delete_module __x64_sys_delete_module
+175 common init_module sys_init_module
+176 common delete_module sys_delete_module
177 64 get_kernel_syms
178 64 query_module
-179 common quotactl __x64_sys_quotactl
+179 common quotactl sys_quotactl
180 64 nfsservctl
181 common getpmsg
182 common putpmsg
183 common afs_syscall
184 common tuxcall
185 common security
-186 common gettid __x64_sys_gettid
-187 common readahead __x64_sys_readahead
-188 common setxattr __x64_sys_setxattr
-189 common lsetxattr __x64_sys_lsetxattr
-190 common fsetxattr __x64_sys_fsetxattr
-191 common getxattr __x64_sys_getxattr
-192 common lgetxattr __x64_sys_lgetxattr
-193 common fgetxattr __x64_sys_fgetxattr
-194 common listxattr __x64_sys_listxattr
-195 common llistxattr __x64_sys_llistxattr
-196 common flistxattr __x64_sys_flistxattr
-197 common removexattr __x64_sys_removexattr
-198 common lremovexattr __x64_sys_lremovexattr
-199 common fremovexattr __x64_sys_fremovexattr
-200 common tkill __x64_sys_tkill
-201 common time __x64_sys_time
-202 common futex __x64_sys_futex
-203 common sched_setaffinity __x64_sys_sched_setaffinity
-204 common sched_getaffinity __x64_sys_sched_getaffinity
+186 common gettid sys_gettid
+187 common readahead sys_readahead
+188 common setxattr sys_setxattr
+189 common lsetxattr sys_lsetxattr
+190 common fsetxattr sys_fsetxattr
+191 common getxattr sys_getxattr
+192 common lgetxattr sys_lgetxattr
+193 common fgetxattr sys_fgetxattr
+194 common listxattr sys_listxattr
+195 common llistxattr sys_llistxattr
+196 common flistxattr sys_flistxattr
+197 common removexattr sys_removexattr
+198 common lremovexattr sys_lremovexattr
+199 common fremovexattr sys_fremovexattr
+200 common tkill sys_tkill
+201 common time sys_time
+202 common futex sys_futex
+203 common sched_setaffinity sys_sched_setaffinity
+204 common sched_getaffinity sys_sched_getaffinity
205 64 set_thread_area
-206 64 io_setup __x64_sys_io_setup
-207 common io_destroy __x64_sys_io_destroy
-208 common io_getevents __x64_sys_io_getevents
-209 64 io_submit __x64_sys_io_submit
-210 common io_cancel __x64_sys_io_cancel
+206 64 io_setup sys_io_setup
+207 common io_destroy sys_io_destroy
+208 common io_getevents sys_io_getevents
+209 64 io_submit sys_io_submit
+210 common io_cancel sys_io_cancel
211 64 get_thread_area
-212 common lookup_dcookie __x64_sys_lookup_dcookie
-213 common epoll_create __x64_sys_epoll_create
+212 common lookup_dcookie sys_lookup_dcookie
+213 common epoll_create sys_epoll_create
214 64 epoll_ctl_old
215 64 epoll_wait_old
-216 common remap_file_pages __x64_sys_remap_file_pages
-217 common getdents64 __x64_sys_getdents64
-218 common set_tid_address __x64_sys_set_tid_address
-219 common restart_syscall __x64_sys_restart_syscall
-220 common semtimedop __x64_sys_semtimedop
-221 common fadvise64 __x64_sys_fadvise64
-222 64 timer_create __x64_sys_timer_create
-223 common timer_settime __x64_sys_timer_settime
-224 common timer_gettime __x64_sys_timer_gettime
-225 common timer_getoverrun __x64_sys_timer_getoverrun
-226 common timer_delete __x64_sys_timer_delete
-227 common clock_settime __x64_sys_clock_settime
-228 common clock_gettime __x64_sys_clock_gettime
-229 common clock_getres __x64_sys_clock_getres
-230 common clock_nanosleep __x64_sys_clock_nanosleep
-231 common exit_group __x64_sys_exit_group
-232 common epoll_wait __x64_sys_epoll_wait
-233 common epoll_ctl __x64_sys_epoll_ctl
-234 common tgkill __x64_sys_tgkill
-235 common utimes __x64_sys_utimes
+216 common remap_file_pages sys_remap_file_pages
+217 common getdents64 sys_getdents64
+218 common set_tid_address sys_set_tid_address
+219 common restart_syscall sys_restart_syscall
+220 common semtimedop sys_semtimedop
+221 common fadvise64 sys_fadvise64
+222 64 timer_create sys_timer_create
+223 common timer_settime sys_timer_settime
+224 common timer_gettime sys_timer_gettime
+225 common timer_getoverrun sys_timer_getoverrun
+226 common timer_delete sys_timer_delete
+227 common clock_settime sys_clock_settime
+228 common clock_gettime sys_clock_gettime
+229 common clock_getres sys_clock_getres
+230 common clock_nanosleep sys_clock_nanosleep
+231 common exit_group sys_exit_group
+232 common epoll_wait sys_epoll_wait
+233 common epoll_ctl sys_epoll_ctl
+234 common tgkill sys_tgkill
+235 common utimes sys_utimes
236 64 vserver
-237 common mbind __x64_sys_mbind
-238 common set_mempolicy __x64_sys_set_mempolicy
-239 common get_mempolicy __x64_sys_get_mempolicy
-240 common mq_open __x64_sys_mq_open
-241 common mq_unlink __x64_sys_mq_unlink
-242 common mq_timedsend __x64_sys_mq_timedsend
-243 common mq_timedreceive __x64_sys_mq_timedreceive
-244 64 mq_notify __x64_sys_mq_notify
-245 common mq_getsetattr __x64_sys_mq_getsetattr
-246 64 kexec_load __x64_sys_kexec_load
-247 64 waitid __x64_sys_waitid
-248 common add_key __x64_sys_add_key
-249 common request_key __x64_sys_request_key
-250 common keyctl __x64_sys_keyctl
-251 common ioprio_set __x64_sys_ioprio_set
-252 common ioprio_get __x64_sys_ioprio_get
-253 common inotify_init __x64_sys_inotify_init
-254 common inotify_add_watch __x64_sys_inotify_add_watch
-255 common inotify_rm_watch __x64_sys_inotify_rm_watch
-256 common migrate_pages __x64_sys_migrate_pages
-257 common openat __x64_sys_openat
-258 common mkdirat __x64_sys_mkdirat
-259 common mknodat __x64_sys_mknodat
-260 common fchownat __x64_sys_fchownat
-261 common futimesat __x64_sys_futimesat
-262 common newfstatat __x64_sys_newfstatat
-263 common unlinkat __x64_sys_unlinkat
-264 common renameat __x64_sys_renameat
-265 common linkat __x64_sys_linkat
-266 common symlinkat __x64_sys_symlinkat
-267 common readlinkat __x64_sys_readlinkat
-268 common fchmodat __x64_sys_fchmodat
-269 common faccessat __x64_sys_faccessat
-270 common pselect6 __x64_sys_pselect6
-271 common ppoll __x64_sys_ppoll
-272 common unshare __x64_sys_unshare
-273 64 set_robust_list __x64_sys_set_robust_list
-274 64 get_robust_list __x64_sys_get_robust_list
-275 common splice __x64_sys_splice
-276 common tee __x64_sys_tee
-277 common sync_file_range __x64_sys_sync_file_range
-278 64 vmsplice __x64_sys_vmsplice
-279 64 move_pages __x64_sys_move_pages
-280 common utimensat __x64_sys_utimensat
-281 common epoll_pwait __x64_sys_epoll_pwait
-282 common signalfd __x64_sys_signalfd
-283 common timerfd_create __x64_sys_timerfd_create
-284 common eventfd __x64_sys_eventfd
-285 common fallocate __x64_sys_fallocate
-286 common timerfd_settime __x64_sys_timerfd_settime
-287 common timerfd_gettime __x64_sys_timerfd_gettime
-288 common accept4 __x64_sys_accept4
-289 common signalfd4 __x64_sys_signalfd4
-290 common eventfd2 __x64_sys_eventfd2
-291 common epoll_create1 __x64_sys_epoll_create1
-292 common dup3 __x64_sys_dup3
-293 common pipe2 __x64_sys_pipe2
-294 common inotify_init1 __x64_sys_inotify_init1
-295 64 preadv __x64_sys_preadv
-296 64 pwritev __x64_sys_pwritev
-297 64 rt_tgsigqueueinfo __x64_sys_rt_tgsigqueueinfo
-298 common perf_event_open __x64_sys_perf_event_open
-299 64 recvmmsg __x64_sys_recvmmsg
-300 common fanotify_init __x64_sys_fanotify_init
-301 common fanotify_mark __x64_sys_fanotify_mark
-302 common prlimit64 __x64_sys_prlimit64
-303 common name_to_handle_at __x64_sys_name_to_handle_at
-304 common open_by_handle_at __x64_sys_open_by_handle_at
-305 common clock_adjtime __x64_sys_clock_adjtime
-306 common syncfs __x64_sys_syncfs
-307 64 sendmmsg __x64_sys_sendmmsg
-308 common setns __x64_sys_setns
-309 common getcpu __x64_sys_getcpu
-310 64 process_vm_readv __x64_sys_process_vm_readv
-311 64 process_vm_writev __x64_sys_process_vm_writev
-312 common kcmp __x64_sys_kcmp
-313 common finit_module __x64_sys_finit_module
-314 common sched_setattr __x64_sys_sched_setattr
-315 common sched_getattr __x64_sys_sched_getattr
-316 common renameat2 __x64_sys_renameat2
-317 common seccomp __x64_sys_seccomp
-318 common getrandom __x64_sys_getrandom
-319 common memfd_create __x64_sys_memfd_create
-320 common kexec_file_load __x64_sys_kexec_file_load
-321 common bpf __x64_sys_bpf
-322 64 execveat __x64_sys_execveat/ptregs
-323 common userfaultfd __x64_sys_userfaultfd
-324 common membarrier __x64_sys_membarrier
-325 common mlock2 __x64_sys_mlock2
-326 common copy_file_range __x64_sys_copy_file_range
-327 64 preadv2 __x64_sys_preadv2
-328 64 pwritev2 __x64_sys_pwritev2
-329 common pkey_mprotect __x64_sys_pkey_mprotect
-330 common pkey_alloc __x64_sys_pkey_alloc
-331 common pkey_free __x64_sys_pkey_free
-332 common statx __x64_sys_statx
-333 common io_pgetevents __x64_sys_io_pgetevents
-334 common rseq __x64_sys_rseq
+237 common mbind sys_mbind
+238 common set_mempolicy sys_set_mempolicy
+239 common get_mempolicy sys_get_mempolicy
+240 common mq_open sys_mq_open
+241 common mq_unlink sys_mq_unlink
+242 common mq_timedsend sys_mq_timedsend
+243 common mq_timedreceive sys_mq_timedreceive
+244 64 mq_notify sys_mq_notify
+245 common mq_getsetattr sys_mq_getsetattr
+246 64 kexec_load sys_kexec_load
+247 64 waitid sys_waitid
+248 common add_key sys_add_key
+249 common request_key sys_request_key
+250 common keyctl sys_keyctl
+251 common ioprio_set sys_ioprio_set
+252 common ioprio_get sys_ioprio_get
+253 common inotify_init sys_inotify_init
+254 common inotify_add_watch sys_inotify_add_watch
+255 common inotify_rm_watch sys_inotify_rm_watch
+256 common migrate_pages sys_migrate_pages
+257 common openat sys_openat
+258 common mkdirat sys_mkdirat
+259 common mknodat sys_mknodat
+260 common fchownat sys_fchownat
+261 common futimesat sys_futimesat
+262 common newfstatat sys_newfstatat
+263 common unlinkat sys_unlinkat
+264 common renameat sys_renameat
+265 common linkat sys_linkat
+266 common symlinkat sys_symlinkat
+267 common readlinkat sys_readlinkat
+268 common fchmodat sys_fchmodat
+269 common faccessat sys_faccessat
+270 common pselect6 sys_pselect6
+271 common ppoll sys_ppoll
+272 common unshare sys_unshare
+273 64 set_robust_list sys_set_robust_list
+274 64 get_robust_list sys_get_robust_list
+275 common splice sys_splice
+276 common tee sys_tee
+277 common sync_file_range sys_sync_file_range
+278 64 vmsplice sys_vmsplice
+279 64 move_pages sys_move_pages
+280 common utimensat sys_utimensat
+281 common epoll_pwait sys_epoll_pwait
+282 common signalfd sys_signalfd
+283 common timerfd_create sys_timerfd_create
+284 common eventfd sys_eventfd
+285 common fallocate sys_fallocate
+286 common timerfd_settime sys_timerfd_settime
+287 common timerfd_gettime sys_timerfd_gettime
+288 common accept4 sys_accept4
+289 common signalfd4 sys_signalfd4
+290 common eventfd2 sys_eventfd2
+291 common epoll_create1 sys_epoll_create1
+292 common dup3 sys_dup3
+293 common pipe2 sys_pipe2
+294 common inotify_init1 sys_inotify_init1
+295 64 preadv sys_preadv
+296 64 pwritev sys_pwritev
+297 64 rt_tgsigqueueinfo sys_rt_tgsigqueueinfo
+298 common perf_event_open sys_perf_event_open
+299 64 recvmmsg sys_recvmmsg
+300 common fanotify_init sys_fanotify_init
+301 common fanotify_mark sys_fanotify_mark
+302 common prlimit64 sys_prlimit64
+303 common name_to_handle_at sys_name_to_handle_at
+304 common open_by_handle_at sys_open_by_handle_at
+305 common clock_adjtime sys_clock_adjtime
+306 common syncfs sys_syncfs
+307 64 sendmmsg sys_sendmmsg
+308 common setns sys_setns
+309 common getcpu sys_getcpu
+310 64 process_vm_readv sys_process_vm_readv
+311 64 process_vm_writev sys_process_vm_writev
+312 common kcmp sys_kcmp
+313 common finit_module sys_finit_module
+314 common sched_setattr sys_sched_setattr
+315 common sched_getattr sys_sched_getattr
+316 common renameat2 sys_renameat2
+317 common seccomp sys_seccomp
+318 common getrandom sys_getrandom
+319 common memfd_create sys_memfd_create
+320 common kexec_file_load sys_kexec_file_load
+321 common bpf sys_bpf
+322 64 execveat sys_execveat
+323 common userfaultfd sys_userfaultfd
+324 common membarrier sys_membarrier
+325 common mlock2 sys_mlock2
+326 common copy_file_range sys_copy_file_range
+327 64 preadv2 sys_preadv2
+328 64 pwritev2 sys_pwritev2
+329 common pkey_mprotect sys_pkey_mprotect
+330 common pkey_alloc sys_pkey_alloc
+331 common pkey_free sys_pkey_free
+332 common statx sys_statx
+333 common io_pgetevents sys_io_pgetevents
+334 common rseq sys_rseq
# don't use numbers 387 through 423, add new calls after the last
# 'common' entry
-424 common pidfd_send_signal __x64_sys_pidfd_send_signal
-425 common io_uring_setup __x64_sys_io_uring_setup
-426 common io_uring_enter __x64_sys_io_uring_enter
-427 common io_uring_register __x64_sys_io_uring_register
-428 common open_tree __x64_sys_open_tree
-429 common move_mount __x64_sys_move_mount
-430 common fsopen __x64_sys_fsopen
-431 common fsconfig __x64_sys_fsconfig
-432 common fsmount __x64_sys_fsmount
-433 common fspick __x64_sys_fspick
-434 common pidfd_open __x64_sys_pidfd_open
-435 common clone3 __x64_sys_clone3/ptregs
-437 common openat2 __x64_sys_openat2
-438 common pidfd_getfd __x64_sys_pidfd_getfd
+424 common pidfd_send_signal sys_pidfd_send_signal
+425 common io_uring_setup sys_io_uring_setup
+426 common io_uring_enter sys_io_uring_enter
+427 common io_uring_register sys_io_uring_register
+428 common open_tree sys_open_tree
+429 common move_mount sys_move_mount
+430 common fsopen sys_fsopen
+431 common fsconfig sys_fsconfig
+432 common fsmount sys_fsmount
+433 common fspick sys_fspick
+434 common pidfd_open sys_pidfd_open
+435 common clone3 sys_clone3
+437 common openat2 sys_openat2
+438 common pidfd_getfd sys_pidfd_getfd
#
# x32-specific system call numbers start at 512 to avoid cache impact
@@ -366,39 +366,39 @@
# on-the-fly for compat_sys_*() compatibility system calls if X86_X32
# is defined.
#
-512 x32 rt_sigaction __x32_compat_sys_rt_sigaction
-513 x32 rt_sigreturn sys32_x32_rt_sigreturn
-514 x32 ioctl __x32_compat_sys_ioctl
-515 x32 readv __x32_compat_sys_readv
-516 x32 writev __x32_compat_sys_writev
-517 x32 recvfrom __x32_compat_sys_recvfrom
-518 x32 sendmsg __x32_compat_sys_sendmsg
-519 x32 recvmsg __x32_compat_sys_recvmsg
-520 x32 execve __x32_compat_sys_execve/ptregs
-521 x32 ptrace __x32_compat_sys_ptrace
-522 x32 rt_sigpending __x32_compat_sys_rt_sigpending
-523 x32 rt_sigtimedwait __x32_compat_sys_rt_sigtimedwait_time64
-524 x32 rt_sigqueueinfo __x32_compat_sys_rt_sigqueueinfo
-525 x32 sigaltstack __x32_compat_sys_sigaltstack
-526 x32 timer_create __x32_compat_sys_timer_create
-527 x32 mq_notify __x32_compat_sys_mq_notify
-528 x32 kexec_load __x32_compat_sys_kexec_load
-529 x32 waitid __x32_compat_sys_waitid
-530 x32 set_robust_list __x32_compat_sys_set_robust_list
-531 x32 get_robust_list __x32_compat_sys_get_robust_list
-532 x32 vmsplice __x32_compat_sys_vmsplice
-533 x32 move_pages __x32_compat_sys_move_pages
-534 x32 preadv __x32_compat_sys_preadv64
-535 x32 pwritev __x32_compat_sys_pwritev64
-536 x32 rt_tgsigqueueinfo __x32_compat_sys_rt_tgsigqueueinfo
-537 x32 recvmmsg __x32_compat_sys_recvmmsg_time64
-538 x32 sendmmsg __x32_compat_sys_sendmmsg
-539 x32 process_vm_readv __x32_compat_sys_process_vm_readv
-540 x32 process_vm_writev __x32_compat_sys_process_vm_writev
-541 x32 setsockopt __x32_compat_sys_setsockopt
-542 x32 getsockopt __x32_compat_sys_getsockopt
-543 x32 io_setup __x32_compat_sys_io_setup
-544 x32 io_submit __x32_compat_sys_io_submit
-545 x32 execveat __x32_compat_sys_execveat/ptregs
-546 x32 preadv2 __x32_compat_sys_preadv64v2
-547 x32 pwritev2 __x32_compat_sys_pwritev64v2
+512 x32 rt_sigaction compat_sys_rt_sigaction
+513 x32 rt_sigreturn compat_sys_x32_rt_sigreturn
+514 x32 ioctl compat_sys_ioctl
+515 x32 readv compat_sys_readv
+516 x32 writev compat_sys_writev
+517 x32 recvfrom compat_sys_recvfrom
+518 x32 sendmsg compat_sys_sendmsg
+519 x32 recvmsg compat_sys_recvmsg
+520 x32 execve compat_sys_execve
+521 x32 ptrace compat_sys_ptrace
+522 x32 rt_sigpending compat_sys_rt_sigpending
+523 x32 rt_sigtimedwait compat_sys_rt_sigtimedwait_time64
+524 x32 rt_sigqueueinfo compat_sys_rt_sigqueueinfo
+525 x32 sigaltstack compat_sys_sigaltstack
+526 x32 timer_create compat_sys_timer_create
+527 x32 mq_notify compat_sys_mq_notify
+528 x32 kexec_load compat_sys_kexec_load
+529 x32 waitid compat_sys_waitid
+530 x32 set_robust_list compat_sys_set_robust_list
+531 x32 get_robust_list compat_sys_get_robust_list
+532 x32 vmsplice compat_sys_vmsplice
+533 x32 move_pages compat_sys_move_pages
+534 x32 preadv compat_sys_preadv64
+535 x32 pwritev compat_sys_pwritev64
+536 x32 rt_tgsigqueueinfo compat_sys_rt_tgsigqueueinfo
+537 x32 recvmmsg compat_sys_recvmmsg_time64
+538 x32 sendmmsg compat_sys_sendmmsg
+539 x32 process_vm_readv compat_sys_process_vm_readv
+540 x32 process_vm_writev compat_sys_process_vm_writev
+541 x32 setsockopt compat_sys_setsockopt
+542 x32 getsockopt compat_sys_getsockopt
+543 x32 io_setup compat_sys_io_setup
+544 x32 io_submit compat_sys_io_submit
+545 x32 execveat compat_sys_execveat
+546 x32 preadv2 compat_sys_preadv64v2
+547 x32 pwritev2 compat_sys_pwritev64v2
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
index ef43be9b6ec2..4e40402a4f81 100644
--- a/tools/perf/arch/x86/tests/dwarf-unwind.c
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -55,6 +55,14 @@ int test__arch_unwind_sample(struct perf_sample *sample,
return -1;
}
+#ifdef MEMORY_SANITIZER
+ /*
+ * Assignments to buf in the assembly function perf_regs_load aren't
+ * seen by memory sanitizer. Zero the memory to convince memory
+ * sanitizer the memory is initialized.
+ */
+ memset(buf, 0, sizeof(u64) * PERF_REGS_MAX);
+#endif
perf_regs_load(buf);
regs->abi = PERF_SAMPLE_REGS_ABI;
regs->regs = buf;
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-32.c b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
index e6461abc9e7b..9708ae892061 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-32.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-32.c
@@ -2085,6 +2085,118 @@
"67 f3 0f 38 f8 1c \tenqcmds (%si),%bx",},
{{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x8c, 0x34, 0x12, }, 8, 0, "", "",
"67 f3 0f 38 f8 8c 34 12 \tenqcmds 0x1234(%si),%cx",},
+{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "",
+"f3 0f ae e8 \tincsspd %eax",},
+{{0x0f, 0xae, 0x28, }, 3, 0, "", "",
+"0f ae 28 \txrstor (%eax)",},
+{{0x0f, 0xae, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "", "",
+"0f ae 2d 78 56 34 12 \txrstor 0x12345678",},
+{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0xae, 0xe8, }, 3, 0, "", "",
+"0f ae e8 \tlfence ",},
+{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "",
+"f3 0f 1e c8 \trdsspd %eax",},
+{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "",
+"f3 0f 01 ea \tsaveprevssp ",},
+{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "",
+"f3 0f 01 28 \trstorssp (%eax)",},
+{{0xf3, 0x0f, 0x01, 0x2d, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f 01 2d 78 56 34 12 \trstorssp 0x12345678",},
+{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%eax,%ecx,8)",},
+{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "",
+"0f 38 f6 08 \twrssd %ecx,(%eax)",},
+{{0x0f, 0x38, 0xf6, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f 38 f6 15 78 56 34 12 \twrssd %edx,0x12345678",},
+{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%eax,%ecx,8)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "",
+"66 0f 38 f5 08 \twrussd %ecx,(%eax)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x15, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"66 0f 38 f5 15 78 56 34 12 \twrussd %edx,0x12345678",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f3 0f 01 e8 \tsetssbsy ",},
+{{0x0f, 0x01, 0xee, }, 3, 0, "", "",
+"0f 01 ee \trdpkru ",},
+{{0x0f, 0x01, 0xef, }, 3, 0, "", "",
+"0f 01 ef \twrpkru ",},
+{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"f3 0f ae 30 \tclrssbsy (%eax)",},
+{{0xf3, 0x0f, 0xae, 0x35, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"f3 0f ae 35 78 56 34 12 \tclrssbsy 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%eax,%ecx,8)",},
+{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "",
+"f3 0f 1e fb \tendbr32 ",},
+{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "",
+"f3 0f 1e fa \tendbr64 ",},
+{{0xff, 0xd0, }, 2, 0, "call", "indirect",
+"ff d0 \tcall *%eax",},
+{{0xff, 0x10, }, 2, 0, "call", "indirect",
+"ff 10 \tcall *(%eax)",},
+{{0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "call", "indirect",
+"ff 15 78 56 34 12 \tcall *0x12345678",},
+{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"ff 94 c8 78 56 34 12 \tcall *0x12345678(%eax,%ecx,8)",},
+{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"f2 ff d0 \tbnd call *%eax",},
+{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"f2 ff 10 \tbnd call *(%eax)",},
+{{0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"f2 ff 15 78 56 34 12 \tbnd call *0x12345678",},
+{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"f2 ff 94 c8 78 56 34 12 \tbnd call *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"3e ff d0 \tnotrack call *%eax",},
+{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"3e ff 10 \tnotrack call *(%eax)",},
+{{0x3e, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"3e ff 15 78 56 34 12 \tnotrack call *0x12345678",},
+{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e ff 94 c8 78 56 34 12 \tnotrack call *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect",
+"3e f2 ff d0 \tnotrack bnd call *%eax",},
+{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"3e f2 ff 10 \tnotrack bnd call *(%eax)",},
+{{0x3e, 0xf2, 0xff, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e f2 ff 15 78 56 34 12 \tnotrack bnd call *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd call *0x12345678(%eax,%ecx,8)",},
+{{0xff, 0xe0, }, 2, 0, "jmp", "indirect",
+"ff e0 \tjmp *%eax",},
+{{0xff, 0x20, }, 2, 0, "jmp", "indirect",
+"ff 20 \tjmp *(%eax)",},
+{{0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 6, 0, "jmp", "indirect",
+"ff 25 78 56 34 12 \tjmp *0x12345678",},
+{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"ff a4 c8 78 56 34 12 \tjmp *0x12345678(%eax,%ecx,8)",},
+{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"f2 ff e0 \tbnd jmp *%eax",},
+{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"f2 ff 20 \tbnd jmp *(%eax)",},
+{{0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"f2 ff 25 78 56 34 12 \tbnd jmp *0x12345678",},
+{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"f2 ff a4 c8 78 56 34 12 \tbnd jmp *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"3e ff e0 \tnotrack jmp *%eax",},
+{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"3e ff 20 \tnotrack jmp *(%eax)",},
+{{0x3e, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"3e ff 25 78 56 34 12 \tnotrack jmp *0x12345678",},
+{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e ff a4 c8 78 56 34 12 \tnotrack jmp *0x12345678(%eax,%ecx,8)",},
+{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect",
+"3e f2 ff e0 \tnotrack bnd jmp *%eax",},
+{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"3e f2 ff 20 \tnotrack bnd jmp *(%eax)",},
+{{0x3e, 0xf2, 0xff, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e f2 ff 25 78 56 34 12 \tnotrack bnd jmp *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmp *0x12345678(%eax,%ecx,8)",},
{{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
"0f 01 cf \tencls ",},
{{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-64.c b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
index 567ecccfad7c..5da17d41d302 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-64.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-64.c
@@ -2263,6 +2263,202 @@
"67 f3 0f 38 f8 18 \tenqcmds (%eax),%ebx",},
{{0x67, 0xf3, 0x0f, 0x38, 0xf8, 0x88, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
"67 f3 0f 38 f8 88 78 56 34 12 \tenqcmds 0x12345678(%eax),%ecx",},
+{{0xf3, 0x0f, 0xae, 0xe8, }, 4, 0, "", "",
+"f3 0f ae e8 \tincsspd %eax",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xe8, }, 5, 0, "", "",
+"f3 41 0f ae e8 \tincsspd %r8d",},
+{{0xf3, 0x48, 0x0f, 0xae, 0xe8, }, 5, 0, "", "",
+"f3 48 0f ae e8 \tincsspq %rax",},
+{{0xf3, 0x49, 0x0f, 0xae, 0xe8, }, 5, 0, "", "",
+"f3 49 0f ae e8 \tincsspq %r8",},
+{{0x0f, 0xae, 0x28, }, 3, 0, "", "",
+"0f ae 28 \txrstor (%rax)",},
+{{0x41, 0x0f, 0xae, 0x28, }, 4, 0, "", "",
+"41 0f ae 28 \txrstor (%r8)",},
+{{0x0f, 0xae, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f ae 2c 25 78 56 34 12 \txrstor 0x12345678",},
+{{0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
+"0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0xae, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"41 0f ae ac c8 78 56 34 12 \txrstor 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0xae, 0xe8, }, 3, 0, "", "",
+"0f ae e8 \tlfence ",},
+{{0xf3, 0x0f, 0x1e, 0xc8, }, 4, 0, "", "",
+"f3 0f 1e c8 \trdsspd %eax",},
+{{0xf3, 0x41, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "",
+"f3 41 0f 1e c8 \trdsspd %r8d",},
+{{0xf3, 0x48, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "",
+"f3 48 0f 1e c8 \trdsspq %rax",},
+{{0xf3, 0x49, 0x0f, 0x1e, 0xc8, }, 5, 0, "", "",
+"f3 49 0f 1e c8 \trdsspq %r8",},
+{{0xf3, 0x0f, 0x01, 0xea, }, 4, 0, "", "",
+"f3 0f 01 ea \tsaveprevssp ",},
+{{0xf3, 0x0f, 0x01, 0x28, }, 4, 0, "", "",
+"f3 0f 01 28 \trstorssp (%rax)",},
+{{0xf3, 0x41, 0x0f, 0x01, 0x28, }, 5, 0, "", "",
+"f3 41 0f 01 28 \trstorssp (%r8)",},
+{{0xf3, 0x0f, 0x01, 0x2c, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 01 2c 25 78 56 34 12 \trstorssp 0x12345678",},
+{{0xf3, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0x01, 0xac, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f 01 ac c8 78 56 34 12 \trstorssp 0x12345678(%r8,%rcx,8)",},
+{{0x0f, 0x38, 0xf6, 0x08, }, 4, 0, "", "",
+"0f 38 f6 08 \twrssd %ecx,(%rax)",},
+{{0x41, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "",
+"41 0f 38 f6 10 \twrssd %edx,(%r8)",},
+{{0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 f6 14 25 78 56 34 12 \twrssd %edx,0x12345678",},
+{{0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%rax,%rcx,8)",},
+{{0x41, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"41 0f 38 f6 94 c8 78 56 34 12 \twrssd %edx,0x12345678(%r8,%rcx,8)",},
+{{0x48, 0x0f, 0x38, 0xf6, 0x08, }, 5, 0, "", "",
+"48 0f 38 f6 08 \twrssq %rcx,(%rax)",},
+{{0x49, 0x0f, 0x38, 0xf6, 0x10, }, 5, 0, "", "",
+"49 0f 38 f6 10 \twrssq %rdx,(%r8)",},
+{{0x48, 0x0f, 0x38, 0xf6, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"48 0f 38 f6 14 25 78 56 34 12 \twrssq %rdx,0x12345678",},
+{{0x48, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"48 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%rax,%rcx,8)",},
+{{0x49, 0x0f, 0x38, 0xf6, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"49 0f 38 f6 94 c8 78 56 34 12 \twrssq %rdx,0x12345678(%r8,%rcx,8)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x08, }, 5, 0, "", "",
+"66 0f 38 f5 08 \twrussd %ecx,(%rax)",},
+{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "",
+"66 41 0f 38 f5 10 \twrussd %edx,(%r8)",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 0f 38 f5 14 25 78 56 34 12 \twrussd %edx,0x12345678",},
+{{0x66, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"66 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x41, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 41 0f 38 f5 94 c8 78 56 34 12 \twrussd %edx,0x12345678(%r8,%rcx,8)",},
+{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x08, }, 6, 0, "", "",
+"66 48 0f 38 f5 08 \twrussq %rcx,(%rax)",},
+{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x10, }, 6, 0, "", "",
+"66 49 0f 38 f5 10 \twrussq %rdx,(%r8)",},
+{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 48 0f 38 f5 14 25 78 56 34 12 \twrussq %rdx,0x12345678",},
+{{0x66, 0x48, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 48 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%rax,%rcx,8)",},
+{{0x66, 0x49, 0x0f, 0x38, 0xf5, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
+"66 49 0f 38 f5 94 c8 78 56 34 12 \twrussq %rdx,0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0x01, 0xe8, }, 4, 0, "", "",
+"f3 0f 01 e8 \tsetssbsy ",},
+{{0x0f, 0x01, 0xee, }, 3, 0, "", "",
+"0f 01 ee \trdpkru ",},
+{{0x0f, 0x01, 0xef, }, 3, 0, "", "",
+"0f 01 ef \twrpkru ",},
+{{0xf3, 0x0f, 0xae, 0x30, }, 4, 0, "", "",
+"f3 0f ae 30 \tclrssbsy (%rax)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0x30, }, 5, 0, "", "",
+"f3 41 0f ae 30 \tclrssbsy (%r8)",},
+{{0xf3, 0x0f, 0xae, 0x34, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae 34 25 78 56 34 12 \tclrssbsy 0x12345678",},
+{{0xf3, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
+"f3 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%rax,%rcx,8)",},
+{{0xf3, 0x41, 0x0f, 0xae, 0xb4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
+"f3 41 0f ae b4 c8 78 56 34 12 \tclrssbsy 0x12345678(%r8,%rcx,8)",},
+{{0xf3, 0x0f, 0x1e, 0xfb, }, 4, 0, "", "",
+"f3 0f 1e fb \tendbr32 ",},
+{{0xf3, 0x0f, 0x1e, 0xfa, }, 4, 0, "", "",
+"f3 0f 1e fa \tendbr64 ",},
+{{0xff, 0xd0, }, 2, 0, "call", "indirect",
+"ff d0 \tcallq *%rax",},
+{{0xff, 0x10, }, 2, 0, "call", "indirect",
+"ff 10 \tcallq *(%rax)",},
+{{0x41, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"41 ff 10 \tcallq *(%r8)",},
+{{0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"ff 14 25 78 56 34 12 \tcallq *0x12345678",},
+{{0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "call", "indirect",
+"ff 94 c8 78 56 34 12 \tcallq *0x12345678(%rax,%rcx,8)",},
+{{0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"41 ff 94 c8 78 56 34 12 \tcallq *0x12345678(%r8,%rcx,8)",},
+{{0xf2, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"f2 ff d0 \tbnd callq *%rax",},
+{{0xf2, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"f2 ff 10 \tbnd callq *(%rax)",},
+{{0xf2, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"f2 41 ff 10 \tbnd callq *(%r8)",},
+{{0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"f2 ff 14 25 78 56 34 12 \tbnd callq *0x12345678",},
+{{0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"f2 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%rax,%rcx,8)",},
+{{0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"f2 41 ff 94 c8 78 56 34 12 \tbnd callq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xff, 0xd0, }, 3, 0, "call", "indirect",
+"3e ff d0 \tnotrack callq *%rax",},
+{{0x3e, 0xff, 0x10, }, 3, 0, "call", "indirect",
+"3e ff 10 \tnotrack callq *(%rax)",},
+{{0x3e, 0x41, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"3e 41 ff 10 \tnotrack callq *(%r8)",},
+{{0x3e, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e ff 14 25 78 56 34 12 \tnotrack callq *0x12345678",},
+{{0x3e, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "call", "indirect",
+"3e ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e 41 ff 94 c8 78 56 34 12 \tnotrack callq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xf2, 0xff, 0xd0, }, 4, 0, "call", "indirect",
+"3e f2 ff d0 \tnotrack bnd callq *%rax",},
+{{0x3e, 0xf2, 0xff, 0x10, }, 4, 0, "call", "indirect",
+"3e f2 ff 10 \tnotrack bnd callq *(%rax)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0x10, }, 5, 0, "call", "indirect",
+"3e f2 41 ff 10 \tnotrack bnd callq *(%r8)",},
+{{0x3e, 0xf2, 0xff, 0x14, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e f2 ff 14 25 78 56 34 12 \tnotrack bnd callq *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "call", "indirect",
+"3e f2 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "call", "indirect",
+"3e f2 41 ff 94 c8 78 56 34 12 \tnotrack bnd callq *0x12345678(%r8,%rcx,8)",},
+{{0xff, 0xe0, }, 2, 0, "jmp", "indirect",
+"ff e0 \tjmpq *%rax",},
+{{0xff, 0x20, }, 2, 0, "jmp", "indirect",
+"ff 20 \tjmpq *(%rax)",},
+{{0x41, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"41 ff 20 \tjmpq *(%r8)",},
+{{0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"ff 24 25 78 56 34 12 \tjmpq *0x12345678",},
+{{0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 7, 0, "jmp", "indirect",
+"ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%rax,%rcx,8)",},
+{{0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"41 ff a4 c8 78 56 34 12 \tjmpq *0x12345678(%r8,%rcx,8)",},
+{{0xf2, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"f2 ff e0 \tbnd jmpq *%rax",},
+{{0xf2, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"f2 ff 20 \tbnd jmpq *(%rax)",},
+{{0xf2, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"f2 41 ff 20 \tbnd jmpq *(%r8)",},
+{{0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"f2 ff 24 25 78 56 34 12 \tbnd jmpq *0x12345678",},
+{{0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"f2 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%rax,%rcx,8)",},
+{{0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"f2 41 ff a4 c8 78 56 34 12 \tbnd jmpq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xff, 0xe0, }, 3, 0, "jmp", "indirect",
+"3e ff e0 \tnotrack jmpq *%rax",},
+{{0x3e, 0xff, 0x20, }, 3, 0, "jmp", "indirect",
+"3e ff 20 \tnotrack jmpq *(%rax)",},
+{{0x3e, 0x41, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"3e 41 ff 20 \tnotrack jmpq *(%r8)",},
+{{0x3e, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e ff 24 25 78 56 34 12 \tnotrack jmpq *0x12345678",},
+{{0x3e, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "jmp", "indirect",
+"3e ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e 41 ff a4 c8 78 56 34 12 \tnotrack jmpq *0x12345678(%r8,%rcx,8)",},
+{{0x3e, 0xf2, 0xff, 0xe0, }, 4, 0, "jmp", "indirect",
+"3e f2 ff e0 \tnotrack bnd jmpq *%rax",},
+{{0x3e, 0xf2, 0xff, 0x20, }, 4, 0, "jmp", "indirect",
+"3e f2 ff 20 \tnotrack bnd jmpq *(%rax)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0x20, }, 5, 0, "jmp", "indirect",
+"3e f2 41 ff 20 \tnotrack bnd jmpq *(%r8)",},
+{{0x3e, 0xf2, 0xff, 0x24, 0x25, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e f2 ff 24 25 78 56 34 12 \tnotrack bnd jmpq *0x12345678",},
+{{0x3e, 0xf2, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "jmp", "indirect",
+"3e f2 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%rax,%rcx,8)",},
+{{0x3e, 0xf2, 0x41, 0xff, 0xa4, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "jmp", "indirect",
+"3e f2 41 ff a4 c8 78 56 34 12 \tnotrack bnd jmpq *0x12345678(%r8,%rcx,8)",},
{{0x0f, 0x01, 0xcf, }, 3, 0, "", "",
"0f 01 cf \tencls ",},
{{0x0f, 0x01, 0xd7, }, 3, 0, "", "",
diff --git a/tools/perf/arch/x86/tests/insn-x86-dat-src.c b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
index ddbf07c50bb8..c3808e94c46e 100644
--- a/tools/perf/arch/x86/tests/insn-x86-dat-src.c
+++ b/tools/perf/arch/x86/tests/insn-x86-dat-src.c
@@ -1771,6 +1771,145 @@ int main(void)
asm volatile("enqcmds (%eax),%ebx");
asm volatile("enqcmds 0x12345678(%eax),%ecx");
+ /* incsspd/q */
+
+ asm volatile("incsspd %eax");
+ asm volatile("incsspd %r8d");
+ asm volatile("incsspq %rax");
+ asm volatile("incsspq %r8");
+ /* Also check instructions in the same group encoding as incsspd/q */
+ asm volatile("xrstor (%rax)");
+ asm volatile("xrstor (%r8)");
+ asm volatile("xrstor (0x12345678)");
+ asm volatile("xrstor 0x12345678(%rax,%rcx,8)");
+ asm volatile("xrstor 0x12345678(%r8,%rcx,8)");
+ asm volatile("lfence");
+
+ /* rdsspd/q */
+
+ asm volatile("rdsspd %eax");
+ asm volatile("rdsspd %r8d");
+ asm volatile("rdsspq %rax");
+ asm volatile("rdsspq %r8");
+
+ /* saveprevssp */
+
+ asm volatile("saveprevssp");
+
+ /* rstorssp */
+
+ asm volatile("rstorssp (%rax)");
+ asm volatile("rstorssp (%r8)");
+ asm volatile("rstorssp (0x12345678)");
+ asm volatile("rstorssp 0x12345678(%rax,%rcx,8)");
+ asm volatile("rstorssp 0x12345678(%r8,%rcx,8)");
+
+ /* wrssd/q */
+
+ asm volatile("wrssd %ecx,(%rax)");
+ asm volatile("wrssd %edx,(%r8)");
+ asm volatile("wrssd %edx,(0x12345678)");
+ asm volatile("wrssd %edx,0x12345678(%rax,%rcx,8)");
+ asm volatile("wrssd %edx,0x12345678(%r8,%rcx,8)");
+ asm volatile("wrssq %rcx,(%rax)");
+ asm volatile("wrssq %rdx,(%r8)");
+ asm volatile("wrssq %rdx,(0x12345678)");
+ asm volatile("wrssq %rdx,0x12345678(%rax,%rcx,8)");
+ asm volatile("wrssq %rdx,0x12345678(%r8,%rcx,8)");
+
+ /* wrussd/q */
+
+ asm volatile("wrussd %ecx,(%rax)");
+ asm volatile("wrussd %edx,(%r8)");
+ asm volatile("wrussd %edx,(0x12345678)");
+ asm volatile("wrussd %edx,0x12345678(%rax,%rcx,8)");
+ asm volatile("wrussd %edx,0x12345678(%r8,%rcx,8)");
+ asm volatile("wrussq %rcx,(%rax)");
+ asm volatile("wrussq %rdx,(%r8)");
+ asm volatile("wrussq %rdx,(0x12345678)");
+ asm volatile("wrussq %rdx,0x12345678(%rax,%rcx,8)");
+ asm volatile("wrussq %rdx,0x12345678(%r8,%rcx,8)");
+
+ /* setssbsy */
+
+ asm volatile("setssbsy");
+ /* Also check instructions in the same group encoding as setssbsy */
+ asm volatile("rdpkru");
+ asm volatile("wrpkru");
+
+ /* clrssbsy */
+
+ asm volatile("clrssbsy (%rax)");
+ asm volatile("clrssbsy (%r8)");
+ asm volatile("clrssbsy (0x12345678)");
+ asm volatile("clrssbsy 0x12345678(%rax,%rcx,8)");
+ asm volatile("clrssbsy 0x12345678(%r8,%rcx,8)");
+
+ /* endbr32/64 */
+
+ asm volatile("endbr32");
+ asm volatile("endbr64");
+
+ /* call with/without notrack prefix */
+
+ asm volatile("callq *%rax"); /* Expecting: call indirect 0 */
+ asm volatile("callq *(%rax)"); /* Expecting: call indirect 0 */
+ asm volatile("callq *(%r8)"); /* Expecting: call indirect 0 */
+ asm volatile("callq *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
+ asm volatile("callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("bnd callq *%rax"); /* Expecting: call indirect 0 */
+ asm volatile("bnd callq *(%rax)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd callq *(%r8)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("notrack callq *%rax"); /* Expecting: call indirect 0 */
+ asm volatile("notrack callq *(%rax)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack callq *(%r8)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack callq *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("notrack bnd callq *%rax"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd callq *(%rax)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd callq *(%r8)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd callq *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd callq *0x12345678(%rax,%rcx,8)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd callq *0x12345678(%r8,%rcx,8)"); /* Expecting: call indirect 0 */
+
+ /* jmp with/without notrack prefix */
+
+ asm volatile("jmpq *%rax"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("notrack jmpq *%rax"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("notrack bnd jmpq *%rax"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmpq *(%rax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmpq *(%r8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmpq *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmpq *0x12345678(%rax,%rcx,8)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmpq *0x12345678(%r8,%rcx,8)"); /* Expecting: jmp indirect 0 */
+
#else /* #ifdef __x86_64__ */
/* bound r32, mem (same op code as EVEX prefix) */
@@ -3434,6 +3573,103 @@ int main(void)
asm volatile("enqcmds (%si),%bx");
asm volatile("enqcmds 0x1234(%si),%cx");
+ /* incsspd */
+
+ asm volatile("incsspd %eax");
+ /* Also check instructions in the same group encoding as incsspd */
+ asm volatile("xrstor (%eax)");
+ asm volatile("xrstor (0x12345678)");
+ asm volatile("xrstor 0x12345678(%eax,%ecx,8)");
+ asm volatile("lfence");
+
+ /* rdsspd */
+
+ asm volatile("rdsspd %eax");
+
+ /* saveprevssp */
+
+ asm volatile("saveprevssp");
+
+ /* rstorssp */
+
+ asm volatile("rstorssp (%eax)");
+ asm volatile("rstorssp (0x12345678)");
+ asm volatile("rstorssp 0x12345678(%eax,%ecx,8)");
+
+ /* wrssd */
+
+ asm volatile("wrssd %ecx,(%eax)");
+ asm volatile("wrssd %edx,(0x12345678)");
+ asm volatile("wrssd %edx,0x12345678(%eax,%ecx,8)");
+
+ /* wrussd */
+
+ asm volatile("wrussd %ecx,(%eax)");
+ asm volatile("wrussd %edx,(0x12345678)");
+ asm volatile("wrussd %edx,0x12345678(%eax,%ecx,8)");
+
+ /* setssbsy */
+
+ asm volatile("setssbsy");
+ /* Also check instructions in the same group encoding as setssbsy */
+ asm volatile("rdpkru");
+ asm volatile("wrpkru");
+
+ /* clrssbsy */
+
+ asm volatile("clrssbsy (%eax)");
+ asm volatile("clrssbsy (0x12345678)");
+ asm volatile("clrssbsy 0x12345678(%eax,%ecx,8)");
+
+ /* endbr32/64 */
+
+ asm volatile("endbr32");
+ asm volatile("endbr64");
+
+ /* call with/without notrack prefix */
+
+ asm volatile("call *%eax"); /* Expecting: call indirect 0 */
+ asm volatile("call *(%eax)"); /* Expecting: call indirect 0 */
+ asm volatile("call *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("bnd call *%eax"); /* Expecting: call indirect 0 */
+ asm volatile("bnd call *(%eax)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd call *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("notrack call *%eax"); /* Expecting: call indirect 0 */
+ asm volatile("notrack call *(%eax)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack call *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
+
+ asm volatile("notrack bnd call *%eax"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd call *(%eax)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd call *(0x12345678)"); /* Expecting: call indirect 0 */
+ asm volatile("notrack bnd call *0x12345678(%eax,%ecx,8)"); /* Expecting: call indirect 0 */
+
+ /* jmp with/without notrack prefix */
+
+ asm volatile("jmp *%eax"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmp *(%eax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("bnd jmp *%eax"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("notrack jmp *%eax"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmp *(%eax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
+
+ asm volatile("notrack bnd jmp *%eax"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmp *(%eax)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmp *(0x12345678)"); /* Expecting: jmp indirect 0 */
+ asm volatile("notrack bnd jmp *0x12345678(%eax,%ecx,8)"); /* Expecting: jmp indirect 0 */
+
#endif /* #ifndef __x86_64__ */
/* SGX */
diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
index 909ead08a6f6..026d32ed078e 100644
--- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c
+++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c
@@ -130,13 +130,11 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe
goto next_event;
if (strcmp(event->comm.comm, comm1) == 0) {
- CHECK__(perf_evsel__parse_sample(evsel, event,
- &sample));
+ CHECK__(evsel__parse_sample(evsel, event, &sample));
comm1_time = sample.time;
}
if (strcmp(event->comm.comm, comm2) == 0) {
- CHECK__(perf_evsel__parse_sample(evsel, event,
- &sample));
+ CHECK__(evsel__parse_sample(evsel, event, &sample));
comm2_time = sample.time;
}
next_event:
diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
index 09f93800bffd..0dc09b5809c1 100644
--- a/tools/perf/arch/x86/util/intel-bts.c
+++ b/tools/perf/arch/x86/util/intel-bts.c
@@ -224,7 +224,7 @@ static int intel_bts_recording_options(struct auxtrace_record *itr,
* AUX event.
*/
if (!perf_cpu_map__empty(cpus))
- perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
+ evsel__set_sample_bit(intel_bts_evsel, CPU);
}
/* Add dummy event to keep tracking */
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 1643aed8c4c8..839ef52c1ac2 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -25,6 +25,7 @@
#include "../../../util/pmu.h"
#include "../../../util/debug.h"
#include "../../../util/auxtrace.h"
+#include "../../../util/perf_api_probe.h"
#include "../../../util/record.h"
#include "../../../util/target.h"
#include "../../../util/tsc.h"
@@ -58,7 +59,8 @@ struct intel_pt_recording {
size_t priv_size;
};
-static int intel_pt_parse_terms_with_default(struct list_head *formats,
+static int intel_pt_parse_terms_with_default(const char *pmu_name,
+ struct list_head *formats,
const char *str,
u64 *config)
{
@@ -77,7 +79,8 @@ static int intel_pt_parse_terms_with_default(struct list_head *formats,
goto out_free;
attr.config = *config;
- err = perf_pmu__config_terms(formats, &attr, terms, true, NULL);
+ err = perf_pmu__config_terms(pmu_name, formats, &attr, terms, true,
+ NULL);
if (err)
goto out_free;
@@ -87,11 +90,12 @@ out_free:
return err;
}
-static int intel_pt_parse_terms(struct list_head *formats, const char *str,
- u64 *config)
+static int intel_pt_parse_terms(const char *pmu_name, struct list_head *formats,
+ const char *str, u64 *config)
{
*config = 0;
- return intel_pt_parse_terms_with_default(formats, str, config);
+ return intel_pt_parse_terms_with_default(pmu_name, formats, str,
+ config);
}
static u64 intel_pt_masked_bits(u64 mask, u64 bits)
@@ -228,7 +232,8 @@ static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
pr_debug2("%s default config: %s\n", intel_pt_pmu->name, buf);
- intel_pt_parse_terms(&intel_pt_pmu->format, buf, &config);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format, buf,
+ &config);
return config;
}
@@ -336,13 +341,16 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
if (priv_size != ptr->priv_size)
return -EINVAL;
- intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
- intel_pt_parse_terms(&intel_pt_pmu->format, "noretcomp",
- &noretcomp_bit);
- intel_pt_parse_terms(&intel_pt_pmu->format, "mtc", &mtc_bit);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+ "tsc", &tsc_bit);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+ "noretcomp", &noretcomp_bit);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+ "mtc", &mtc_bit);
mtc_freq_bits = perf_pmu__format_bits(&intel_pt_pmu->format,
"mtc_period");
- intel_pt_parse_terms(&intel_pt_pmu->format, "cyc", &cyc_bit);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+ "cyc", &cyc_bit);
intel_pt_tsc_ctc_ratio(&tsc_ctc_ratio_n, &tsc_ctc_ratio_d);
@@ -420,8 +428,8 @@ static int intel_pt_track_switches(struct evlist *evlist)
evsel = evlist__last(evlist);
- perf_evsel__set_sample_bit(evsel, CPU);
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TIME);
evsel->core.system_wide = true;
evsel->no_aux_samples = true;
@@ -555,10 +563,9 @@ static int intel_pt_validate_config(struct perf_pmu *intel_pt_pmu,
static void intel_pt_config_sample_mode(struct perf_pmu *intel_pt_pmu,
struct evsel *evsel)
{
- struct perf_evsel_config_term *term;
u64 user_bits = 0, bits;
+ struct evsel_config_term *term = evsel__get_config_term(evsel, CFG_CHG);
- term = perf_evsel__get_config_term(evsel, CFG_CHG);
if (term)
user_bits = term->val.cfg_chg;
@@ -768,7 +775,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
}
}
- intel_pt_parse_terms(&intel_pt_pmu->format, "tsc", &tsc_bit);
+ intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
+ "tsc", &tsc_bit);
if (opts->full_auxtrace && (intel_pt_evsel->core.attr.config & tsc_bit))
have_timing_info = true;
@@ -779,7 +787,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* Per-cpu recording needs sched_switch events to distinguish different
* threads.
*/
- if (have_timing_info && !perf_cpu_map__empty(cpus)) {
+ if (have_timing_info && !perf_cpu_map__empty(cpus) &&
+ !record_opts__no_switch_events(opts)) {
if (perf_can_record_switch_events()) {
bool cpu_wide = !target__none(&opts->target) &&
!target__has_task(&opts->target);
@@ -801,10 +810,10 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
switch_evsel->no_aux_samples = true;
switch_evsel->immediate = true;
- perf_evsel__set_sample_bit(switch_evsel, TID);
- perf_evsel__set_sample_bit(switch_evsel, TIME);
- perf_evsel__set_sample_bit(switch_evsel, CPU);
- perf_evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);
+ evsel__set_sample_bit(switch_evsel, TID);
+ evsel__set_sample_bit(switch_evsel, TIME);
+ evsel__set_sample_bit(switch_evsel, CPU);
+ evsel__reset_sample_bit(switch_evsel, BRANCH_STACK);
opts->record_switch_events = false;
ptr->have_sched_switch = 3;
@@ -838,7 +847,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* AUX event.
*/
if (!perf_cpu_map__empty(cpus))
- perf_evsel__set_sample_bit(intel_pt_evsel, CPU);
+ evsel__set_sample_bit(intel_pt_evsel, CPU);
}
/* Add dummy event to keep tracking */
@@ -862,11 +871,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
/* In per-cpu case, always need the time of mmap events etc */
if (!perf_cpu_map__empty(cpus)) {
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, TIME);
/* And the CPU for switch events */
- perf_evsel__set_sample_bit(tracking_evsel, CPU);
+ evsel__set_sample_bit(tracking_evsel, CPU);
}
- perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
+ evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK);
}
/*
@@ -874,7 +883,8 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* per-cpu with no sched_switch (except workload-only).
*/
if (!ptr->have_sched_switch && !perf_cpu_map__empty(cpus) &&
- !target__none(&opts->target))
+ !target__none(&opts->target) &&
+ !intel_pt_evsel->core.attr.exclude_user)
ui__warning("Intel Processor Trace decoding will not be possible except for kernel tracing!\n");
return 0;
diff --git a/tools/perf/arch/x86/util/kvm-stat.c b/tools/perf/arch/x86/util/kvm-stat.c
index c0775c39227f..072920475b65 100644
--- a/tools/perf/arch/x86/util/kvm-stat.c
+++ b/tools/perf/arch/x86/util/kvm-stat.c
@@ -31,8 +31,8 @@ const char *kvm_exit_trace = "kvm:kvm_exit";
static void mmio_event_get_key(struct evsel *evsel, struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "gpa");
- key->info = perf_evsel__intval(evsel, sample, "type");
+ key->key = evsel__intval(evsel, sample, "gpa");
+ key->info = evsel__intval(evsel, sample, "type");
}
#define KVM_TRACE_MMIO_READ_UNSATISFIED 0
@@ -48,7 +48,7 @@ static bool mmio_event_begin(struct evsel *evsel,
/* MMIO write begin event in kernel. */
if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
- perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
+ evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_WRITE) {
mmio_event_get_key(evsel, sample, key);
return true;
}
@@ -65,7 +65,7 @@ static bool mmio_event_end(struct evsel *evsel, struct perf_sample *sample,
/* MMIO read end event in kernel.*/
if (!strcmp(evsel->name, "kvm:kvm_mmio") &&
- perf_evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
+ evsel__intval(evsel, sample, "type") == KVM_TRACE_MMIO_READ) {
mmio_event_get_key(evsel, sample, key);
return true;
}
@@ -94,8 +94,8 @@ static void ioport_event_get_key(struct evsel *evsel,
struct perf_sample *sample,
struct event_key *key)
{
- key->key = perf_evsel__intval(evsel, sample, "port");
- key->info = perf_evsel__intval(evsel, sample, "rw");
+ key->key = evsel__intval(evsel, sample, "port");
+ key->info = evsel__intval(evsel, sample, "rw");
}
static bool ioport_event_begin(struct evsel *evsel,
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
index fda8f4206ee4..eea2bf87232b 100644
--- a/tools/perf/arch/x86/util/unwind-libdw.c
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include <elfutils/libdwfl.h>
-#include "../../util/unwind-libdw.h"
-#include "../../util/perf_regs.h"
-#include "../../util/event.h"
+#include "../../../util/unwind-libdw.h"
+#include "../../../util/perf_regs.h"
+#include "../../../util/event.h"
bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
{
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index e4e321b6f883..768e408757a0 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -6,9 +6,10 @@ perf-y += futex-wake.o
perf-y += futex-wake-parallel.o
perf-y += futex-requeue.o
perf-y += futex-lock-pi.o
-
perf-y += epoll-wait.o
perf-y += epoll-ctl.o
+perf-y += synthesize.o
+perf-y += kallsyms-parse.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 4aa6de1aa67d..61cae4966cae 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -41,9 +41,10 @@ int bench_futex_wake_parallel(int argc, const char **argv);
int bench_futex_requeue(int argc, const char **argv);
/* pi futexes */
int bench_futex_lock_pi(int argc, const char **argv);
-
int bench_epoll_wait(int argc, const char **argv);
int bench_epoll_ctl(int argc, const char **argv);
+int bench_synthesize(int argc, const char **argv);
+int bench_kallsyms_parse(int argc, const char **argv);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/epoll-ctl.c b/tools/perf/bench/epoll-ctl.c
index cadc18d42aa4..ca2d591aad8a 100644
--- a/tools/perf/bench/epoll-ctl.c
+++ b/tools/perf/bench/epoll-ctl.c
@@ -5,7 +5,7 @@
* Benchmark the various operations allowed for epoll_ctl(2).
* The idea is to concurrently stress a single epoll instance
*/
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
/* For the CLR_() macros */
#include <string.h>
#include <pthread.h>
@@ -412,4 +412,4 @@ int bench_epoll_ctl(int argc, const char **argv)
errmem:
err(EXIT_FAILURE, "calloc");
}
-#endif // HAVE_EVENTFD
+#endif // HAVE_EVENTFD_SUPPORT
diff --git a/tools/perf/bench/epoll-wait.c b/tools/perf/bench/epoll-wait.c
index f938c585d512..75dca9773186 100644
--- a/tools/perf/bench/epoll-wait.c
+++ b/tools/perf/bench/epoll-wait.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
/*
* Copyright (C) 2018 Davidlohr Bueso.
*
@@ -519,7 +519,8 @@ int bench_epoll_wait(int argc, const char **argv)
qsort(worker, nthreads, sizeof(struct worker), cmpworker);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
@@ -539,4 +540,4 @@ int bench_epoll_wait(int argc, const char **argv)
errmem:
err(EXIT_FAILURE, "calloc");
}
-#endif // HAVE_EVENTFD
+#endif // HAVE_EVENTFD_SUPPORT
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
index 65eebe06c04d..915bf3da7ce2 100644
--- a/tools/perf/bench/futex-hash.c
+++ b/tools/perf/bench/futex-hash.c
@@ -205,7 +205,8 @@ int bench_futex_hash(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
if (!silent) {
if (nfutexes == 1)
diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c
index 89fd8f325f38..bb25d8beb3b8 100644
--- a/tools/perf/bench/futex-lock-pi.c
+++ b/tools/perf/bench/futex-lock-pi.c
@@ -211,7 +211,8 @@ int bench_futex_lock_pi(int argc, const char **argv)
pthread_mutex_destroy(&thread_lock);
for (i = 0; i < nthreads; i++) {
- unsigned long t = worker[i].ops / bench__runtime.tv_sec;
+ unsigned long t = bench__runtime.tv_sec > 0 ?
+ worker[i].ops / bench__runtime.tv_sec : 0;
update_stats(&throughput_stats, t);
if (!silent)
diff --git a/tools/perf/bench/kallsyms-parse.c b/tools/perf/bench/kallsyms-parse.c
new file mode 100644
index 000000000000..2b0d0f980ae9
--- /dev/null
+++ b/tools/perf/bench/kallsyms-parse.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark of /proc/kallsyms parsing.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include <stdlib.h>
+#include "bench.h"
+#include "../util/stat.h"
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+#include <symbol/kallsyms.h>
+
+static unsigned int iterations = 100;
+
+static const struct option options[] = {
+ OPT_UINTEGER('i', "iterations", &iterations,
+ "Number of iterations used to compute average"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals kallsyms-parse <options>",
+ NULL
+};
+
+static int bench_process_symbol(void *arg __maybe_unused,
+ const char *name __maybe_unused,
+ char type __maybe_unused,
+ u64 start __maybe_unused)
+{
+ return 0;
+}
+
+static int do_kallsyms_parse(void)
+{
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev;
+ int err;
+ struct stats time_stats;
+
+ init_stats(&time_stats);
+
+ for (i = 0; i < iterations; i++) {
+ gettimeofday(&start, NULL);
+ err = kallsyms__parse("/proc/kallsyms", NULL,
+ bench_process_symbol);
+ if (err)
+ return err;
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ }
+
+ time_average = avg_stats(&time_stats) / USEC_PER_MSEC;
+ time_stddev = stddev_stats(&time_stats) / USEC_PER_MSEC;
+ printf(" Average kallsyms__parse took: %.3f ms (+- %.3f ms)\n",
+ time_average, time_stddev);
+ return 0;
+}
+
+int bench_kallsyms_parse(int argc, const char **argv)
+{
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ return do_kallsyms_parse();
+}
diff --git a/tools/perf/bench/sched-messaging.c b/tools/perf/bench/sched-messaging.c
index 97e4a4fb3362..71d830d7b923 100644
--- a/tools/perf/bench/sched-messaging.c
+++ b/tools/perf/bench/sched-messaging.c
@@ -40,7 +40,7 @@ struct sender_context {
unsigned int num_fds;
int ready_out;
int wakefd;
- int out_fds[0];
+ int out_fds[];
};
struct receiver_context {
diff --git a/tools/perf/bench/synthesize.c b/tools/perf/bench/synthesize.c
new file mode 100644
index 000000000000..8d624aea1c5e
--- /dev/null
+++ b/tools/perf/bench/synthesize.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Benchmark synthesis of perf events such as at the start of a 'perf
+ * record'. Synthesis is done on the current process and the 'dummy' event
+ * handlers are invoked that support dump_trace but otherwise do nothing.
+ *
+ * Copyright 2019 Google LLC.
+ */
+#include <stdio.h>
+#include "bench.h"
+#include "../util/debug.h"
+#include "../util/session.h"
+#include "../util/stat.h"
+#include "../util/synthetic-events.h"
+#include "../util/target.h"
+#include "../util/thread_map.h"
+#include "../util/tool.h"
+#include "../util/util.h"
+#include <linux/atomic.h>
+#include <linux/err.h>
+#include <linux/time64.h>
+#include <subcmd/parse-options.h>
+
+static unsigned int min_threads = 1;
+static unsigned int max_threads = UINT_MAX;
+static unsigned int single_iterations = 10000;
+static unsigned int multi_iterations = 10;
+static bool run_st;
+static bool run_mt;
+
+static const struct option options[] = {
+ OPT_BOOLEAN('s', "st", &run_st, "Run single threaded benchmark"),
+ OPT_BOOLEAN('t', "mt", &run_mt, "Run multi-threaded benchmark"),
+ OPT_UINTEGER('m', "min-threads", &min_threads,
+ "Minimum number of threads in multithreaded bench"),
+ OPT_UINTEGER('M', "max-threads", &max_threads,
+ "Maximum number of threads in multithreaded bench"),
+ OPT_UINTEGER('i', "single-iterations", &single_iterations,
+ "Number of iterations used to compute single-threaded average"),
+ OPT_UINTEGER('I', "multi-iterations", &multi_iterations,
+ "Number of iterations used to compute multi-threaded average"),
+ OPT_END()
+};
+
+static const char *const bench_usage[] = {
+ "perf bench internals synthesize <options>",
+ NULL
+};
+
+static atomic_t event_count;
+
+static int process_synthesized_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ atomic_inc(&event_count);
+ return 0;
+}
+
+static int do_run_single_threaded(struct perf_session *session,
+ struct perf_thread_map *threads,
+ struct target *target, bool data_mmap)
+{
+ const unsigned int nr_threads_synthesize = 1;
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev, event_average, event_stddev;
+ int err;
+ struct stats time_stats, event_stats;
+
+ init_stats(&time_stats);
+ init_stats(&event_stats);
+
+ for (i = 0; i < single_iterations; i++) {
+ atomic_set(&event_count, 0);
+ gettimeofday(&start, NULL);
+ err = __machine__synthesize_threads(&session->machines.host,
+ NULL,
+ target, threads,
+ process_synthesized_event,
+ data_mmap,
+ nr_threads_synthesize);
+ if (err)
+ return err;
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ update_stats(&event_stats, atomic_read(&event_count));
+ }
+
+ time_average = avg_stats(&time_stats);
+ time_stddev = stddev_stats(&time_stats);
+ printf(" Average %ssynthesis took: %.3f usec (+- %.3f usec)\n",
+ data_mmap ? "data " : "", time_average, time_stddev);
+
+ event_average = avg_stats(&event_stats);
+ event_stddev = stddev_stats(&event_stats);
+ printf(" Average num. events: %.3f (+- %.3f)\n",
+ event_average, event_stddev);
+
+ printf(" Average time per event %.3f usec\n",
+ time_average / event_average);
+ return 0;
+}
+
+static int run_single_threaded(void)
+{
+ struct perf_session *session;
+ struct target target = {
+ .pid = "self",
+ };
+ struct perf_thread_map *threads;
+ int err;
+
+ perf_set_singlethreaded();
+ session = perf_session__new(NULL, false, NULL);
+ if (IS_ERR(session)) {
+ pr_err("Session creation failed.\n");
+ return PTR_ERR(session);
+ }
+ threads = thread_map__new_by_pid(getpid());
+ if (!threads) {
+ pr_err("Thread map creation failed.\n");
+ err = -ENOMEM;
+ goto err_out;
+ }
+
+ puts(
+"Computing performance of single threaded perf event synthesis by\n"
+"synthesizing events on the perf process itself:");
+
+ err = do_run_single_threaded(session, threads, &target, false);
+ if (err)
+ goto err_out;
+
+ err = do_run_single_threaded(session, threads, &target, true);
+
+err_out:
+ if (threads)
+ perf_thread_map__put(threads);
+
+ perf_session__delete(session);
+ return err;
+}
+
+static int do_run_multi_threaded(struct target *target,
+ unsigned int nr_threads_synthesize)
+{
+ struct timeval start, end, diff;
+ u64 runtime_us;
+ unsigned int i;
+ double time_average, time_stddev, event_average, event_stddev;
+ int err;
+ struct stats time_stats, event_stats;
+ struct perf_session *session;
+
+ init_stats(&time_stats);
+ init_stats(&event_stats);
+ for (i = 0; i < multi_iterations; i++) {
+ session = perf_session__new(NULL, false, NULL);
+ if (!session)
+ return -ENOMEM;
+
+ atomic_set(&event_count, 0);
+ gettimeofday(&start, NULL);
+ err = __machine__synthesize_threads(&session->machines.host,
+ NULL,
+ target, NULL,
+ process_synthesized_event,
+ false,
+ nr_threads_synthesize);
+ if (err) {
+ perf_session__delete(session);
+ return err;
+ }
+
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &diff);
+ runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+ update_stats(&time_stats, runtime_us);
+ update_stats(&event_stats, atomic_read(&event_count));
+ perf_session__delete(session);
+ }
+
+ time_average = avg_stats(&time_stats);
+ time_stddev = stddev_stats(&time_stats);
+ printf(" Average synthesis took: %.3f usec (+- %.3f usec)\n",
+ time_average, time_stddev);
+
+ event_average = avg_stats(&event_stats);
+ event_stddev = stddev_stats(&event_stats);
+ printf(" Average num. events: %.3f (+- %.3f)\n",
+ event_average, event_stddev);
+
+ printf(" Average time per event %.3f usec\n",
+ time_average / event_average);
+ return 0;
+}
+
+static int run_multi_threaded(void)
+{
+ struct target target = {
+ .cpu_list = "0"
+ };
+ unsigned int nr_threads_synthesize;
+ int err;
+
+ if (max_threads == UINT_MAX)
+ max_threads = sysconf(_SC_NPROCESSORS_ONLN);
+
+ puts(
+"Computing performance of multi threaded perf event synthesis by\n"
+"synthesizing events on CPU 0:");
+
+ for (nr_threads_synthesize = min_threads;
+ nr_threads_synthesize <= max_threads;
+ nr_threads_synthesize++) {
+ if (nr_threads_synthesize == 1)
+ perf_set_singlethreaded();
+ else
+ perf_set_multithreaded();
+
+ printf(" Number of synthesis threads: %u\n",
+ nr_threads_synthesize);
+
+ err = do_run_multi_threaded(&target, nr_threads_synthesize);
+ if (err)
+ return err;
+ }
+ perf_set_singlethreaded();
+ return 0;
+}
+
+int bench_synthesize(int argc, const char **argv)
+{
+ int err = 0;
+
+ argc = parse_options(argc, argv, options, bench_usage, 0);
+ if (argc) {
+ usage_with_options(bench_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * If neither single threaded or multi-threaded are specified, default
+ * to running just single threaded.
+ */
+ if (!run_st && !run_mt)
+ run_st = true;
+
+ if (run_st)
+ err = run_single_threaded();
+
+ if (!err && run_mt)
+ err = run_multi_threaded();
+
+ return err;
+}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6c0a0412502e..4940d10074c3 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -212,11 +212,9 @@ static bool has_annotation(struct perf_annotate *ann)
return ui__has_annotation() || ann->use_stdio2;
}
-static int perf_evsel__add_sample(struct evsel *evsel,
- struct perf_sample *sample,
- struct addr_location *al,
- struct perf_annotate *ann,
- struct machine *machine)
+static int evsel__add_sample(struct evsel *evsel, struct perf_sample *sample,
+ struct addr_location *al, struct perf_annotate *ann,
+ struct machine *machine)
{
struct hists *hists = evsel__hists(evsel);
struct hist_entry *he;
@@ -278,7 +276,7 @@ static int process_sample_event(struct perf_tool *tool,
goto out_put;
if (!al.filtered &&
- perf_evsel__add_sample(evsel, sample, &al, ann, machine)) {
+ evsel__add_sample(evsel, sample, &al, ann, machine)) {
pr_warning("problem incrementing symbol count, "
"skipping event\n");
ret = -1;
@@ -433,11 +431,10 @@ static int __cmd_annotate(struct perf_annotate *ann)
total_nr_samples += nr_samples;
hists__collapse_resort(hists, NULL);
/* Don't sort callchain */
- perf_evsel__reset_sample_bit(pos, CALLCHAIN);
- perf_evsel__output_resort(pos, NULL);
+ evsel__reset_sample_bit(pos, CALLCHAIN);
+ evsel__output_resort(pos, NULL);
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos))
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos))
continue;
hists__find_annotations(hists, pos, ann);
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index c06fe21c8613..cad31b1d3438 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -67,14 +67,20 @@ static struct bench futex_benchmarks[] = {
{ NULL, NULL, NULL }
};
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
static struct bench epoll_benchmarks[] = {
{ "wait", "Benchmark epoll concurrent epoll_waits", bench_epoll_wait },
{ "ctl", "Benchmark epoll concurrent epoll_ctls", bench_epoll_ctl },
{ "all", "Run all futex benchmarks", NULL },
{ NULL, NULL, NULL }
};
-#endif // HAVE_EVENTFD
+#endif // HAVE_EVENTFD_SUPPORT
+
+static struct bench internals_benchmarks[] = {
+ { "synthesize", "Benchmark perf event synthesis", bench_synthesize },
+ { "kallsyms-parse", "Benchmark kallsyms parsing", bench_kallsyms_parse },
+ { NULL, NULL, NULL }
+};
struct collection {
const char *name;
@@ -89,9 +95,10 @@ static struct collection collections[] = {
{ "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
#endif
{"futex", "Futex stressing benchmarks", futex_benchmarks },
-#ifdef HAVE_EVENTFD
+#ifdef HAVE_EVENTFD_SUPPORT
{"epoll", "Epoll stressing benchmarks", epoll_benchmarks },
#endif
+ { "internals", "Perf-internals benchmarks", internals_benchmarks },
{ "all", "All benchmarks", NULL },
{ NULL, NULL, NULL }
};
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index 246ac0b4d54f..d617d5682c68 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -95,6 +95,7 @@ struct perf_c2c {
bool use_stdio;
bool stats_only;
bool symbol_full;
+ bool stitch_lbr;
/* HITM shared clines stats */
struct c2c_stats hitm_stats;
@@ -273,6 +274,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
+ if (c2c.stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
ret = sample__resolve_callchain(sample, &callchain_cursor, NULL,
evsel, &al, sysctl_perf_event_max_stack);
if (ret)
@@ -1705,7 +1709,7 @@ static struct c2c_dimension *get_dimension(const char *name)
if (!strcmp(dim->name, name))
return dim;
- };
+ }
return NULL;
}
@@ -1921,7 +1925,7 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats)
FILTER_HITM(tot_hitm);
default:
break;
- };
+ }
#undef FILTER_HITM
@@ -2255,8 +2259,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session)
fprintf(out, "=================================================\n");
evlist__for_each_entry(evlist, evsel) {
- fprintf(out, "%-36s: %s\n", first ? " Events" : "",
- perf_evsel__name(evsel));
+ fprintf(out, "%-36s: %s\n", first ? " Events" : "", evsel__name(evsel));
first = false;
}
fprintf(out, " Cachelines sort on : %s HITMs\n",
@@ -2601,6 +2604,12 @@ static int setup_callchain(struct evlist *evlist)
}
}
+ if (c2c.stitch_lbr && (mode != CALLCHAIN_LBR)) {
+ ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ c2c.stitch_lbr = false;
+ }
+
callchain_param.record_mode = mode;
callchain_param.min_percent = 0;
return 0;
@@ -2752,6 +2761,8 @@ static int perf_c2c__report(int argc, const char **argv)
OPT_STRING('c', "coalesce", &coalesce, "coalesce fields",
"coalesce fields: pid,tid,iaddr,dso"),
OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"),
+ OPT_BOOLEAN(0, "stitch-lbr", &c2c.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPT_PARENT(c2c_options),
OPT_END()
};
@@ -2876,8 +2887,15 @@ static int parse_record_events(const struct option *opt,
{
bool *event_set = (bool *) opt->value;
+ if (!strcmp(str, "list")) {
+ perf_mem_events__list();
+ exit(0);
+ }
+ if (perf_mem_events__parse(str))
+ exit(-1);
+
*event_set = true;
- return perf_mem_events__parse(str);
+ return 0;
}
@@ -2947,7 +2965,7 @@ static int perf_c2c__record(int argc, const char **argv)
rec_argv[i++] = "-e";
rec_argv[i++] = perf_mem_events__name(j);
- };
+ }
if (all_user)
rec_argv[i++] = "--all-user";
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index c03c36fde7e2..f8c9bdd8269a 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -455,6 +455,7 @@ static struct perf_diff pdiff = {
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
.namespaces = perf_event__process_namespaces,
+ .cgroup = perf_event__process_cgroup,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
@@ -466,7 +467,7 @@ static struct evsel *evsel_match(struct evsel *evsel,
struct evsel *e;
evlist__for_each_entry(evlist, e) {
- if (perf_evsel__match2(evsel, e))
+ if (evsel__match2(evsel, e))
return e;
}
@@ -572,29 +573,12 @@ static void init_block_hist(struct block_hist *bh)
bh->valid = true;
}
-static int block_pair_cmp(struct hist_entry *a, struct hist_entry *b)
-{
- struct block_info *bi_a = a->block_info;
- struct block_info *bi_b = b->block_info;
- int cmp;
-
- if (!bi_a->sym || !bi_b->sym)
- return -1;
-
- cmp = strcmp(bi_a->sym->name, bi_b->sym->name);
-
- if ((!cmp) && (bi_a->start == bi_b->start) && (bi_a->end == bi_b->end))
- return 0;
-
- return -1;
-}
-
static struct hist_entry *get_block_pair(struct hist_entry *he,
struct hists *hists_pair)
{
struct rb_root_cached *root = hists_pair->entries_in;
struct rb_node *next = rb_first_cached(root);
- int cmp;
+ int64_t cmp;
while (next != NULL) {
struct hist_entry *he_pair = rb_entry(next, struct hist_entry,
@@ -602,7 +586,7 @@ static struct hist_entry *get_block_pair(struct hist_entry *he,
next = rb_next(&he_pair->rb_node_in);
- cmp = block_pair_cmp(he_pair, he);
+ cmp = __block_info__cmp(he_pair, he);
if (!cmp)
return he_pair;
}
@@ -997,7 +981,7 @@ static void data_process(void)
if (!quiet) {
fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
- perf_evsel__name(evsel_base));
+ evsel__name(evsel_base));
}
first = false;
@@ -1006,7 +990,7 @@ static void data_process(void)
data__fprintf();
/* Don't sort callchain for perf diff */
- perf_evsel__reset_sample_bit(evsel_base, CALLCHAIN);
+ evsel__reset_sample_bit(evsel_base, CALLCHAIN);
hists__process(hists_base);
}
@@ -1578,7 +1562,7 @@ hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
default:
BUG_ON(1);
- };
+ }
}
static void
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 440501994931..98e992801251 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -34,7 +34,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details
return PTR_ERR(session);
evlist__for_each_entry(session->evlist, pos) {
- perf_evsel__fprintf(pos, details, stdout);
+ evsel__fprintf(pos, details, stdout);
if (pos->core.attr.type == PERF_TYPE_TRACEPOINT)
has_tracepoint = true;
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index d5adc417a4ca..2bfc1b0db536 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -45,6 +45,7 @@ struct filter_entry {
char name[];
};
+static volatile int workload_exec_errno;
static bool done;
static void sig_handler(int sig __maybe_unused)
@@ -63,7 +64,7 @@ static void ftrace__workload_exec_failed_signal(int signo __maybe_unused,
siginfo_t *info __maybe_unused,
void *ucontext __maybe_unused)
{
- /* workload_exec_errno = info->si_value.sival_int; */
+ workload_exec_errno = info->si_value.sival_int;
done = true;
}
@@ -284,10 +285,11 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
.events = POLLIN,
};
- if (!perf_cap__capable(CAP_SYS_ADMIN)) {
+ if (!(perf_cap__capable(CAP_PERFMON) ||
+ perf_cap__capable(CAP_SYS_ADMIN))) {
pr_err("ftrace only works for %s!\n",
#ifdef HAVE_LIBCAP_SUPPORT
- "users with the SYS_ADMIN capability"
+ "users with the CAP_PERFMON or CAP_SYS_ADMIN capability"
#else
"root"
#endif
@@ -382,6 +384,14 @@ static int __cmd_ftrace(struct perf_ftrace *ftrace, int argc, const char **argv)
write_tracing_file("tracing_on", "0");
+ if (workload_exec_errno) {
+ const char *emsg = str_error_r(workload_exec_errno, buf, sizeof(buf));
+ /* flush stdout first so below error msg appears at the end. */
+ fflush(stdout);
+ pr_err("workload failed: %s\n", emsg);
+ goto out_close_fd;
+ }
+
/* read remaining buffer contents */
while (true) {
int n = read(trace_fd, buf, sizeof(buf));
@@ -396,7 +406,7 @@ out_close_fd:
out_reset:
reset_tracing_files(ftrace);
out:
- return done ? 0 : -1;
+ return (done && !workload_exec_errno) ? 0 : -1;
}
static int perf_ftrace_config(const char *var, const char *value, void *cb)
@@ -493,7 +503,7 @@ int cmd_ftrace(int argc, const char **argv)
argc = parse_options(argc, argv, ftrace_options, ftrace_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc && target__none(&ftrace.target))
- usage_with_options(ftrace_usage, ftrace_options);
+ ftrace.target.system_wide = true;
ret = target__validate(&ftrace.target);
if (ret) {
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 7e124a7b8bfd..4a6de4b03ac0 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -51,7 +51,7 @@ struct perf_inject {
struct event_entry {
struct list_head node;
u32 tid;
- union perf_event event[0];
+ union perf_event event[];
};
static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
@@ -536,7 +536,7 @@ static int perf_inject__sched_stat(struct perf_tool *tool,
union perf_event *event_sw;
struct perf_sample sample_sw;
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
- u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ u32 pid = evsel__intval(evsel, sample, "pid");
list_for_each_entry(ent, &inject->samples, node) {
if (pid == ent->tid)
@@ -546,7 +546,7 @@ static int perf_inject__sched_stat(struct perf_tool *tool,
return 0;
found:
event_sw = &ent->event[0];
- perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+ evsel__parse_sample(evsel, event_sw, &sample_sw);
sample_sw.period = sample->period;
sample_sw.time = sample->time;
@@ -561,11 +561,10 @@ static void sig_handler(int sig __maybe_unused)
session_done = 1;
}
-static int perf_evsel__check_stype(struct evsel *evsel,
- u64 sample_type, const char *sample_msg)
+static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg)
{
struct perf_event_attr *attr = &evsel->core.attr;
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
if (!(attr->sample_type & sample_type)) {
pr_err("Samples for %s event do not have %s attribute set.",
@@ -622,10 +621,10 @@ static int __cmd_inject(struct perf_inject *inject)
struct evsel *evsel;
evlist__for_each_entry(session->evlist, evsel) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
if (!strcmp(name, "sched:sched_switch")) {
- if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+ if (evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
return -EINVAL;
evsel->handler = perf_inject__sched_switch;
@@ -684,14 +683,14 @@ static int __cmd_inject(struct perf_inject *inject)
perf_header__clear_feat(&session->header,
HEADER_AUXTRACE);
- if (inject->itrace_synth_opts.last_branch)
+ if (inject->itrace_synth_opts.last_branch ||
+ inject->itrace_synth_opts.add_last_branch)
perf_header__set_feat(&session->header,
HEADER_BRANCH_STACK);
evsel = perf_evlist__id2evsel_strict(session->evlist,
inject->aux_id);
if (evsel) {
- pr_debug("Deleting %s\n",
- perf_evsel__name(evsel));
+ pr_debug("Deleting %s\n", evsel__name(evsel));
evlist__remove(session->evlist, evsel);
evsel__delete(evsel);
}
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 003c85f5f56c..38a5ab683ebc 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -169,13 +169,12 @@ static int insert_caller_stat(unsigned long call_site,
return 0;
}
-static int perf_evsel__process_alloc_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_alloc_event(struct evsel *evsel, struct perf_sample *sample)
{
- unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr"),
- call_site = perf_evsel__intval(evsel, sample, "call_site");
- int bytes_req = perf_evsel__intval(evsel, sample, "bytes_req"),
- bytes_alloc = perf_evsel__intval(evsel, sample, "bytes_alloc");
+ unsigned long ptr = evsel__intval(evsel, sample, "ptr"),
+ call_site = evsel__intval(evsel, sample, "call_site");
+ int bytes_req = evsel__intval(evsel, sample, "bytes_req"),
+ bytes_alloc = evsel__intval(evsel, sample, "bytes_alloc");
if (insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, sample->cpu) ||
insert_caller_stat(call_site, bytes_req, bytes_alloc))
@@ -188,14 +187,13 @@ static int perf_evsel__process_alloc_event(struct evsel *evsel,
return 0;
}
-static int perf_evsel__process_alloc_node_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_alloc_node_event(struct evsel *evsel, struct perf_sample *sample)
{
- int ret = perf_evsel__process_alloc_event(evsel, sample);
+ int ret = evsel__process_alloc_event(evsel, sample);
if (!ret) {
int node1 = cpu__get_node(sample->cpu),
- node2 = perf_evsel__intval(evsel, sample, "node");
+ node2 = evsel__intval(evsel, sample, "node");
if (node1 != node2)
nr_cross_allocs++;
@@ -232,10 +230,9 @@ static struct alloc_stat *search_alloc_stat(unsigned long ptr,
return NULL;
}
-static int perf_evsel__process_free_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_free_event(struct evsel *evsel, struct perf_sample *sample)
{
- unsigned long ptr = perf_evsel__intval(evsel, sample, "ptr");
+ unsigned long ptr = evsel__intval(evsel, sample, "ptr");
struct alloc_stat *s_alloc, *s_caller;
s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
@@ -784,13 +781,12 @@ static int parse_gfp_flags(struct evsel *evsel, struct perf_sample *sample,
return 0;
}
-static int perf_evsel__process_page_alloc_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_page_alloc_event(struct evsel *evsel, struct perf_sample *sample)
{
u64 page;
- unsigned int order = perf_evsel__intval(evsel, sample, "order");
- unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags");
- unsigned int migrate_type = perf_evsel__intval(evsel, sample,
+ unsigned int order = evsel__intval(evsel, sample, "order");
+ unsigned int gfp_flags = evsel__intval(evsel, sample, "gfp_flags");
+ unsigned int migrate_type = evsel__intval(evsel, sample,
"migratetype");
u64 bytes = kmem_page_size << order;
u64 callsite;
@@ -802,9 +798,9 @@ static int perf_evsel__process_page_alloc_event(struct evsel *evsel,
};
if (use_pfn)
- page = perf_evsel__intval(evsel, sample, "pfn");
+ page = evsel__intval(evsel, sample, "pfn");
else
- page = perf_evsel__intval(evsel, sample, "page");
+ page = evsel__intval(evsel, sample, "page");
nr_page_allocs++;
total_page_alloc_bytes += bytes;
@@ -857,11 +853,10 @@ static int perf_evsel__process_page_alloc_event(struct evsel *evsel,
return 0;
}
-static int perf_evsel__process_page_free_event(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_page_free_event(struct evsel *evsel, struct perf_sample *sample)
{
u64 page;
- unsigned int order = perf_evsel__intval(evsel, sample, "order");
+ unsigned int order = evsel__intval(evsel, sample, "order");
u64 bytes = kmem_page_size << order;
struct page_stat *pstat;
struct page_stat this = {
@@ -869,9 +864,9 @@ static int perf_evsel__process_page_free_event(struct evsel *evsel,
};
if (use_pfn)
- page = perf_evsel__intval(evsel, sample, "pfn");
+ page = evsel__intval(evsel, sample, "pfn");
else
- page = perf_evsel__intval(evsel, sample, "page");
+ page = evsel__intval(evsel, sample, "page");
nr_page_frees++;
total_page_free_bytes += bytes;
@@ -1371,15 +1366,15 @@ static int __cmd_kmem(struct perf_session *session)
struct evsel *evsel;
const struct evsel_str_handler kmem_tracepoints[] = {
/* slab allocator */
- { "kmem:kmalloc", perf_evsel__process_alloc_event, },
- { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, },
- { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, },
- { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, },
- { "kmem:kfree", perf_evsel__process_free_event, },
- { "kmem:kmem_cache_free", perf_evsel__process_free_event, },
+ { "kmem:kmalloc", evsel__process_alloc_event, },
+ { "kmem:kmem_cache_alloc", evsel__process_alloc_event, },
+ { "kmem:kmalloc_node", evsel__process_alloc_node_event, },
+ { "kmem:kmem_cache_alloc_node", evsel__process_alloc_node_event, },
+ { "kmem:kfree", evsel__process_free_event, },
+ { "kmem:kmem_cache_free", evsel__process_free_event, },
/* page allocator */
- { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, },
- { "kmem:mm_page_free", perf_evsel__process_page_free_event, },
+ { "kmem:mm_page_alloc", evsel__process_page_alloc_event, },
+ { "kmem:mm_page_free", evsel__process_page_free_event, },
};
if (!perf_session__has_traces(session, "kmem record"))
@@ -1391,8 +1386,8 @@ static int __cmd_kmem(struct perf_session *session)
}
evlist__for_each_entry(session->evlist, evsel) {
- if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") &&
- perf_evsel__field(evsel, "pfn")) {
+ if (!strcmp(evsel__name(evsel), "kmem:mm_page_alloc") &&
+ evsel__field(evsel, "pfn")) {
use_pfn = true;
break;
}
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 577af4f3297a..95a77058023e 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -69,7 +69,7 @@ void exit_event_get_key(struct evsel *evsel,
struct event_key *key)
{
key->info = 0;
- key->key = perf_evsel__intval(evsel, sample, kvm_exit_reason);
+ key->key = evsel__intval(evsel, sample, kvm_exit_reason);
}
bool kvm_exit_event(struct evsel *evsel)
@@ -416,8 +416,7 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread,
return NULL;
}
- vcpu_record->vcpu_id = perf_evsel__intval(evsel, sample,
- vcpu_id_str);
+ vcpu_record->vcpu_id = evsel__intval(evsel, sample, vcpu_id_str);
thread__set_priv(thread, vcpu_record);
}
@@ -1033,16 +1032,16 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
struct perf_event_attr *attr = &pos->core.attr;
/* make sure these *are* set */
- perf_evsel__set_sample_bit(pos, TID);
- perf_evsel__set_sample_bit(pos, TIME);
- perf_evsel__set_sample_bit(pos, CPU);
- perf_evsel__set_sample_bit(pos, RAW);
+ evsel__set_sample_bit(pos, TID);
+ evsel__set_sample_bit(pos, TIME);
+ evsel__set_sample_bit(pos, CPU);
+ evsel__set_sample_bit(pos, RAW);
/* make sure these are *not*; want as small a sample as possible */
- perf_evsel__reset_sample_bit(pos, PERIOD);
- perf_evsel__reset_sample_bit(pos, IP);
- perf_evsel__reset_sample_bit(pos, CALLCHAIN);
- perf_evsel__reset_sample_bit(pos, ADDR);
- perf_evsel__reset_sample_bit(pos, READ);
+ evsel__reset_sample_bit(pos, PERIOD);
+ evsel__reset_sample_bit(pos, IP);
+ evsel__reset_sample_bit(pos, CALLCHAIN);
+ evsel__reset_sample_bit(pos, ADDR);
+ evsel__reset_sample_bit(pos, READ);
attr->mmap = 0;
attr->comm = 0;
attr->task = 0;
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 965ef017496f..0a7fe4cb5555 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -42,7 +42,7 @@ int cmd_list(int argc, const char **argv)
OPT_END()
};
const char * const list_usage[] = {
- "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|event_glob]",
+ "perf list [<options>] [hw|sw|cache|tracepoint|pmu|sdt|metric|metricgroup|event_glob]",
NULL
};
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 474dfd59d7eb..f0a1dbacb46c 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -48,7 +48,7 @@ struct lock_stat {
struct rb_node rb; /* used for sorting */
/*
- * FIXME: perf_evsel__intval() returns u64,
+ * FIXME: evsel__intval() returns u64,
* so address of lockdep_map should be dealed as 64bit.
* Is there more better solution?
*/
@@ -404,9 +404,9 @@ static int report_lock_acquire_event(struct evsel *evsel,
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
- int flag = perf_evsel__intval(evsel, sample, "flag");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
+ int flag = evsel__intval(evsel, sample, "flag");
memcpy(&addr, &tmp, sizeof(void *));
@@ -477,8 +477,8 @@ static int report_lock_acquired_event(struct evsel *evsel,
struct thread_stat *ts;
struct lock_seq_stat *seq;
u64 contended_term;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
memcpy(&addr, &tmp, sizeof(void *));
@@ -539,8 +539,8 @@ static int report_lock_contended_event(struct evsel *evsel,
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
memcpy(&addr, &tmp, sizeof(void *));
@@ -594,8 +594,8 @@ static int report_lock_release_event(struct evsel *evsel,
struct lock_stat *ls;
struct thread_stat *ts;
struct lock_seq_stat *seq;
- const char *name = perf_evsel__strval(evsel, sample, "name");
- u64 tmp = perf_evsel__intval(evsel, sample, "lockdep_addr");
+ const char *name = evsel__strval(evsel, sample, "name");
+ u64 tmp = evsel__intval(evsel, sample, "lockdep_addr");
memcpy(&addr, &tmp, sizeof(void *));
@@ -657,32 +657,28 @@ static struct trace_lock_handler report_lock_ops = {
static struct trace_lock_handler *trace_handler;
-static int perf_evsel__process_lock_acquire(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_acquire(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->acquire_event)
return trace_handler->acquire_event(evsel, sample);
return 0;
}
-static int perf_evsel__process_lock_acquired(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_acquired(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->acquired_event)
return trace_handler->acquired_event(evsel, sample);
return 0;
}
-static int perf_evsel__process_lock_contended(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_contended(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->contended_event)
return trace_handler->contended_event(evsel, sample);
return 0;
}
-static int perf_evsel__process_lock_release(struct evsel *evsel,
- struct perf_sample *sample)
+static int evsel__process_lock_release(struct evsel *evsel, struct perf_sample *sample)
{
if (trace_handler->release_event)
return trace_handler->release_event(evsel, sample);
@@ -775,7 +771,7 @@ static void dump_threads(void)
pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
node = rb_next(node);
thread__put(t);
- };
+ }
}
static void dump_map(void)
@@ -849,10 +845,10 @@ static void sort_result(void)
}
static const struct evsel_str_handler lock_tracepoints[] = {
- { "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
- { "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
- { "lock:lock_contended", perf_evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
- { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
+ { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
+ { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+ { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+ { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
};
static bool force;
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index a13f5817d6fc..3523279af6af 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -38,26 +38,16 @@ static int parse_record_events(const struct option *opt,
const char *str, int unset __maybe_unused)
{
struct perf_mem *mem = *(struct perf_mem **)opt->value;
- int j;
- if (strcmp(str, "list")) {
- if (!perf_mem_events__parse(str)) {
- mem->operation = 0;
- return 0;
- }
- exit(-1);
+ if (!strcmp(str, "list")) {
+ perf_mem_events__list();
+ exit(0);
}
+ if (perf_mem_events__parse(str))
+ exit(-1);
- for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
- struct perf_mem_event *e = &perf_mem_events[j];
-
- fprintf(stderr, "%-13s%-*s%s\n",
- e->tag,
- verbose > 0 ? 25 : 0,
- verbose > 0 ? perf_mem_events__name(j) : "",
- e->supported ? ": available" : "");
- }
- exit(0);
+ mem->operation = 0;
+ return 0;
}
static const char * const __usage[] = {
@@ -123,7 +113,7 @@ static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
rec_argv[i++] = "-e";
rec_argv[i++] = perf_mem_events__name(j);
- };
+ }
if (all_user)
rec_argv[i++] = "--all-user";
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 70548df2abb9..6b1507566770 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -364,6 +364,9 @@ static int perf_add_probe_events(struct perf_probe_event *pevs, int npevs)
for (k = 0; k < pev->ntevs; k++) {
struct probe_trace_event *tev = &pev->tevs[k];
+ /* Skipped events have no event name */
+ if (!tev->event)
+ continue;
/* We use tev's name for showing new events */
show_perf_probe_event(tev->group, tev->event, pev,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4c301466101b..e108d90ae2ed 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -34,6 +34,7 @@
#include "util/tsc.h"
#include "util/parse-branch-options.h"
#include "util/parse-regs-options.h"
+#include "util/perf_api_probe.h"
#include "util/llvm-utils.h"
#include "util/bpf-loader.h"
#include "util/trigger.h"
@@ -43,6 +44,8 @@
#include "util/time-utils.h"
#include "util/units.h"
#include "util/bpf-event.h"
+#include "util/util.h"
+#include "util/pfm.h"
#include "asm/bug.h"
#include "perf.h"
@@ -50,9 +53,13 @@
#include <inttypes.h>
#include <locale.h>
#include <poll.h>
+#include <pthread.h>
#include <unistd.h>
#include <sched.h>
#include <signal.h>
+#ifdef HAVE_EVENTFD_SUPPORT
+#include <sys/eventfd.h>
+#endif
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/types.h>
@@ -84,7 +91,10 @@ struct record {
struct auxtrace_record *itr;
struct evlist *evlist;
struct perf_session *session;
+ struct evlist *sb_evlist;
+ pthread_t thread_id;
int realtime_prio;
+ bool switch_output_event_set;
bool no_buildid;
bool no_buildid_set;
bool no_buildid_cache;
@@ -503,6 +513,20 @@ static int process_synthesized_event(struct perf_tool *tool,
return record__write(rec, NULL, event, event->header.size);
}
+static int process_locked_synthesized_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ static pthread_mutex_t synth_lock = PTHREAD_MUTEX_INITIALIZER;
+ int ret;
+
+ pthread_mutex_lock(&synth_lock);
+ ret = process_synthesized_event(tool, event, sample, machine);
+ pthread_mutex_unlock(&synth_lock);
+ return ret;
+}
+
static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
{
struct record *rec = to;
@@ -518,6 +542,9 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
static volatile int signr = -1;
static volatile int child_finished;
+#ifdef HAVE_EVENTFD_SUPPORT
+static int done_fd = -1;
+#endif
static void sig_handler(int sig)
{
@@ -527,6 +554,21 @@ static void sig_handler(int sig)
signr = sig;
done = 1;
+#ifdef HAVE_EVENTFD_SUPPORT
+{
+ u64 tmp = 1;
+ /*
+ * It is possible for this signal handler to run after done is checked
+ * in the main loop, but before the perf counter fds are polled. If this
+ * happens, the poll() will continue to wait even though done is set,
+ * and will only break out if either another signal is received, or the
+ * counters are ready for read. To ensure the poll() doesn't sleep when
+ * done is set, use an eventfd (done_fd) to wake up the poll().
+ */
+ if (write(done_fd, &tmp, sizeof(tmp)) < 0)
+ pr_err("failed to signal wakeup fd, error: %m\n");
+}
+#endif // HAVE_EVENTFD_SUPPORT
}
static void sigsegv_handler(int sig)
@@ -805,19 +847,28 @@ static int record__open(struct record *rec)
int rc = 0;
/*
- * For initial_delay we need to add a dummy event so that we can track
- * PERF_RECORD_MMAP while we wait for the initial delay to enable the
- * real events, the ones asked by the user.
+ * For initial_delay or system wide, we need to add a dummy event so
+ * that we can track PERF_RECORD_MMAP to cover the delay of waiting or
+ * event synthesis.
*/
- if (opts->initial_delay) {
+ if (opts->initial_delay || target__has_cpu(&opts->target)) {
if (perf_evlist__add_dummy(evlist))
return -ENOMEM;
+ /* Disable tracking of mmaps on lead event. */
pos = evlist__first(evlist);
pos->tracking = 0;
+ /* Set up dummy event. */
pos = evlist__last(evlist);
pos->tracking = 1;
- pos->core.attr.enable_on_exec = 1;
+ /*
+ * Enable the dummy event when the process is forked for
+ * initial_delay, immediately for system wide.
+ */
+ if (opts->initial_delay)
+ pos->core.attr.enable_on_exec = 1;
+ else
+ pos->immediate = 1;
}
perf_evlist__config(evlist, opts, &callchain_param);
@@ -825,7 +876,7 @@ static int record__open(struct record *rec)
evlist__for_each_entry(evlist, pos) {
try_again:
if (evsel__open(pos, pos->core.cpus, pos->core.threads) < 0) {
- if (perf_evsel__fallback(pos, errno, msg, sizeof(msg))) {
+ if (evsel__fallback(pos, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
goto try_again;
@@ -837,8 +888,7 @@ try_again:
goto try_again;
}
rc = -errno;
- perf_evsel__open_strerror(pos, &opts->target,
- errno, msg, sizeof(msg));
+ evsel__open_strerror(pos, &opts->target, errno, msg, sizeof(msg));
ui__error("%s\n", msg);
goto out;
}
@@ -859,7 +909,7 @@ try_again:
if (perf_evlist__apply_filters(evlist, &pos)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
- pos->filter, perf_evsel__name(pos), errno,
+ pos->filter, evsel__name(pos), errno,
str_error_r(errno, msg, sizeof(msg)));
rc = -1;
goto out;
@@ -1288,6 +1338,7 @@ static int record__synthesize(struct record *rec, bool tail)
struct perf_tool *tool = &rec->tool;
int fd = perf_data__fd(data);
int err = 0;
+ event_op f = process_synthesized_event;
if (rec->opts.tail_synthesize != tail)
return 0;
@@ -1397,13 +1448,72 @@ static int record__synthesize(struct record *rec, bool tail)
if (err < 0)
pr_warning("Couldn't synthesize bpf events.\n");
+ err = perf_event__synthesize_cgroups(tool, process_synthesized_event,
+ machine);
+ if (err < 0)
+ pr_warning("Couldn't synthesize cgroup events.\n");
+
+ if (rec->opts.nr_threads_synthesize > 1) {
+ perf_set_multithreaded();
+ f = process_locked_synthesized_event;
+ }
+
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->core.threads,
- process_synthesized_event, opts->sample_address,
- 1);
+ f, opts->sample_address,
+ rec->opts.nr_threads_synthesize);
+
+ if (rec->opts.nr_threads_synthesize > 1)
+ perf_set_singlethreaded();
+
out:
return err;
}
+static int record__process_signal_event(union perf_event *event __maybe_unused, void *data)
+{
+ struct record *rec = data;
+ pthread_kill(rec->thread_id, SIGUSR2);
+ return 0;
+}
+
+static int record__setup_sb_evlist(struct record *rec)
+{
+ struct record_opts *opts = &rec->opts;
+
+ if (rec->sb_evlist != NULL) {
+ /*
+ * We get here if --switch-output-event populated the
+ * sb_evlist, so associate a callback that will send a SIGUSR2
+ * to the main thread.
+ */
+ evlist__set_cb(rec->sb_evlist, record__process_signal_event, rec);
+ rec->thread_id = pthread_self();
+ }
+
+ if (!opts->no_bpf_event) {
+ if (rec->sb_evlist == NULL) {
+ rec->sb_evlist = evlist__new();
+
+ if (rec->sb_evlist == NULL) {
+ pr_err("Couldn't create side band evlist.\n.");
+ return -1;
+ }
+ }
+
+ if (evlist__add_bpf_sb_event(rec->sb_evlist, &rec->session->header.env)) {
+ pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
+ return -1;
+ }
+ }
+
+ if (perf_evlist__start_sb_thread(rec->sb_evlist, &rec->opts.target)) {
+ pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
+ opts->no_bpf_event = true;
+ }
+
+ return 0;
+}
+
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
int err;
@@ -1415,7 +1525,6 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
struct perf_data *data = &rec->data;
struct perf_session *session;
bool disabled = false, draining = false;
- struct evlist *sb_evlist = NULL;
int fd;
float ratio = 0;
@@ -1428,6 +1537,15 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (rec->opts.record_namespaces)
tool->namespace_events = true;
+ if (rec->opts.record_cgroup) {
+#ifdef HAVE_FILE_HANDLE
+ tool->cgroup_events = true;
+#else
+ pr_err("cgroup tracking is not supported\n");
+ return -1;
+#endif
+ }
+
if (rec->opts.auxtrace_snapshot_mode || rec->switch_output.enabled) {
signal(SIGUSR2, snapshot_sig_handler);
if (rec->opts.auxtrace_snapshot_mode)
@@ -1451,6 +1569,20 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
pr_err("Compression initialization failed.\n");
return -1;
}
+#ifdef HAVE_EVENTFD_SUPPORT
+ done_fd = eventfd(0, EFD_NONBLOCK);
+ if (done_fd < 0) {
+ pr_err("Failed to create wakeup eventfd, error: %m\n");
+ status = -1;
+ goto out_delete_session;
+ }
+ err = evlist__add_pollfd(rec->evlist, done_fd);
+ if (err < 0) {
+ pr_err("Failed to add wakeup eventfd to poll list\n");
+ status = err;
+ goto out_delete_session;
+ }
+#endif // HAVE_EVENTFD_SUPPORT
session->header.env.comp_type = PERF_COMP_ZSTD;
session->header.env.comp_level = rec->opts.comp_level;
@@ -1532,21 +1664,17 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
goto out_child;
}
+ err = -1;
if (!rec->no_buildid
&& !perf_header__has_feat(&session->header, HEADER_BUILD_ID)) {
pr_err("Couldn't generate buildids. "
"Use --no-buildid to profile anyway.\n");
- err = -1;
goto out_child;
}
- if (!opts->no_bpf_event)
- bpf_event__add_sb_event(&sb_evlist, &session->header.env);
-
- if (perf_evlist__start_sb_thread(sb_evlist, &rec->opts.target)) {
- pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
- opts->no_bpf_event = true;
- }
+ err = record__setup_sb_evlist(rec);
+ if (err)
+ goto out_child;
err = record__synthesize(rec, false);
if (err < 0)
@@ -1813,11 +1941,15 @@ out_child:
}
out_delete_session:
+#ifdef HAVE_EVENTFD_SUPPORT
+ if (done_fd >= 0)
+ close(done_fd);
+#endif
zstd_fini(&session->zstd_data);
perf_session__delete(session);
if (!opts->no_bpf_event)
- perf_evlist__stop_sb_thread(sb_evlist);
+ perf_evlist__stop_sb_thread(rec->sb_evlist);
return status;
}
@@ -2128,10 +2260,19 @@ static int switch_output_setup(struct record *rec)
};
unsigned long val;
+ /*
+ * If we're using --switch-output-events, then we imply its
+ * --switch-output=signal, as we'll send a SIGUSR2 from the side band
+ * thread to its parent.
+ */
+ if (rec->switch_output_event_set)
+ goto do_signal;
+
if (!s->set)
return 0;
if (!strcmp(s->str, "signal")) {
+do_signal:
s->signal = true;
pr_debug("switch-output with SIGUSR2 signal\n");
goto enabled;
@@ -2218,6 +2359,7 @@ static struct record record = {
.default_per_cpu = true,
},
.mmap_flush = MMAP_FLUSH_DEFAULT,
+ .nr_threads_synthesize = 1,
},
.tool = {
.sample = process_sample_event,
@@ -2358,8 +2500,11 @@ static struct option __record_options[] = {
"per thread proc mmap processing timeout in ms"),
OPT_BOOLEAN(0, "namespaces", &record.opts.record_namespaces,
"Record namespaces events"),
- OPT_BOOLEAN(0, "switch-events", &record.opts.record_switch_events,
- "Record context switch events"),
+ OPT_BOOLEAN(0, "all-cgroups", &record.opts.record_cgroup,
+ "Record cgroup events"),
+ OPT_BOOLEAN_SET(0, "switch-events", &record.opts.record_switch_events,
+ &record.opts.record_switch_events_set,
+ "Record context switch events"),
OPT_BOOLEAN_FLAG(0, "all-kernel", &record.opts.all_kernel,
"Configure all used events to run in kernel space.",
PARSE_OPT_EXCLUSIVE),
@@ -2386,6 +2531,9 @@ static struct option __record_options[] = {
&record.switch_output.set, "signal or size[BKMG] or time[smhd]",
"Switch output when receiving SIGUSR2 (signal) or cross a size or time threshold",
"signal"),
+ OPT_CALLBACK_SET(0, "switch-output-event", &record.sb_evlist, &record.switch_output_event_set, "switch output event",
+ "switch output event selector. use 'perf list' to list available events",
+ parse_events_option_new_evlist),
OPT_INTEGER(0, "switch-max-files", &record.switch_output.num_files,
"Limit number of switch output generated files"),
OPT_BOOLEAN(0, "dry-run", &dry_run,
@@ -2405,6 +2553,14 @@ static struct option __record_options[] = {
#endif
OPT_CALLBACK(0, "max-size", &record.output_max_size,
"size", "Limit the maximum size of the output file", parse_output_max_size),
+ OPT_UINTEGER(0, "num-thread-synthesize",
+ &record.opts.nr_threads_synthesize,
+ "number of threads to run for event synthesis"),
+#ifdef HAVE_LIBPFM
+ OPT_CALLBACK(0, "pfm-events", &record.evlist, "event",
+ "libpfm4 event selector. use 'perf list' to list available events",
+ parse_libpfm_events_option),
+#endif
OPT_END()
};
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 72a12b69f120..b63b3fb2de70 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -47,7 +47,6 @@
#include "util/time-utils.h"
#include "util/auxtrace.h"
#include "util/units.h"
-#include "util/branch.h"
#include "util/util.h" // perf_tip()
#include "ui/ui.h"
#include "ui/progress.h"
@@ -84,6 +83,7 @@ struct report {
bool header_only;
bool nonany_branch_mode;
bool group_set;
+ bool stitch_lbr;
int max_stack;
struct perf_read_values show_threads_values;
struct annotation_options annotation_opts;
@@ -104,6 +104,7 @@ struct report {
bool symbol_ipc;
bool total_cycles_mode;
struct block_report *block_reports;
+ int nr_block_reports;
};
static int report__config(const char *var, const char *value, void *cb)
@@ -185,24 +186,23 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter,
{
struct hist_entry *he = iter->he;
struct report *rep = arg;
- struct branch_info *bi;
+ struct branch_info *bi = he->branch_info;
struct perf_sample *sample = iter->sample;
struct evsel *evsel = iter->evsel;
int err;
+ branch_type_count(&rep->brtype_stat, &bi->flags,
+ bi->from.addr, bi->to.addr);
+
if (!ui__has_annotation() && !rep->symbol_ipc)
return 0;
- bi = he->branch_info;
err = addr_map_symbol__inc_samples(&bi->from, sample, evsel);
if (err)
goto out;
err = addr_map_symbol__inc_samples(&bi->to, sample, evsel);
- branch_type_count(&rep->brtype_stat, &bi->flags,
- bi->from.addr, bi->to.addr);
-
out:
return err;
}
@@ -267,6 +267,9 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}
+ if (rep->stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
if (symbol_conf.hide_unresolved && al.sym == NULL)
goto out_put;
@@ -317,7 +320,7 @@ static int process_read_event(struct perf_tool *tool,
struct report *rep = container_of(tool, struct report, tool);
if (rep->show_threads) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
int err = perf_read_values_add_value(&rep->show_threads_values,
event->read.pid, event->read.tid,
evsel->idx,
@@ -339,12 +342,14 @@ static int report__setup_sample_type(struct report *rep)
bool is_pipe = perf_data__is_pipe(session->data);
if (session->itrace_synth_opts->callchain ||
+ session->itrace_synth_opts->add_callchain ||
(!is_pipe &&
perf_header__has_feat(&session->header, HEADER_AUXTRACE) &&
!session->itrace_synth_opts->set))
sample_type |= PERF_SAMPLE_CALLCHAIN;
- if (session->itrace_synth_opts->last_branch)
+ if (session->itrace_synth_opts->last_branch ||
+ session->itrace_synth_opts->add_last_branch)
sample_type |= PERF_SAMPLE_BRANCH_STACK;
if (!is_pipe && !(sample_type & PERF_SAMPLE_CALLCHAIN)) {
@@ -396,15 +401,12 @@ static int report__setup_sample_type(struct report *rep)
}
}
- if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
- if ((sample_type & PERF_SAMPLE_REGS_USER) &&
- (sample_type & PERF_SAMPLE_STACK_USER)) {
- callchain_param.record_mode = CALLCHAIN_DWARF;
- dwarf_callchain_users = true;
- } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
- callchain_param.record_mode = CALLCHAIN_LBR;
- else
- callchain_param.record_mode = CALLCHAIN_FP;
+ callchain_param_setup(sample_type);
+
+ if (rep->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
+ ui__warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ rep->stitch_lbr = false;
}
/* ??? handle more cases than just ANY? */
@@ -447,10 +449,10 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
nr_events = hists->stats.total_non_filtered_period;
}
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
struct evsel *pos;
- perf_evsel__group_desc(evsel, buf, size);
+ evsel__group_desc(evsel, buf, size);
evname = buf;
for_each_group_member(pos, evsel) {
@@ -525,10 +527,9 @@ static int perf_evlist__tty_browse_hists(struct evlist *evlist,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- const char *evname = perf_evsel__name(pos);
+ const char *evname = evsel__name(pos);
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos))
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos))
continue;
hists__fprintf_nr_sample_events(hists, rep, evname, stdout);
@@ -635,7 +636,7 @@ static int report__browse_hists(struct report *rep)
* Usually "ret" is the last pressed key, and we only
* care if the key notifies us to switch data file.
*/
- if (ret != K_SWITCH_INPUT_DATA)
+ if (ret != K_SWITCH_INPUT_DATA && ret != K_RELOAD)
ret = 0;
break;
case 2:
@@ -670,8 +671,7 @@ static int report__collapse_hists(struct report *rep)
break;
/* Non-group events are considered as leader */
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos)) {
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos)) {
struct hists *leader_hists = evsel__hists(pos->leader);
hists__match(leader_hists, hists);
@@ -706,8 +706,7 @@ static void report__output_resort(struct report *rep)
ui_progress__init(&prog, rep->nr_entries, "Sorting events for output...");
evlist__for_each_entry(rep->session->evlist, pos) {
- perf_evsel__output_resort_cb(pos, &prog,
- hists__resort_cb, rep);
+ evsel__output_resort_cb(pos, &prog, hists__resort_cb, rep);
}
ui_progress__finish();
@@ -966,8 +965,19 @@ static int __cmd_report(struct report *rep)
report__output_resort(rep);
if (rep->total_cycles_mode) {
+ int block_hpps[6] = {
+ PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT,
+ PERF_HPP_REPORT__BLOCK_LBR_CYCLES,
+ PERF_HPP_REPORT__BLOCK_CYCLES_PCT,
+ PERF_HPP_REPORT__BLOCK_AVG_CYCLES,
+ PERF_HPP_REPORT__BLOCK_RANGE,
+ PERF_HPP_REPORT__BLOCK_DSO,
+ };
+
rep->block_reports = block_info__create_report(session->evlist,
- rep->total_cycles);
+ rep->total_cycles,
+ block_hpps, 6,
+ &rep->nr_block_reports);
if (!rep->block_reports)
return -1;
}
@@ -1069,6 +1079,26 @@ parse_percent_limit(const struct option *opt, const char *str,
return 0;
}
+static int process_attr(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct evlist **pevlist)
+{
+ u64 sample_type;
+ int err;
+
+ err = perf_event__process_attr(tool, event, pevlist);
+ if (err)
+ return err;
+
+ /*
+ * Check if we need to enable callchains based
+ * on events sample_type.
+ */
+ sample_type = perf_evlist__combined_sample_type(*pevlist);
+ callchain_param_setup(sample_type);
+ return 0;
+}
+
int cmd_report(int argc, const char **argv)
{
struct perf_session *session;
@@ -1094,11 +1124,12 @@ int cmd_report(int argc, const char **argv)
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
+ .cgroup = perf_event__process_cgroup,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.lost = perf_event__process_lost,
.read = process_read_event,
- .attr = perf_event__process_attr,
+ .attr = process_attr,
.tracing_data = perf_event__process_tracing_data,
.build_id = perf_event__process_build_id,
.id_index = perf_event__process_id_index,
@@ -1216,6 +1247,10 @@ int cmd_report(int argc, const char **argv)
"Show a column with the sum of periods"),
OPT_BOOLEAN_SET(0, "group", &symbol_conf.event_group, &report.group_set,
"Show event group information together"),
+ OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx,
+ "Sort the output by the event at the index n in group. "
+ "If n is invalid, sort by the first event. "
+ "WARNING: should be used on grouped events."),
OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "",
"use branch records for per branch histogram filling",
parse_branch_mode),
@@ -1241,6 +1276,8 @@ int cmd_report(int argc, const char **argv)
"Show full source file name path for source lines"),
OPT_BOOLEAN(0, "show-ref-call-graph", &symbol_conf.show_ref_callgraph,
"Show callgraph from reference event"),
+ OPT_BOOLEAN(0, "stitch-lbr", &report.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPT_INTEGER(0, "socket-filter", &report.socket_filter,
"only show processor socket that match with this filter"),
OPT_BOOLEAN(0, "raw-trace", &symbol_conf.raw_trace,
@@ -1316,7 +1353,7 @@ int cmd_report(int argc, const char **argv)
if (symbol_conf.cumulate_callchain && !callchain_param.order_set)
callchain_param.order = ORDER_CALLER;
- if (itrace_synth_opts.callchain &&
+ if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) &&
(int)itrace_synth_opts.callchain_sz > report.max_stack)
report.max_stack = itrace_synth_opts.callchain_sz;
@@ -1358,7 +1395,13 @@ repeat:
setup_forced_leader(&report, session->evlist);
- if (itrace_synth_opts.last_branch)
+ if (symbol_conf.group_sort_idx && !session->evlist->nr_groups) {
+ parse_options_usage(NULL, options, "group-sort-idx", 0);
+ ret = -EINVAL;
+ goto error;
+ }
+
+ if (itrace_synth_opts.last_branch || itrace_synth_opts.add_last_branch)
has_br_stack = true;
if (has_br_stack && branch_call_mode)
@@ -1378,7 +1421,7 @@ repeat:
}
if (branch_call_mode) {
callchain_param.key = CCKEY_ADDRESS;
- callchain_param.branch_callstack = 1;
+ callchain_param.branch_callstack = true;
symbol_conf.use_callchain = true;
callchain_register_param(&callchain_param);
if (sort_order == NULL)
@@ -1459,7 +1502,7 @@ repeat:
sort_order = sort_tmp;
}
- if ((last_key != K_SWITCH_INPUT_DATA) &&
+ if ((last_key != K_SWITCH_INPUT_DATA && last_key != K_RELOAD) &&
(setup_sorting(session->evlist) < 0)) {
if (sort_order)
parse_options_usage(report_usage, options, "s", 1);
@@ -1538,7 +1581,7 @@ repeat:
sort__setup_elide(stdout);
ret = __cmd_report(&report);
- if (ret == K_SWITCH_INPUT_DATA) {
+ if (ret == K_SWITCH_INPUT_DATA || ret == K_RELOAD) {
perf_session__delete(session);
last_key = K_SWITCH_INPUT_DATA;
goto repeat;
@@ -1551,8 +1594,11 @@ error:
zfree(&report.ptime_range);
}
- if (report.block_reports)
- zfree(&report.block_reports);
+ if (report.block_reports) {
+ block_info__free_report(report.block_reports,
+ report.nr_block_reports);
+ report.block_reports = NULL;
+ }
zstd_fini(&(session->zstd_data));
perf_session__delete(session);
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 82fcc2c15fe4..459e4229945e 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -811,8 +811,8 @@ replay_wakeup_event(struct perf_sched *sched,
struct evsel *evsel, struct perf_sample *sample,
struct machine *machine __maybe_unused)
{
- const char *comm = perf_evsel__strval(evsel, sample, "comm");
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const char *comm = evsel__strval(evsel, sample, "comm");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
struct task_desc *waker, *wakee;
if (verbose > 0) {
@@ -833,11 +833,11 @@ static int replay_switch_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine __maybe_unused)
{
- const char *prev_comm = perf_evsel__strval(evsel, sample, "prev_comm"),
- *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
- const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
- const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+ const char *prev_comm = evsel__strval(evsel, sample, "prev_comm"),
+ *next_comm = evsel__strval(evsel, sample, "next_comm");
+ const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = evsel__intval(evsel, sample, "next_pid");
+ const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
struct task_desc *prev, __maybe_unused *next;
u64 timestamp0, timestamp = sample->time;
int cpu = sample->cpu;
@@ -1106,9 +1106,9 @@ static int latency_switch_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
- const u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+ const u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = evsel__intval(evsel, sample, "next_pid");
+ const u64 prev_state = evsel__intval(evsel, sample, "prev_state");
struct work_atoms *out_events, *in_events;
struct thread *sched_out, *sched_in;
u64 timestamp0, timestamp = sample->time;
@@ -1176,8 +1176,8 @@ static int latency_runtime_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
- const u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
+ const u64 runtime = evsel__intval(evsel, sample, "runtime");
struct thread *thread = machine__findnew_thread(machine, -1, pid);
struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
u64 timestamp = sample->time;
@@ -1211,7 +1211,7 @@ static int latency_wakeup_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
struct work_atoms *atoms;
struct work_atom *atom;
struct thread *wakee;
@@ -1272,7 +1272,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
u64 timestamp = sample->time;
struct work_atoms *atoms;
struct work_atom *atom;
@@ -1526,7 +1526,7 @@ map__findnew_thread(struct perf_sched *sched, struct machine *machine, pid_t pid
static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
struct perf_sample *sample, struct machine *machine)
{
- const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
struct thread *sched_in;
struct thread_runtime *tr;
int new_shortname;
@@ -1670,8 +1670,8 @@ static int process_sched_switch_event(struct perf_tool *tool,
{
struct perf_sched *sched = container_of(tool, struct perf_sched, tool);
int this_cpu = sample->cpu, err = 0;
- u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ u32 prev_pid = evsel__intval(evsel, sample, "prev_pid"),
+ next_pid = evsel__intval(evsel, sample, "next_pid");
if (sched->curr_pid[this_cpu] != (u32)-1) {
/*
@@ -1848,7 +1848,7 @@ static inline void print_sched_time(unsigned long long nsecs, int width)
* returns runtime data for event, allocating memory for it the
* first time it is used.
*/
-static struct evsel_runtime *perf_evsel__get_runtime(struct evsel *evsel)
+static struct evsel_runtime *evsel__get_runtime(struct evsel *evsel)
{
struct evsel_runtime *r = evsel->priv;
@@ -1863,10 +1863,9 @@ static struct evsel_runtime *perf_evsel__get_runtime(struct evsel *evsel)
/*
* save last time event was seen per cpu
*/
-static void perf_evsel__save_time(struct evsel *evsel,
- u64 timestamp, u32 cpu)
+static void evsel__save_time(struct evsel *evsel, u64 timestamp, u32 cpu)
{
- struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
+ struct evsel_runtime *r = evsel__get_runtime(evsel);
if (r == NULL)
return;
@@ -1890,9 +1889,9 @@ static void perf_evsel__save_time(struct evsel *evsel,
}
/* returns last time this event was seen on the given cpu */
-static u64 perf_evsel__get_time(struct evsel *evsel, u32 cpu)
+static u64 evsel__get_time(struct evsel *evsel, u32 cpu)
{
- struct evsel_runtime *r = perf_evsel__get_runtime(evsel);
+ struct evsel_runtime *r = evsel__get_runtime(evsel);
if ((r == NULL) || (r->last_time == NULL) || (cpu >= r->ncpu))
return 0;
@@ -2004,8 +2003,8 @@ static void timehist_print_sample(struct perf_sched *sched,
u64 t, int state)
{
struct thread_runtime *tr = thread__priv(thread);
- const char *next_comm = perf_evsel__strval(evsel, sample, "next_comm");
- const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ const char *next_comm = evsel__strval(evsel, sample, "next_comm");
+ const u32 next_pid = evsel__intval(evsel, sample, "next_pid");
u32 max_cpus = sched->max_cpu + 1;
char tstr[64];
char nstr[30];
@@ -2136,8 +2135,8 @@ static bool is_idle_sample(struct perf_sample *sample,
struct evsel *evsel)
{
/* pid 0 == swapper == idle task */
- if (strcmp(perf_evsel__name(evsel), "sched:sched_switch") == 0)
- return perf_evsel__intval(evsel, sample, "prev_pid") == 0;
+ if (strcmp(evsel__name(evsel), "sched:sched_switch") == 0)
+ return evsel__intval(evsel, sample, "prev_pid") == 0;
return sample->pid == 0;
}
@@ -2334,7 +2333,7 @@ static struct thread *timehist_get_thread(struct perf_sched *sched,
itr->last_thread = thread;
/* copy task callchain when entering to idle */
- if (perf_evsel__intval(evsel, sample, "next_pid") == 0)
+ if (evsel__intval(evsel, sample, "next_pid") == 0)
save_idle_callchain(sched, itr, sample);
}
}
@@ -2355,10 +2354,10 @@ static bool timehist_skip_sample(struct perf_sched *sched,
}
if (sched->idle_hist) {
- if (strcmp(perf_evsel__name(evsel), "sched:sched_switch"))
+ if (strcmp(evsel__name(evsel), "sched:sched_switch"))
rc = true;
- else if (perf_evsel__intval(evsel, sample, "prev_pid") != 0 &&
- perf_evsel__intval(evsel, sample, "next_pid") != 0)
+ else if (evsel__intval(evsel, sample, "prev_pid") != 0 &&
+ evsel__intval(evsel, sample, "next_pid") != 0)
rc = true;
}
@@ -2409,7 +2408,7 @@ static int timehist_sched_wakeup_event(struct perf_tool *tool,
struct thread *thread;
struct thread_runtime *tr = NULL;
/* want pid of awakened task not pid in sample */
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
thread = machine__findnew_thread(machine, 0, pid);
if (thread == NULL)
@@ -2445,8 +2444,8 @@ static void timehist_print_migration_event(struct perf_sched *sched,
return;
max_cpus = sched->max_cpu + 1;
- ocpu = perf_evsel__intval(evsel, sample, "orig_cpu");
- dcpu = perf_evsel__intval(evsel, sample, "dest_cpu");
+ ocpu = evsel__intval(evsel, sample, "orig_cpu");
+ dcpu = evsel__intval(evsel, sample, "dest_cpu");
thread = machine__findnew_thread(machine, sample->pid, sample->tid);
if (thread == NULL)
@@ -2493,7 +2492,7 @@ static int timehist_migrate_task_event(struct perf_tool *tool,
struct thread *thread;
struct thread_runtime *tr = NULL;
/* want pid of migrated task not pid in sample */
- const u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ const u32 pid = evsel__intval(evsel, sample, "pid");
thread = machine__findnew_thread(machine, 0, pid);
if (thread == NULL)
@@ -2524,8 +2523,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
struct thread_runtime *tr = NULL;
u64 tprev, t = sample->time;
int rc = 0;
- int state = perf_evsel__intval(evsel, sample, "prev_state");
-
+ int state = evsel__intval(evsel, sample, "prev_state");
if (machine__resolve(machine, &al, sample) < 0) {
pr_err("problem processing %d event. skipping it\n",
@@ -2549,7 +2547,7 @@ static int timehist_sched_change_event(struct perf_tool *tool,
goto out;
}
- tprev = perf_evsel__get_time(evsel, sample->cpu);
+ tprev = evsel__get_time(evsel, sample->cpu);
/*
* If start time given:
@@ -2632,7 +2630,7 @@ out:
tr->ready_to_run = 0;
}
- perf_evsel__save_time(evsel, sample->time, sample->cpu);
+ evsel__save_time(evsel, sample->time, sample->cpu);
return rc;
}
@@ -2942,7 +2940,7 @@ static int timehist_check_attr(struct perf_sched *sched,
struct evsel_runtime *er;
list_for_each_entry(evsel, &evlist->core.entries, core.node) {
- er = perf_evsel__get_runtime(evsel);
+ er = evsel__get_runtime(evsel);
if (er == NULL) {
pr_err("Failed to allocate memory for evsel runtime data\n");
return -1;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index e2406b291c1c..5da243676f12 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -63,7 +63,9 @@
static char const *script_name;
static char const *generate_script_lang;
static bool reltime;
+static bool deltatime;
static u64 initial_time;
+static u64 previous_time;
static bool debug_mode;
static u64 last_timestamp;
static u64 nr_unordered;
@@ -165,6 +167,7 @@ static struct {
u64 fields;
u64 invalid_fields;
u64 user_set_fields;
+ u64 user_unset_fields;
} output[OUTPUT_TYPE_MAX] = {
[PERF_TYPE_HARDWARE] = {
@@ -271,7 +274,7 @@ static struct evsel_script *perf_evsel_script__new(struct evsel *evsel,
struct evsel_script *es = zalloc(sizeof(*es));
if (es != NULL) {
- if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0)
+ if (asprintf(&es->filename, "%s.%s.dump", data->file.path, evsel__name(evsel)) < 0)
goto out_free;
es->fp = fopen(es->filename, "w");
if (es->fp == NULL)
@@ -349,10 +352,8 @@ static const char *output_field2str(enum perf_output_field field)
#define PRINT_FIELD(x) (output[output_type(attr->type)].fields & PERF_OUTPUT_##x)
-static int perf_evsel__do_check_stype(struct evsel *evsel,
- u64 sample_type, const char *sample_msg,
- enum perf_output_field field,
- bool allow_user_set)
+static int evsel__do_check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg,
+ enum perf_output_field field, bool allow_user_set)
{
struct perf_event_attr *attr = &evsel->core.attr;
int type = output_type(attr->type);
@@ -364,7 +365,7 @@ static int perf_evsel__do_check_stype(struct evsel *evsel,
if (output[type].user_set_fields & field) {
if (allow_user_set)
return 0;
- evname = perf_evsel__name(evsel);
+ evname = evsel__name(evsel);
pr_err("Samples for '%s' event do not have %s attribute set. "
"Cannot print '%s' field.\n",
evname, sample_msg, output_field2str(field));
@@ -373,7 +374,7 @@ static int perf_evsel__do_check_stype(struct evsel *evsel,
/* user did not ask for it explicitly so remove from the default list */
output[type].fields &= ~field;
- evname = perf_evsel__name(evsel);
+ evname = evsel__name(evsel);
pr_debug("Samples for '%s' event do not have %s attribute set. "
"Skipping '%s' field.\n",
evname, sample_msg, output_field2str(field));
@@ -381,16 +382,13 @@ static int perf_evsel__do_check_stype(struct evsel *evsel,
return 0;
}
-static int perf_evsel__check_stype(struct evsel *evsel,
- u64 sample_type, const char *sample_msg,
- enum perf_output_field field)
+static int evsel__check_stype(struct evsel *evsel, u64 sample_type, const char *sample_msg,
+ enum perf_output_field field)
{
- return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field,
- false);
+ return evsel__do_check_stype(evsel, sample_type, sample_msg, field, false);
}
-static int perf_evsel__check_attr(struct evsel *evsel,
- struct perf_session *session)
+static int perf_evsel__check_attr(struct evsel *evsel, struct perf_session *session)
{
struct perf_event_attr *attr = &evsel->core.attr;
bool allow_user_set;
@@ -402,32 +400,28 @@ static int perf_evsel__check_attr(struct evsel *evsel,
HEADER_AUXTRACE);
if (PRINT_FIELD(TRACE) &&
- !perf_session__has_traces(session, "record -R"))
+ !perf_session__has_traces(session, "record -R"))
return -EINVAL;
if (PRINT_FIELD(IP)) {
- if (perf_evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP",
- PERF_OUTPUT_IP))
+ if (evsel__check_stype(evsel, PERF_SAMPLE_IP, "IP", PERF_OUTPUT_IP))
return -EINVAL;
}
if (PRINT_FIELD(ADDR) &&
- perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
- PERF_OUTPUT_ADDR, allow_user_set))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", PERF_OUTPUT_ADDR, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(DATA_SRC) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC",
- PERF_OUTPUT_DATA_SRC))
+ evsel__check_stype(evsel, PERF_SAMPLE_DATA_SRC, "DATA_SRC", PERF_OUTPUT_DATA_SRC))
return -EINVAL;
if (PRINT_FIELD(WEIGHT) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT",
- PERF_OUTPUT_WEIGHT))
+ evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT, "WEIGHT", PERF_OUTPUT_WEIGHT))
return -EINVAL;
if (PRINT_FIELD(SYM) &&
- !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
+ !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
pr_err("Display of symbols requested but neither sample IP nor "
"sample address\navailable. Hence, no addresses to convert "
"to symbols.\n");
@@ -439,7 +433,7 @@ static int perf_evsel__check_attr(struct evsel *evsel,
return -EINVAL;
}
if (PRINT_FIELD(DSO) &&
- !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
+ !(evsel->core.attr.sample_type & (PERF_SAMPLE_IP|PERF_SAMPLE_ADDR))) {
pr_err("Display of DSO requested but no address to convert.\n");
return -EINVAL;
}
@@ -456,33 +450,27 @@ static int perf_evsel__check_attr(struct evsel *evsel,
return -EINVAL;
}
if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
- PERF_OUTPUT_TID|PERF_OUTPUT_PID))
+ evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID", PERF_OUTPUT_TID|PERF_OUTPUT_PID))
return -EINVAL;
if (PRINT_FIELD(TIME) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME",
- PERF_OUTPUT_TIME))
+ evsel__check_stype(evsel, PERF_SAMPLE_TIME, "TIME", PERF_OUTPUT_TIME))
return -EINVAL;
if (PRINT_FIELD(CPU) &&
- perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
- PERF_OUTPUT_CPU, allow_user_set))
+ evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU", PERF_OUTPUT_CPU, allow_user_set))
return -EINVAL;
if (PRINT_FIELD(IREGS) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS",
- PERF_OUTPUT_IREGS))
+ evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", PERF_OUTPUT_IREGS))
return -EINVAL;
if (PRINT_FIELD(UREGS) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS",
- PERF_OUTPUT_UREGS))
+ evsel__check_stype(evsel, PERF_SAMPLE_REGS_USER, "UREGS", PERF_OUTPUT_UREGS))
return -EINVAL;
if (PRINT_FIELD(PHYS_ADDR) &&
- perf_evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR",
- PERF_OUTPUT_PHYS_ADDR))
+ evsel__check_stype(evsel, PERF_SAMPLE_PHYS_ADDR, "PHYS_ADDR", PERF_OUTPUT_PHYS_ADDR))
return -EINVAL;
return 0;
@@ -602,8 +590,6 @@ static int perf_sample__fprintf_regs(struct regs_dump *regs, uint64_t mask,
printed += fprintf(fp, "%5s:0x%"PRIx64" ", perf_reg_name(r), val);
}
- fprintf(fp, "\n");
-
return printed;
}
@@ -704,6 +690,13 @@ static int perf_sample__fprintf_start(struct perf_sample *sample,
if (!initial_time)
initial_time = sample->time;
t = sample->time - initial_time;
+ } else if (deltatime) {
+ if (previous_time)
+ t = sample->time - previous_time;
+ else {
+ t = 0;
+ }
+ previous_time = sample->time;
}
nsecs = t;
secs = nsecs / NSEC_PER_SEC;
@@ -735,6 +728,7 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -743,8 +737,8 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
return 0;
for (i = 0; i < br->nr; i++) {
- from = br->entries[i].from;
- to = br->entries[i].to;
+ from = entries[i].from;
+ to = entries[i].to;
if (PRINT_FIELD(DSO)) {
memset(&alf, 0, sizeof(alf));
@@ -768,10 +762,10 @@ static int perf_sample__fprintf_brstack(struct perf_sample *sample,
}
printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str( br->entries + i),
- br->entries[i].flags.in_tx? 'X' : '-',
- br->entries[i].flags.abort? 'A' : '-',
- br->entries[i].flags.cycles);
+ mispred_str(entries + i),
+ entries[i].flags.in_tx ? 'X' : '-',
+ entries[i].flags.abort ? 'A' : '-',
+ entries[i].flags.cycles);
}
return printed;
@@ -782,6 +776,7 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -793,8 +788,8 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
memset(&alf, 0, sizeof(alf));
memset(&alt, 0, sizeof(alt));
- from = br->entries[i].from;
- to = br->entries[i].to;
+ from = entries[i].from;
+ to = entries[i].to;
thread__find_symbol_fb(thread, sample->cpumode, from, &alf);
thread__find_symbol_fb(thread, sample->cpumode, to, &alt);
@@ -813,10 +808,10 @@ static int perf_sample__fprintf_brstacksym(struct perf_sample *sample,
printed += fprintf(fp, ")");
}
printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str( br->entries + i),
- br->entries[i].flags.in_tx? 'X' : '-',
- br->entries[i].flags.abort? 'A' : '-',
- br->entries[i].flags.cycles);
+ mispred_str(entries + i),
+ entries[i].flags.in_tx ? 'X' : '-',
+ entries[i].flags.abort ? 'A' : '-',
+ entries[i].flags.cycles);
}
return printed;
@@ -827,6 +822,7 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
struct perf_event_attr *attr, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct addr_location alf, alt;
u64 i, from, to;
int printed = 0;
@@ -838,8 +834,8 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
memset(&alf, 0, sizeof(alf));
memset(&alt, 0, sizeof(alt));
- from = br->entries[i].from;
- to = br->entries[i].to;
+ from = entries[i].from;
+ to = entries[i].to;
if (thread__find_map_fb(thread, sample->cpumode, from, &alf) &&
!alf.map->dso->adjust_symbols)
@@ -862,10 +858,10 @@ static int perf_sample__fprintf_brstackoff(struct perf_sample *sample,
printed += fprintf(fp, ")");
}
printed += fprintf(fp, "/%c/%c/%c/%d ",
- mispred_str(br->entries + i),
- br->entries[i].flags.in_tx ? 'X' : '-',
- br->entries[i].flags.abort ? 'A' : '-',
- br->entries[i].flags.cycles);
+ mispred_str(entries + i),
+ entries[i].flags.in_tx ? 'X' : '-',
+ entries[i].flags.abort ? 'A' : '-',
+ entries[i].flags.cycles);
}
return printed;
@@ -1053,6 +1049,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
struct machine *machine, FILE *fp)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
u64 start, end;
int i, insn, len, nr, ilen, printed = 0;
struct perf_insn x;
@@ -1073,31 +1070,31 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += fprintf(fp, "%c", '\n');
/* Handle first from jump, of which we don't know the entry. */
- len = grab_bb(buffer, br->entries[nr-1].from,
- br->entries[nr-1].from,
+ len = grab_bb(buffer, entries[nr-1].from,
+ entries[nr-1].from,
machine, thread, &x.is64bit, &x.cpumode, false);
if (len > 0) {
- printed += ip__fprintf_sym(br->entries[nr - 1].from, thread,
+ printed += ip__fprintf_sym(entries[nr - 1].from, thread,
x.cpumode, x.cpu, &lastsym, attr, fp);
- printed += ip__fprintf_jump(br->entries[nr - 1].from, &br->entries[nr - 1],
+ printed += ip__fprintf_jump(entries[nr - 1].from, &entries[nr - 1],
&x, buffer, len, 0, fp, &total_cycles);
if (PRINT_FIELD(SRCCODE))
- printed += print_srccode(thread, x.cpumode, br->entries[nr - 1].from);
+ printed += print_srccode(thread, x.cpumode, entries[nr - 1].from);
}
/* Print all blocks */
for (i = nr - 2; i >= 0; i--) {
- if (br->entries[i].from || br->entries[i].to)
+ if (entries[i].from || entries[i].to)
pr_debug("%d: %" PRIx64 "-%" PRIx64 "\n", i,
- br->entries[i].from,
- br->entries[i].to);
- start = br->entries[i + 1].to;
- end = br->entries[i].from;
+ entries[i].from,
+ entries[i].to);
+ start = entries[i + 1].to;
+ end = entries[i].from;
len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
/* Patch up missing kernel transfers due to ring filters */
if (len == -ENXIO && i > 0) {
- end = br->entries[--i].from;
+ end = entries[--i].from;
pr_debug("\tpatching up to %" PRIx64 "-%" PRIx64 "\n", start, end);
len = grab_bb(buffer, start, end, machine, thread, &x.is64bit, &x.cpumode, false);
}
@@ -1110,7 +1107,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
- printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp,
+ printed += ip__fprintf_jump(ip, &entries[i], &x, buffer + off, len - off, ++insn, fp,
&total_cycles);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, ip);
@@ -1134,9 +1131,9 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
* Hit the branch? In this case we are already done, and the target
* has not been executed yet.
*/
- if (br->entries[0].from == sample->ip)
+ if (entries[0].from == sample->ip)
goto out;
- if (br->entries[0].flags.abort)
+ if (entries[0].flags.abort)
goto out;
/*
@@ -1147,7 +1144,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
* between final branch and sample. When this happens just
* continue walking after the last TO until we hit a branch.
*/
- start = br->entries[0].to;
+ start = entries[0].to;
end = sample->ip;
if (end < start) {
/* Missing jump. Scan 128 bytes for the next branch */
@@ -1681,8 +1678,10 @@ struct perf_script {
bool show_lost_events;
bool show_round_events;
bool show_bpf_events;
+ bool show_cgroup_events;
bool allocated;
bool per_event_dump;
+ bool stitch_lbr;
struct evswitch evswitch;
struct perf_cpu_map *cpus;
struct perf_thread_map *threads;
@@ -1699,7 +1698,7 @@ static int perf_evlist__max_name_len(struct evlist *evlist)
int max = 0;
evlist__for_each_entry(evlist, evsel) {
- int len = strlen(perf_evsel__name(evsel));
+ int len = strlen(evsel__name(evsel));
max = MAX(len, max);
}
@@ -1873,7 +1872,7 @@ static void process_event(struct perf_script *script,
fprintf(fp, "%10" PRIu64 " ", sample->period);
if (PRINT_FIELD(EVNAME)) {
- const char *evname = perf_evsel__name(evsel);
+ const char *evname = evsel__name(evsel);
if (!script->name_width)
script->name_width = perf_evlist__max_name_len(script->session->evlist);
@@ -1909,6 +1908,9 @@ static void process_event(struct perf_script *script,
if (PRINT_FIELD(IP)) {
struct callchain_cursor *cursor = NULL;
+ if (script->stitch_lbr)
+ al->thread->lbr_stitch_enable = true;
+
if (symbol_conf.use_callchain && sample->callchain &&
thread__resolve_callchain(al->thread, &callchain_cursor, evsel,
sample, NULL, NULL, scripting_max_stack) == 0)
@@ -1932,7 +1934,7 @@ static void process_event(struct perf_script *script,
else if (PRINT_FIELD(BRSTACKOFF))
perf_sample__fprintf_brstackoff(sample, thread, attr, fp);
- if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
+ if (evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
perf_sample__fprintf_bpf_output(sample, fp);
perf_sample__fprintf_insn(sample, attr, thread, machine, fp);
@@ -1961,7 +1963,7 @@ static struct scripting_ops *scripting_ops;
static void __process_stat(struct evsel *counter, u64 tstamp)
{
int nthreads = perf_thread_map__nr(counter->core.threads);
- int ncpus = perf_evsel__nr_cpus(counter);
+ int ncpus = evsel__nr_cpus(counter);
int cpu, thread;
static int header_printed;
@@ -1987,7 +1989,7 @@ static void __process_stat(struct evsel *counter, u64 tstamp)
counts->ena,
counts->run,
tstamp,
- perf_evsel__name(counter));
+ evsel__name(counter));
}
}
}
@@ -2026,7 +2028,7 @@ static int cleanup_scripting(void)
static bool filter_cpu(struct perf_sample *sample)
{
- if (cpu_list)
+ if (cpu_list && sample->cpu != (u32)-1)
return !test_bit(sample->cpu, cpu_bitmap);
return false;
}
@@ -2084,6 +2086,7 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
struct perf_script *scr = container_of(tool, struct perf_script, tool);
struct evlist *evlist;
struct evsel *evsel, *pos;
+ u64 sample_type;
int err;
static struct evsel_script *es;
@@ -2116,49 +2119,89 @@ static int process_attr(struct perf_tool *tool, union perf_event *event,
return 0;
}
- set_print_ip_opts(&evsel->core.attr);
-
- if (evsel->core.attr.sample_type)
+ if (evsel->core.attr.sample_type) {
err = perf_evsel__check_attr(evsel, scr->session);
+ if (err)
+ return err;
+ }
- return err;
+ /*
+ * Check if we need to enable callchains based
+ * on events sample_type.
+ */
+ sample_type = perf_evlist__combined_sample_type(evlist);
+ callchain_param_setup(sample_type);
+
+ /* Enable fields for callchain entries */
+ if (symbol_conf.use_callchain &&
+ (sample_type & PERF_SAMPLE_CALLCHAIN ||
+ sample_type & PERF_SAMPLE_BRANCH_STACK ||
+ (sample_type & PERF_SAMPLE_REGS_USER &&
+ sample_type & PERF_SAMPLE_STACK_USER))) {
+ int type = output_type(evsel->core.attr.type);
+
+ if (!(output[type].user_unset_fields & PERF_OUTPUT_IP))
+ output[type].fields |= PERF_OUTPUT_IP;
+ if (!(output[type].user_unset_fields & PERF_OUTPUT_SYM))
+ output[type].fields |= PERF_OUTPUT_SYM;
+ }
+ set_print_ip_opts(&evsel->core.attr);
+ return 0;
}
-static int process_comm_event(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct machine *machine)
+static int print_event_with_time(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine,
+ pid_t pid, pid_t tid, u64 timestamp)
{
- struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
struct perf_session *session = script->session;
struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- int ret = -1;
+ struct thread *thread = NULL;
- thread = machine__findnew_thread(machine, event->comm.pid, event->comm.tid);
- if (thread == NULL) {
- pr_debug("problem processing COMM event, skipping it.\n");
- return -1;
+ if (evsel && !evsel->core.attr.sample_id_all) {
+ sample->cpu = 0;
+ sample->time = timestamp;
+ sample->pid = pid;
+ sample->tid = tid;
}
- if (perf_event__process_comm(tool, event, sample, machine) < 0)
- goto out;
+ if (filter_cpu(sample))
+ return 0;
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->comm.tid;
- sample->pid = event->comm.pid;
- }
- if (!filter_cpu(sample)) {
+ if (tid != -1)
+ thread = machine__findnew_thread(machine, pid, tid);
+
+ if (thread && evsel) {
perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_COMM, stdout);
- perf_event__fprintf(event, stdout);
+ event->header.type, stdout);
}
- ret = 0;
-out:
+
+ perf_event__fprintf(event, stdout);
+
thread__put(thread);
- return ret;
+
+ return 0;
+}
+
+static int print_event(struct perf_tool *tool, union perf_event *event,
+ struct perf_sample *sample, struct machine *machine,
+ pid_t pid, pid_t tid)
+{
+ return print_event_with_time(tool, event, sample, machine, pid, tid, 0);
+}
+
+static int process_comm_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ if (perf_event__process_comm(tool, event, sample, machine) < 0)
+ return -1;
+
+ return print_event(tool, event, sample, machine, event->comm.pid,
+ event->comm.tid);
}
static int process_namespaces_event(struct perf_tool *tool,
@@ -2166,37 +2209,23 @@ static int process_namespaces_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- int ret = -1;
-
- thread = machine__findnew_thread(machine, event->namespaces.pid,
- event->namespaces.tid);
- if (thread == NULL) {
- pr_debug("problem processing NAMESPACES event, skipping it.\n");
+ if (perf_event__process_namespaces(tool, event, sample, machine) < 0)
return -1;
- }
- if (perf_event__process_namespaces(tool, event, sample, machine) < 0)
- goto out;
+ return print_event(tool, event, sample, machine, event->namespaces.pid,
+ event->namespaces.tid);
+}
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->namespaces.tid;
- sample->pid = event->namespaces.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_NAMESPACES, stdout);
- perf_event__fprintf(event, stdout);
- }
- ret = 0;
-out:
- thread__put(thread);
- return ret;
+static int process_cgroup_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ if (perf_event__process_cgroup(tool, event, sample, machine) < 0)
+ return -1;
+
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int process_fork_event(struct perf_tool *tool,
@@ -2204,69 +2233,24 @@ static int process_fork_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_fork(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
- if (thread == NULL) {
- pr_debug("problem processing FORK event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = event->fork.time;
- sample->tid = event->fork.tid;
- sample->pid = event->fork.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_FORK, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
-
- return 0;
+ return print_event_with_time(tool, event, sample, machine,
+ event->fork.pid, event->fork.tid,
+ event->fork.time);
}
static int process_exit_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
- int err = 0;
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
- thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid);
- if (thread == NULL) {
- pr_debug("problem processing EXIT event, skipping it.\n");
+ /* Print before 'exit' deletes anything */
+ if (print_event_with_time(tool, event, sample, machine, event->fork.pid,
+ event->fork.tid, event->fork.time))
return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->fork.tid;
- sample->pid = event->fork.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_EXIT, stdout);
- perf_event__fprintf(event, stdout);
- }
-
- if (perf_event__process_exit(tool, event, sample, machine) < 0)
- err = -1;
- thread__put(thread);
- return err;
+ return perf_event__process_exit(tool, event, sample, machine);
}
static int process_mmap_event(struct perf_tool *tool,
@@ -2274,33 +2258,11 @@ static int process_mmap_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_mmap(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->mmap.pid, event->mmap.tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->mmap.tid;
- sample->pid = event->mmap.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_MMAP, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, event->mmap.pid,
+ event->mmap.tid);
}
static int process_mmap2_event(struct perf_tool *tool,
@@ -2308,33 +2270,11 @@ static int process_mmap2_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (perf_event__process_mmap2(tool, event, sample, machine) < 0)
return -1;
- thread = machine__findnew_thread(machine, event->mmap2.pid, event->mmap2.tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP2 event, skipping it.\n");
- return -1;
- }
-
- if (!evsel->core.attr.sample_id_all) {
- sample->cpu = 0;
- sample->time = 0;
- sample->tid = event->mmap2.tid;
- sample->pid = event->mmap2.pid;
- }
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_MMAP2, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, event->mmap2.pid,
+ event->mmap2.tid);
}
static int process_switch_event(struct perf_tool *tool,
@@ -2342,10 +2282,7 @@ static int process_switch_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
if (perf_event__process_switch(tool, event, sample, machine) < 0)
return -1;
@@ -2356,20 +2293,8 @@ static int process_switch_event(struct perf_tool *tool,
if (!script->show_switch_events)
return 0;
- thread = machine__findnew_thread(machine, sample->pid,
- sample->tid);
- if (thread == NULL) {
- pr_debug("problem processing SWITCH event, skipping it.\n");
- return -1;
- }
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_SWITCH, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int
@@ -2378,23 +2303,8 @@ process_lost_event(struct perf_tool *tool,
struct perf_sample *sample,
struct machine *machine)
{
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
- struct thread *thread;
-
- thread = machine__findnew_thread(machine, sample->pid,
- sample->tid);
- if (thread == NULL)
- return -1;
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- PERF_RECORD_LOST, stdout);
- perf_event__fprintf(event, stdout);
- }
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static int
@@ -2413,33 +2323,11 @@ process_bpf_events(struct perf_tool *tool __maybe_unused,
struct perf_sample *sample,
struct machine *machine)
{
- struct thread *thread;
- struct perf_script *script = container_of(tool, struct perf_script, tool);
- struct perf_session *session = script->session;
- struct evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id);
-
if (machine__process_ksymbol(machine, event, sample) < 0)
return -1;
- if (!evsel->core.attr.sample_id_all) {
- perf_event__fprintf(event, stdout);
- return 0;
- }
-
- thread = machine__findnew_thread(machine, sample->pid, sample->tid);
- if (thread == NULL) {
- pr_debug("problem processing MMAP event, skipping it.\n");
- return -1;
- }
-
- if (!filter_cpu(sample)) {
- perf_sample__fprintf_start(sample, thread, evsel,
- event->header.type, stdout);
- perf_event__fprintf(event, stdout);
- }
-
- thread__put(thread);
- return 0;
+ return print_event(tool, event, sample, machine, sample->pid,
+ sample->tid);
}
static void sig_handler(int sig __maybe_unused)
@@ -2538,6 +2426,8 @@ static int __cmd_script(struct perf_script *script)
script->tool.context_switch = process_switch_event;
if (script->show_namespace_events)
script->tool.namespaces = process_namespaces_event;
+ if (script->show_cgroup_events)
+ script->tool.cgroup = process_cgroup_event;
if (script->show_lost_events)
script->tool.lost = process_lost_event;
if (script->show_round_events) {
@@ -2568,7 +2458,7 @@ static int __cmd_script(struct perf_script *script)
struct script_spec {
struct list_head node;
struct scripting_ops *ops;
- char spec[0];
+ char spec[];
};
static LIST_HEAD(script_specs);
@@ -2806,9 +2696,11 @@ parse:
if (change == REMOVE) {
output[j].fields &= ~all_output_options[i].field;
output[j].user_set_fields &= ~all_output_options[i].field;
+ output[j].user_unset_fields |= all_output_options[i].field;
} else {
output[j].fields |= all_output_options[i].field;
output[j].user_set_fields |= all_output_options[i].field;
+ output[j].user_unset_fields &= ~all_output_options[i].field;
}
output[j].user_set = true;
output[j].wildcard_set = true;
@@ -3094,7 +2986,7 @@ static int check_ev_match(char *dir_name, char *scriptname,
match = 0;
evlist__for_each_entry(session->evlist, pos) {
- if (!strcmp(perf_evsel__name(pos), evname)) {
+ if (!strcmp(evsel__name(pos), evname)) {
match = 1;
break;
}
@@ -3214,10 +3106,10 @@ static char *get_script_path(const char *script_root, const char *suffix)
__script_root = get_script_root(script_dirent, suffix);
if (__script_root && !strcmp(script_root, __script_root)) {
free(__script_root);
- closedir(lang_dir);
closedir(scripts_dir);
scnprintf(script_path, MAXPATHLEN, "%s/%s",
lang_path, script_dirent->d_name);
+ closedir(lang_dir);
return strdup(script_path);
}
free(__script_root);
@@ -3291,6 +3183,12 @@ static void script__setup_sample_type(struct perf_script *script)
else
callchain_param.record_mode = CALLCHAIN_FP;
}
+
+ if (script->stitch_lbr && (callchain_param.record_mode != CALLCHAIN_LBR)) {
+ pr_warning("Can't find LBR callchain. Switch off --stitch-lbr.\n"
+ "Please apply --call-graph lbr when recording.\n");
+ script->stitch_lbr = false;
+ }
}
static int process_stat_round_event(struct perf_session *session,
@@ -3414,7 +3312,10 @@ static int parse_xed(const struct option *opt __maybe_unused,
const char *str __maybe_unused,
int unset __maybe_unused)
{
- force_pager("xed -F insn: -A -64 | less");
+ if (isatty(1))
+ force_pager("xed -F insn: -A -64 | less");
+ else
+ force_pager("xed -F insn: -A -64");
return 0;
}
@@ -3463,6 +3364,7 @@ int cmd_script(int argc, const char **argv)
.mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.namespaces = perf_event__process_namespaces,
+ .cgroup = perf_event__process_cgroup,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
.attr = process_attr,
@@ -3551,6 +3453,7 @@ int cmd_script(int argc, const char **argv)
"anything beyond the specified depth will be ignored. "
"Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)),
OPT_BOOLEAN(0, "reltime", &reltime, "Show time stamps relative to start"),
+ OPT_BOOLEAN(0, "deltatime", &deltatime, "Show time stamps relative to previous event"),
OPT_BOOLEAN('I', "show-info", &show_full_info,
"display extended information from perf.data file"),
OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path,
@@ -3563,6 +3466,8 @@ int cmd_script(int argc, const char **argv)
"Show context switch events (if recorded)"),
OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events,
"Show namespace events (if recorded)"),
+ OPT_BOOLEAN('\0', "show-cgroup-events", &script.show_cgroup_events,
+ "Show cgroup events (if recorded)"),
OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events,
"Show lost events (if recorded)"),
OPT_BOOLEAN('\0', "show-round-events", &script.show_round_events,
@@ -3598,6 +3503,8 @@ int cmd_script(int argc, const char **argv)
"file", "file saving guest os /proc/kallsyms"),
OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
"file", "file saving guest os /proc/modules"),
+ OPT_BOOLEAN('\0', "stitch-lbr", &script.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
OPTS_EVSWITCH(&script.evswitch),
OPT_END()
};
@@ -3647,7 +3554,14 @@ int cmd_script(int argc, const char **argv)
}
}
- if (itrace_synth_opts.callchain &&
+ if (reltime && deltatime) {
+ fprintf(stderr,
+ "reltime and deltatime - the two don't get along well. "
+ "Please limit to --reltime or --deltatime.\n");
+ return -1;
+ }
+
+ if ((itrace_synth_opts.callchain || itrace_synth_opts.add_callchain) &&
itrace_synth_opts.callchain_sz > scripting_max_stack)
scripting_max_stack = itrace_synth_opts.callchain_sz;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a098c2ebf4ea..9be020e0098a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -66,6 +66,7 @@
#include "util/time-utils.h"
#include "util/top.h"
#include "util/affinity.h"
+#include "util/pfm.h"
#include "asm/bug.h"
#include <linux/time64.h>
@@ -189,6 +190,59 @@ static struct perf_stat_config stat_config = {
.big_num = true,
};
+static bool cpus_map_matched(struct evsel *a, struct evsel *b)
+{
+ if (!a->core.cpus && !b->core.cpus)
+ return true;
+
+ if (!a->core.cpus || !b->core.cpus)
+ return false;
+
+ if (a->core.cpus->nr != b->core.cpus->nr)
+ return false;
+
+ for (int i = 0; i < a->core.cpus->nr; i++) {
+ if (a->core.cpus->map[i] != b->core.cpus->map[i])
+ return false;
+ }
+
+ return true;
+}
+
+static void evlist__check_cpu_maps(struct evlist *evlist)
+{
+ struct evsel *evsel, *pos, *leader;
+ char buf[1024];
+
+ evlist__for_each_entry(evlist, evsel) {
+ leader = evsel->leader;
+
+ /* Check that leader matches cpus with each member. */
+ if (leader == evsel)
+ continue;
+ if (cpus_map_matched(leader, evsel))
+ continue;
+
+ /* If there's mismatch disable the group and warn user. */
+ WARN_ONCE(1, "WARNING: grouped events cpus do not match, disabling group:\n");
+ evsel__group_desc(leader, buf, sizeof(buf));
+ pr_warning(" %s\n", buf);
+
+ if (verbose) {
+ cpu_map__snprint(leader->core.cpus, buf, sizeof(buf));
+ pr_warning(" %s: %s\n", leader->name, buf);
+ cpu_map__snprint(evsel->core.cpus, buf, sizeof(buf));
+ pr_warning(" %s: %s\n", evsel->name, buf);
+ }
+
+ for_each_group_evsel(pos, leader) {
+ pos->leader = pos;
+ pos->core.nr_members = 0;
+ }
+ evsel->leader->core.nr_members = 0;
+ }
+}
+
static inline void diff_timespec(struct timespec *r, struct timespec *a,
struct timespec *b)
{
@@ -238,9 +292,8 @@ static int write_stat_round_event(u64 tm, u64 type)
#define SID(e, x, y) xyarray__entry(e->core.sample_id, x, y)
-static int
-perf_evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
- struct perf_counts_values *count)
+static int evsel__write_stat_event(struct evsel *counter, u32 cpu, u32 thread,
+ struct perf_counts_values *count)
{
struct perf_sample_id *sid = SID(counter, cpu, thread);
@@ -259,7 +312,7 @@ static int read_single_counter(struct evsel *counter, int cpu,
count->val = val;
return 0;
}
- return perf_evsel__read_counter(counter, cpu, thread);
+ return evsel__read_counter(counter, cpu, thread);
}
/*
@@ -284,7 +337,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
/*
* The leader's group read loads data into its group members
- * (via perf_evsel__read_counter()) and sets their count->loaded.
+ * (via evsel__read_counter()) and sets their count->loaded.
*/
if (!perf_counts__is_loaded(counter->counts, cpu, thread) &&
read_single_counter(counter, cpu, thread, rs)) {
@@ -297,7 +350,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
perf_counts__set_loaded(counter->counts, cpu, thread, false);
if (STAT_RECORD) {
- if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
+ if (evsel__write_stat_event(counter, cpu, thread, count)) {
pr_err("failed to write stat event\n");
return -1;
}
@@ -306,7 +359,7 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
if (verbose > 1) {
fprintf(stat_config.output,
"%s: %d: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter),
+ evsel__name(counter),
cpu,
count->val, count->ena, count->run);
}
@@ -315,14 +368,14 @@ static int read_counter_cpu(struct evsel *counter, struct timespec *rs, int cpu)
return 0;
}
-static void read_counters(struct timespec *rs)
+static int read_affinity_counters(struct timespec *rs)
{
struct evsel *counter;
struct affinity affinity;
int i, ncpus, cpu;
if (affinity__setup(&affinity) < 0)
- return;
+ return -1;
ncpus = perf_cpu_map__nr(evsel_list->core.all_cpus);
if (!target__has_cpu(&target) || target__has_per_thread(&target))
@@ -342,6 +395,15 @@ static void read_counters(struct timespec *rs)
}
}
affinity__cleanup(&affinity);
+ return 0;
+}
+
+static void read_counters(struct timespec *rs)
+{
+ struct evsel *counter;
+
+ if (!stat_config.summary && (read_affinity_counters(rs) < 0))
+ return;
evlist__for_each_entry(evsel_list, counter) {
if (counter->err)
@@ -352,6 +414,46 @@ static void read_counters(struct timespec *rs)
}
}
+static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
+{
+ int i;
+
+ config->stats = calloc(nthreads, sizeof(struct runtime_stat));
+ if (!config->stats)
+ return -1;
+
+ config->stats_num = nthreads;
+
+ for (i = 0; i < nthreads; i++)
+ runtime_stat__init(&config->stats[i]);
+
+ return 0;
+}
+
+static void runtime_stat_delete(struct perf_stat_config *config)
+{
+ int i;
+
+ if (!config->stats)
+ return;
+
+ for (i = 0; i < config->stats_num; i++)
+ runtime_stat__exit(&config->stats[i]);
+
+ zfree(&config->stats);
+}
+
+static void runtime_stat_reset(struct perf_stat_config *config)
+{
+ int i;
+
+ if (!config->stats)
+ return;
+
+ for (i = 0; i < config->stats_num; i++)
+ perf_stat__reset_shadow_per_stat(&config->stats[i]);
+}
+
static void process_interval(void)
{
struct timespec ts, rs;
@@ -359,6 +461,8 @@ static void process_interval(void)
clock_gettime(CLOCK_MONOTONIC, &ts);
diff_timespec(&rs, &ts, &ref_time);
+ perf_stat__reset_shadow_per_stat(&rt_stat);
+ runtime_stat_reset(&stat_config);
read_counters(&rs);
if (STAT_RECORD) {
@@ -367,7 +471,7 @@ static void process_interval(void)
}
init_stats(&walltime_nsecs_stats);
- update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000);
+ update_stats(&walltime_nsecs_stats, stat_config.interval * 1000000ULL);
print_counters(&rs, 0, NULL);
}
@@ -409,7 +513,7 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf
workload_exec_errno = info->si_value.sival_int;
}
-static bool perf_evsel__should_store_id(struct evsel *counter)
+static bool evsel__should_store_id(struct evsel *counter)
{
return STAT_RECORD || counter->core.attr.read_format & PERF_FORMAT_ID;
}
@@ -454,7 +558,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
errno == ENXIO) {
if (verbose > 0)
ui__warning("%s event is not supported by the kernel.\n",
- perf_evsel__name(counter));
+ evsel__name(counter));
counter->supported = false;
/*
* errored is a sticky flag that means one of the counter's
@@ -465,7 +569,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
if ((counter->leader != counter) ||
!(counter->leader->core.nr_members > 1))
return COUNTER_SKIP;
- } else if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+ } else if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
return COUNTER_RETRY;
@@ -483,8 +587,7 @@ static enum counter_recovery stat_handle_error(struct evsel *counter)
}
}
- perf_evsel__open_strerror(counter, &target,
- errno, msg, sizeof(msg));
+ evsel__open_strerror(counter, &target, errno, msg, sizeof(msg));
ui__error("%s\n", msg);
if (child_pid != -1)
@@ -604,7 +707,7 @@ try_again:
if (!counter->reset_group)
continue;
try_again_reset:
- pr_debug2("reopening weak %s\n", perf_evsel__name(counter));
+ pr_debug2("reopening weak %s\n", evsel__name(counter));
if (create_perf_stat_counter(counter, &stat_config, &target,
counter->cpu_iter - 1) < 0) {
@@ -635,14 +738,14 @@ try_again_reset:
if (l > stat_config.unit_width)
stat_config.unit_width = l;
- if (perf_evsel__should_store_id(counter) &&
- perf_evsel__store_ids(counter, evsel_list))
+ if (evsel__should_store_id(counter) &&
+ evsel__store_ids(counter, evsel_list))
return -1;
}
if (perf_evlist__apply_filters(evsel_list, &counter)) {
pr_err("failed to set filter \"%s\" on event %s with %d (%s)\n",
- counter->filter, perf_evsel__name(counter), errno,
+ counter->filter, evsel__name(counter), errno,
str_error_r(errno, msg, sizeof(msg)));
return -1;
}
@@ -686,8 +789,11 @@ try_again_reset:
break;
}
}
- if (child_pid != -1)
+ if (child_pid != -1) {
+ if (timeout)
+ kill(child_pid, SIGTERM);
wait4(child_pid, &status, 0, &stat_config.ru_data);
+ }
if (workload_exec_errno) {
const char *emsg = str_error_r(workload_exec_errno, msg, sizeof(msg));
@@ -720,7 +826,21 @@ try_again_reset:
if (stat_config.walltime_run_table)
stat_config.walltime_run[run_idx] = t1 - t0;
- update_stats(&walltime_nsecs_stats, t1 - t0);
+ if (interval) {
+ stat_config.interval = 0;
+ stat_config.summary = true;
+ init_stats(&walltime_nsecs_stats);
+ update_stats(&walltime_nsecs_stats, t1 - t0);
+
+ if (stat_config.aggr_mode == AGGR_GLOBAL)
+ perf_evlist__save_aggr_prev_raw_counts(evsel_list);
+
+ perf_evlist__copy_prev_raw_counts(evsel_list);
+ perf_evlist__reset_prev_raw_counts(evsel_list);
+ runtime_stat_reset(&stat_config);
+ perf_stat__reset_shadow_per_stat(&rt_stat);
+ } else
+ update_stats(&walltime_nsecs_stats, t1 - t0);
/*
* Closing a group leader splits the group, and as we only disable
@@ -819,10 +939,16 @@ static void sig_atexit(void)
kill(getpid(), signr);
}
+void perf_stat__set_big_num(int set)
+{
+ stat_config.big_num = (set != 0);
+}
+
static int stat__set_big_num(const struct option *opt __maybe_unused,
const char *s __maybe_unused, int unset)
{
big_num_opt = unset ? 0 : 1;
+ perf_stat__set_big_num(!unset);
return 0;
}
@@ -838,7 +964,10 @@ static int parse_metric_groups(const struct option *opt,
const char *str,
int unset __maybe_unused)
{
- return metricgroup__parse_groups(opt, str, &stat_config.metric_events);
+ return metricgroup__parse_groups(opt, str,
+ stat_config.metric_no_group,
+ stat_config.metric_no_merge,
+ &stat_config.metric_events);
}
static struct option stat_options[] = {
@@ -916,6 +1045,10 @@ static struct option stat_options[] = {
"ms to wait before starting measurement after program start"),
OPT_CALLBACK_NOOPT(0, "metric-only", &stat_config.metric_only, NULL,
"Only print computed metrics. No raw values", enable_metric_only),
+ OPT_BOOLEAN(0, "metric-no-group", &stat_config.metric_no_group,
+ "don't group metric events, impacts multiplexing"),
+ OPT_BOOLEAN(0, "metric-no-merge", &stat_config.metric_no_merge,
+ "don't try to share events between metrics in a group"),
OPT_BOOLEAN(0, "topdown", &topdown_run,
"measure topdown level 1 statistics"),
OPT_BOOLEAN(0, "smi-cost", &smi_cost,
@@ -929,6 +1062,15 @@ static struct option stat_options[] = {
OPT_BOOLEAN_FLAG(0, "all-user", &stat_config.all_user,
"Configure all used events to run in user space.",
PARSE_OPT_EXCLUSIVE),
+ OPT_BOOLEAN(0, "percore-show-thread", &stat_config.percore_show_thread,
+ "Use with 'percore' event qualifier to show the event "
+ "counts of one hardware thread by sum up total hardware "
+ "threads of same physical core"),
+#ifdef HAVE_LIBPFM
+ OPT_CALLBACK(0, "pfm-events", &evsel_list, "event",
+ "libpfm4 event selector. use 'perf list' to list available events",
+ parse_libpfm_events_option),
+#endif
OPT_END()
};
@@ -1436,6 +1578,8 @@ static int add_default_attributes(void)
struct option opt = { .value = &evsel_list };
return metricgroup__parse_groups(&opt, "transaction",
+ stat_config.metric_no_group,
+ stat_config.metric_no_merge,
&stat_config.metric_events);
}
@@ -1731,35 +1875,6 @@ int process_cpu_map_event(struct perf_session *session,
return set_maps(st);
}
-static int runtime_stat_new(struct perf_stat_config *config, int nthreads)
-{
- int i;
-
- config->stats = calloc(nthreads, sizeof(struct runtime_stat));
- if (!config->stats)
- return -1;
-
- config->stats_num = nthreads;
-
- for (i = 0; i < nthreads; i++)
- runtime_stat__init(&config->stats[i]);
-
- return 0;
-}
-
-static void runtime_stat_delete(struct perf_stat_config *config)
-{
- int i;
-
- if (!config->stats)
- return;
-
- for (i = 0; i < config->stats_num; i++)
- runtime_stat__exit(&config->stats[i]);
-
- zfree(&config->stats);
-}
-
static const char * const stat_report_usage[] = {
"perf stat report [<options>]",
NULL,
@@ -2051,6 +2166,8 @@ int cmd_stat(int argc, const char **argv)
goto out;
}
+ evlist__check_cpu_maps(evsel_list);
+
/*
* Initialize thread_map with comm names,
* so we could print it out on output.
@@ -2141,7 +2258,7 @@ int cmd_stat(int argc, const char **argv)
}
}
- if (!forever && status != -1 && !interval)
+ if (!forever && status != -1 && (!interval || stat_config.summary))
print_counters(NULL, argc, argv);
if (STAT_RECORD) {
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 9e84fae9b096..4e380e7b5230 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -128,7 +128,7 @@ struct sample_wrapper {
struct sample_wrapper *next;
u64 timestamp;
- unsigned char data[0];
+ unsigned char data[];
};
#define TYPE_NONE 0
@@ -579,8 +579,8 @@ process_sample_cpu_idle(struct timechart *tchart __maybe_unused,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u32 state = perf_evsel__intval(evsel, sample, "state");
- u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
+ u32 state = evsel__intval(evsel, sample, "state");
+ u32 cpu_id = evsel__intval(evsel, sample, "cpu_id");
if (state == (u32)PWR_EVENT_EXIT)
c_state_end(tchart, cpu_id, sample->time);
@@ -595,8 +595,8 @@ process_sample_cpu_frequency(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u32 state = perf_evsel__intval(evsel, sample, "state");
- u32 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
+ u32 state = evsel__intval(evsel, sample, "state");
+ u32 cpu_id = evsel__intval(evsel, sample, "cpu_id");
p_state_change(tchart, cpu_id, sample->time, state);
return 0;
@@ -608,9 +608,9 @@ process_sample_sched_wakeup(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace)
{
- u8 flags = perf_evsel__intval(evsel, sample, "common_flags");
- int waker = perf_evsel__intval(evsel, sample, "common_pid");
- int wakee = perf_evsel__intval(evsel, sample, "pid");
+ u8 flags = evsel__intval(evsel, sample, "common_flags");
+ int waker = evsel__intval(evsel, sample, "common_pid");
+ int wakee = evsel__intval(evsel, sample, "pid");
sched_wakeup(tchart, sample->cpu, sample->time, waker, wakee, flags, backtrace);
return 0;
@@ -622,9 +622,9 @@ process_sample_sched_switch(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace)
{
- int prev_pid = perf_evsel__intval(evsel, sample, "prev_pid");
- int next_pid = perf_evsel__intval(evsel, sample, "next_pid");
- u64 prev_state = perf_evsel__intval(evsel, sample, "prev_state");
+ int prev_pid = evsel__intval(evsel, sample, "prev_pid");
+ int next_pid = evsel__intval(evsel, sample, "next_pid");
+ u64 prev_state = evsel__intval(evsel, sample, "prev_state");
sched_switch(tchart, sample->cpu, sample->time, prev_pid, next_pid,
prev_state, backtrace);
@@ -638,8 +638,8 @@ process_sample_power_start(struct timechart *tchart __maybe_unused,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
- u64 value = perf_evsel__intval(evsel, sample, "value");
+ u64 cpu_id = evsel__intval(evsel, sample, "cpu_id");
+ u64 value = evsel__intval(evsel, sample, "value");
c_state_start(cpu_id, sample->time, value);
return 0;
@@ -661,8 +661,8 @@ process_sample_power_frequency(struct timechart *tchart,
struct perf_sample *sample,
const char *backtrace __maybe_unused)
{
- u64 cpu_id = perf_evsel__intval(evsel, sample, "cpu_id");
- u64 value = perf_evsel__intval(evsel, sample, "value");
+ u64 cpu_id = evsel__intval(evsel, sample, "cpu_id");
+ u64 value = evsel__intval(evsel, sample, "value");
p_state_change(tchart, cpu_id, sample->time, value);
return 0;
@@ -843,7 +843,7 @@ process_enter_read(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_READ,
sample->time, fd);
}
@@ -853,7 +853,7 @@ process_exit_read(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_READ,
sample->time, ret);
}
@@ -863,7 +863,7 @@ process_enter_write(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_WRITE,
sample->time, fd);
}
@@ -873,7 +873,7 @@ process_exit_write(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_WRITE,
sample->time, ret);
}
@@ -883,7 +883,7 @@ process_enter_sync(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_SYNC,
sample->time, fd);
}
@@ -893,7 +893,7 @@ process_exit_sync(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_SYNC,
sample->time, ret);
}
@@ -903,7 +903,7 @@ process_enter_tx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_TX,
sample->time, fd);
}
@@ -913,7 +913,7 @@ process_exit_tx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_TX,
sample->time, ret);
}
@@ -923,7 +923,7 @@ process_enter_rx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_RX,
sample->time, fd);
}
@@ -933,7 +933,7 @@ process_exit_rx(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_RX,
sample->time, ret);
}
@@ -943,7 +943,7 @@ process_enter_poll(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long fd = perf_evsel__intval(evsel, sample, "fd");
+ long fd = evsel__intval(evsel, sample, "fd");
return pid_begin_io_sample(tchart, sample->tid, IOTYPE_POLL,
sample->time, fd);
}
@@ -953,7 +953,7 @@ process_exit_poll(struct timechart *tchart,
struct evsel *evsel,
struct perf_sample *sample)
{
- long ret = perf_evsel__intval(evsel, sample, "ret");
+ long ret = evsel__intval(evsel, sample, "ret");
return pid_end_io_sample(tchart, sample->tid, IOTYPE_POLL,
sample->time, ret);
}
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index d2539b793f9d..13889d73f8dd 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -33,6 +33,7 @@
#include "util/map.h"
#include "util/mmap.h"
#include "util/session.h"
+#include "util/thread.h"
#include "util/symbol.h"
#include "util/synthetic-events.h"
#include "util/top.h"
@@ -52,6 +53,7 @@
#include "util/debug.h"
#include "util/ordered-events.h"
+#include "util/pfm.h"
#include <assert.h>
#include <elf.h>
@@ -254,7 +256,7 @@ static void perf_top__show_details(struct perf_top *top)
if (notes->src == NULL)
goto out_unlock;
- printf("Showing %s for %s\n", perf_evsel__name(top->sym_evsel), symbol->name);
+ printf("Showing %s for %s\n", evsel__name(top->sym_evsel), symbol->name);
printf(" Events Pcnt (>=%d%%)\n", top->annotation_opts.min_pcnt);
more = symbol__annotate_printf(&he->ms, top->sym_evsel, &top->annotation_opts);
@@ -297,8 +299,7 @@ static void perf_top__resort_hists(struct perf_top *t)
hists__collapse_resort(hists, NULL);
/* Non-group events are considered as leader */
- if (symbol_conf.event_group &&
- !perf_evsel__is_group_leader(pos)) {
+ if (symbol_conf.event_group && !evsel__is_group_leader(pos)) {
struct hists *leader_hists = evsel__hists(pos->leader);
hists__match(leader_hists, hists);
@@ -307,7 +308,7 @@ static void perf_top__resort_hists(struct perf_top *t)
}
evlist__for_each_entry(evlist, pos) {
- perf_evsel__output_resort(pos, NULL);
+ evsel__output_resort(pos, NULL);
}
}
@@ -441,7 +442,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top)
fprintf(stdout, "\t[e] display entries (lines). \t(%d)\n", top->print_entries);
if (top->evlist->core.nr_entries > 1)
- fprintf(stdout, "\t[E] active event counter. \t(%s)\n", perf_evsel__name(top->sym_evsel));
+ fprintf(stdout, "\t[E] active event counter. \t(%s)\n", evsel__name(top->sym_evsel));
fprintf(stdout, "\t[f] profile display filter (count). \t(%d)\n", top->count_filter);
@@ -528,13 +529,13 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c)
fprintf(stderr, "\nAvailable events:");
evlist__for_each_entry(top->evlist, top->sym_evsel)
- fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, perf_evsel__name(top->sym_evsel));
+ fprintf(stderr, "\n\t%d %s", top->sym_evsel->idx, evsel__name(top->sym_evsel));
prompt_integer(&counter, "Enter details event counter");
if (counter >= top->evlist->core.nr_entries) {
top->sym_evsel = evlist__first(top->evlist);
- fprintf(stderr, "Sorry, no such event, using %s.\n", perf_evsel__name(top->sym_evsel));
+ fprintf(stderr, "Sorry, no such event, using %s.\n", evsel__name(top->sym_evsel));
sleep(1);
break;
}
@@ -616,6 +617,7 @@ static void *display_thread_tui(void *arg)
.arg = top,
.refresh = top->delay_secs,
};
+ int ret;
/* In order to read symbols from other namespaces perf to needs to call
* setns(2). This isn't permitted if the struct_fs has multiple users.
@@ -626,6 +628,7 @@ static void *display_thread_tui(void *arg)
prctl(PR_SET_NAME, "perf-top-UI", 0, 0, 0);
+repeat:
perf_top__sort_new_samples(top);
/*
@@ -638,13 +641,18 @@ static void *display_thread_tui(void *arg)
hists->uid_filter_str = top->record_opts.target.uid_str;
}
- perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
+ ret = perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
top->min_percent,
&top->session->header.env,
!top->record_opts.overwrite,
&top->annotation_opts);
- stop_top();
+ if (ret == K_RELOAD) {
+ top->zero = true;
+ goto repeat;
+ } else
+ stop_top();
+
return NULL;
}
@@ -768,6 +776,9 @@ static void perf_event__process_sample(struct perf_tool *tool,
if (machine__resolve(machine, &al, sample) < 0)
return;
+ if (top->stitch_lbr)
+ al.thread->lbr_stitch_enable = true;
+
if (!machine->kptr_restrict_warned &&
symbol_conf.kptr_restrict &&
al.cpumode == PERF_RECORD_MISC_KERNEL) {
@@ -939,7 +950,7 @@ static int perf_top__overwrite_check(struct perf_top *top)
{
struct record_opts *opts = &top->record_opts;
struct evlist *evlist = top->evlist;
- struct perf_evsel_config_term *term;
+ struct evsel_config_term *term;
struct list_head *config_terms;
struct evsel *evsel;
int set, overwrite = -1;
@@ -948,7 +959,7 @@ static int perf_top__overwrite_check(struct perf_top *top)
set = -1;
config_terms = &evsel->config_terms;
list_for_each_entry(term, config_terms, list) {
- if (term->type == PERF_EVSEL__CONFIG_TERM_OVERWRITE)
+ if (term->type == EVSEL__CONFIG_TERM_OVERWRITE)
set = term->val.overwrite ? 1 : 0;
}
@@ -1035,14 +1046,13 @@ try_again:
perf_top_overwrite_fallback(top, counter))
goto try_again;
- if (perf_evsel__fallback(counter, errno, msg, sizeof(msg))) {
+ if (evsel__fallback(counter, errno, msg, sizeof(msg))) {
if (verbose > 0)
ui__warning("%s\n", msg);
goto try_again;
}
- perf_evsel__open_strerror(counter, &opts->target,
- errno, msg, sizeof(msg));
+ evsel__open_strerror(counter, &opts->target, errno, msg, sizeof(msg));
ui__error("%s\n", msg);
goto out_err;
}
@@ -1246,6 +1256,14 @@ static int __cmd_top(struct perf_top *top)
if (opts->record_namespaces)
top->tool.namespace_events = true;
+ if (opts->record_cgroup) {
+#ifdef HAVE_FILE_HANDLE
+ top->tool.cgroup_events = true;
+#else
+ pr_err("cgroup tracking is not supported.\n");
+ return -1;
+#endif
+ }
ret = perf_event__synthesize_bpf_events(top->session, perf_event__process,
&top->session->machines.host,
@@ -1253,6 +1271,11 @@ static int __cmd_top(struct perf_top *top)
if (ret < 0)
pr_debug("Couldn't synthesize BPF events: Pre-existing BPF programs won't have symbols resolved.\n");
+ ret = perf_event__synthesize_cgroups(&top->tool, perf_event__process,
+ &top->session->machines.host);
+ if (ret < 0)
+ pr_debug("Couldn't synthesize cgroup events.\n");
+
machine__synthesize_threads(&top->session->machines.host, &opts->target,
top->evlist->core.threads, false,
top->nr_threads_synthesize);
@@ -1545,10 +1568,22 @@ int cmd_top(int argc, const char **argv)
"number of thread to run event synthesize"),
OPT_BOOLEAN(0, "namespaces", &opts->record_namespaces,
"Record namespaces events"),
+ OPT_BOOLEAN(0, "all-cgroups", &opts->record_cgroup,
+ "Record cgroup events"),
+ OPT_INTEGER(0, "group-sort-idx", &symbol_conf.group_sort_idx,
+ "Sort the output by the event at the index n in group. "
+ "If n is invalid, sort by the first event. "
+ "WARNING: should be used on grouped events."),
+ OPT_BOOLEAN(0, "stitch-lbr", &top.stitch_lbr,
+ "Enable LBR callgraph stitching approach"),
+#ifdef HAVE_LIBPFM
+ OPT_CALLBACK(0, "pfm-events", &top.evlist, "event",
+ "libpfm4 event selector. use 'perf list' to list available events",
+ parse_libpfm_events_option),
+#endif
OPTS_EVSWITCH(&top.evswitch),
OPT_END()
};
- struct evlist *sb_evlist = NULL;
const char * const top_usage[] = {
"perf top [<options>]",
NULL
@@ -1614,6 +1649,11 @@ int cmd_top(int argc, const char **argv)
}
}
+ if (top.stitch_lbr && !(callchain_param.record_mode == CALLCHAIN_LBR)) {
+ pr_err("Error: --stitch-lbr must be used with --call-graph lbr\n");
+ goto out_delete_evlist;
+ }
+
if (opts->branch_stack && callchain_param.enabled)
symbol_conf.show_branchflag_count = true;
@@ -1706,10 +1746,21 @@ int cmd_top(int argc, const char **argv)
goto out_delete_evlist;
}
- if (!top.record_opts.no_bpf_event)
- bpf_event__add_sb_event(&sb_evlist, &perf_env);
+ if (!top.record_opts.no_bpf_event) {
+ top.sb_evlist = evlist__new();
+
+ if (top.sb_evlist == NULL) {
+ pr_err("Couldn't create side band evlist.\n.");
+ goto out_delete_evlist;
+ }
+
+ if (evlist__add_bpf_sb_event(top.sb_evlist, &perf_env)) {
+ pr_err("Couldn't ask for PERF_RECORD_BPF_EVENT side band events.\n.");
+ goto out_delete_evlist;
+ }
+ }
- if (perf_evlist__start_sb_thread(sb_evlist, target)) {
+ if (perf_evlist__start_sb_thread(top.sb_evlist, target)) {
pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
opts->no_bpf_event = true;
}
@@ -1717,7 +1768,7 @@ int cmd_top(int argc, const char **argv)
status = __cmd_top(&top);
if (!opts->no_bpf_event)
- perf_evlist__stop_sb_thread(sb_evlist);
+ perf_evlist__stop_sb_thread(top.sb_evlist);
out_delete_evlist:
evlist__delete(top.evlist);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 01d542007c8b..4cbb64edc998 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -366,11 +366,9 @@ out_delete:
return NULL;
}
-static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
- struct tp_field *field,
- const char *name)
+static int evsel__init_tp_uint_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
- struct tep_format_field *format_field = perf_evsel__field(evsel, name);
+ struct tep_format_field *format_field = evsel__field(evsel, name);
if (format_field == NULL)
return -1;
@@ -380,13 +378,11 @@ static int perf_evsel__init_tp_uint_field(struct evsel *evsel,
#define perf_evsel__init_sc_tp_uint_field(evsel, name) \
({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
- perf_evsel__init_tp_uint_field(evsel, &sc->name, #name); })
+ evsel__init_tp_uint_field(evsel, &sc->name, #name); })
-static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
- struct tp_field *field,
- const char *name)
+static int evsel__init_tp_ptr_field(struct evsel *evsel, struct tp_field *field, const char *name)
{
- struct tep_format_field *format_field = perf_evsel__field(evsel, name);
+ struct tep_format_field *format_field = evsel__field(evsel, name);
if (format_field == NULL)
return -1;
@@ -396,7 +392,7 @@ static int perf_evsel__init_tp_ptr_field(struct evsel *evsel,
#define perf_evsel__init_sc_tp_ptr_field(evsel, name) \
({ struct syscall_tp *sc = __evsel__syscall_tp(evsel);\
- perf_evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
+ evsel__init_tp_ptr_field(evsel, &sc->name, #name); })
static void evsel__delete_priv(struct evsel *evsel)
{
@@ -404,13 +400,13 @@ static void evsel__delete_priv(struct evsel *evsel)
evsel__delete(evsel);
}
-static int perf_evsel__init_syscall_tp(struct evsel *evsel)
+static int evsel__init_syscall_tp(struct evsel *evsel)
{
struct syscall_tp *sc = evsel__syscall_tp(evsel);
if (sc != NULL) {
- if (perf_evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
- perf_evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
+ if (evsel__init_tp_uint_field(evsel, &sc->id, "__syscall_nr") &&
+ evsel__init_tp_uint_field(evsel, &sc->id, "nr"))
return -ENOENT;
return 0;
}
@@ -418,14 +414,14 @@ static int perf_evsel__init_syscall_tp(struct evsel *evsel)
return -ENOMEM;
}
-static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
+static int evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evsel *tp)
{
struct syscall_tp *sc = evsel__syscall_tp(evsel);
if (sc != NULL) {
- struct tep_format_field *syscall_id = perf_evsel__field(tp, "id");
+ struct tep_format_field *syscall_id = evsel__field(tp, "id");
if (syscall_id == NULL)
- syscall_id = perf_evsel__field(tp, "__syscall_nr");
+ syscall_id = evsel__field(tp, "__syscall_nr");
if (syscall_id == NULL ||
__tp_field__init_uint(&sc->id, syscall_id->size, syscall_id->offset, evsel->needs_swap))
return -EINVAL;
@@ -436,21 +432,21 @@ static int perf_evsel__init_augmented_syscall_tp(struct evsel *evsel, struct evs
return -ENOMEM;
}
-static int perf_evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
+static int evsel__init_augmented_syscall_tp_args(struct evsel *evsel)
{
struct syscall_tp *sc = __evsel__syscall_tp(evsel);
return __tp_field__init_ptr(&sc->args, sc->id.offset + sizeof(u64));
}
-static int perf_evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
+static int evsel__init_augmented_syscall_tp_ret(struct evsel *evsel)
{
struct syscall_tp *sc = __evsel__syscall_tp(evsel);
return __tp_field__init_uint(&sc->ret, sizeof(u64), sc->id.offset + sizeof(u64), evsel->needs_swap);
}
-static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
+static int evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
{
if (evsel__syscall_tp(evsel) != NULL) {
if (perf_evsel__init_sc_tp_uint_field(evsel, id))
@@ -465,16 +461,16 @@ static int perf_evsel__init_raw_syscall_tp(struct evsel *evsel, void *handler)
static struct evsel *perf_evsel__raw_syscall_newtp(const char *direction, void *handler)
{
- struct evsel *evsel = perf_evsel__newtp("raw_syscalls", direction);
+ struct evsel *evsel = evsel__newtp("raw_syscalls", direction);
/* older kernel (e.g., RHEL6) use syscalls:{enter,exit} */
if (IS_ERR(evsel))
- evsel = perf_evsel__newtp("syscalls", direction);
+ evsel = evsel__newtp("syscalls", direction);
if (IS_ERR(evsel))
return NULL;
- if (perf_evsel__init_raw_syscall_tp(evsel, handler))
+ if (evsel__init_raw_syscall_tp(evsel, handler))
goto out_delete;
return evsel;
@@ -1752,12 +1748,26 @@ static int trace__read_syscall_info(struct trace *trace, int id)
struct syscall *sc;
const char *name = syscalltbl__name(trace->sctbl, id);
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
if (trace->syscalls.table == NULL) {
trace->syscalls.table = calloc(trace->sctbl->syscalls.max_id + 1, sizeof(*sc));
if (trace->syscalls.table == NULL)
return -ENOMEM;
}
+#else
+ if (id > trace->sctbl->syscalls.max_id || (id == 0 && trace->syscalls.table == NULL)) {
+ // When using libaudit we don't know beforehand what is the max syscall id
+ struct syscall *table = realloc(trace->syscalls.table, (id + 1) * sizeof(*sc));
+
+ if (table == NULL)
+ return -ENOMEM;
+
+ memset(table + trace->sctbl->syscalls.max_id, 0, (id - trace->sctbl->syscalls.max_id) * sizeof(*sc));
+ trace->syscalls.table = table;
+ trace->sctbl->syscalls.max_id = id;
+ }
+#endif
sc = trace->syscalls.table + id;
if (sc->nonexistent)
return 0;
@@ -1801,7 +1811,7 @@ static int trace__read_syscall_info(struct trace *trace, int id)
return syscall__set_arg_fmts(sc);
}
-static int perf_evsel__init_tp_arg_scnprintf(struct evsel *evsel)
+static int evsel__init_tp_arg_scnprintf(struct evsel *evsel)
{
struct syscall_arg_fmt *fmt = evsel__syscall_arg_fmt(evsel);
@@ -2074,15 +2084,27 @@ static struct syscall *trace__syscall_info(struct trace *trace,
if (verbose > 1) {
static u64 n;
fprintf(trace->output, "Invalid syscall %d id, skipping (%s, %" PRIu64 ") ...\n",
- id, perf_evsel__name(evsel), ++n);
+ id, evsel__name(evsel), ++n);
}
return NULL;
}
err = -EINVAL;
- if (id > trace->sctbl->syscalls.max_id)
+#ifdef HAVE_SYSCALL_TABLE_SUPPORT
+ if (id > trace->sctbl->syscalls.max_id) {
+#else
+ if (id >= trace->sctbl->syscalls.max_id) {
+ /*
+ * With libaudit we don't know beforehand what is the max_id,
+ * so we let trace__read_syscall_info() figure that out as we
+ * go on reading syscalls.
+ */
+ err = trace__read_syscall_info(trace, id);
+ if (err)
+#endif
goto out_cant_read;
+ }
if ((trace->syscalls.table == NULL || trace->syscalls.table[id].name == NULL) &&
(err = trace__read_syscall_info(trace, id)) != 0)
@@ -2206,7 +2228,7 @@ static int trace__fprintf_sample(struct trace *trace, struct evsel *evsel,
double ts = (double)sample->time / NSEC_PER_MSEC;
printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n",
- perf_evsel__name(evsel), ts,
+ evsel__name(evsel), ts,
thread__comm_str(thread),
sample->pid, sample->tid, sample->cpu);
}
@@ -2382,7 +2404,7 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam
static const char *errno_to_name(struct evsel *evsel, int err)
{
- struct perf_env *env = perf_evsel__env(evsel);
+ struct perf_env *env = evsel__env(evsel);
const char *arch_name = perf_env__arch(env);
return arch_syscalls__strerrno(arch_name, err);
@@ -2513,7 +2535,7 @@ errno_print: {
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
out:
ttrace->entry_pending = false;
err = 0;
@@ -2531,7 +2553,7 @@ static int trace__vfs_getname(struct trace *trace, struct evsel *evsel,
size_t filename_len, entry_str_len, to_move;
ssize_t remaining_space;
char *pos;
- const char *filename = perf_evsel__rawptr(evsel, sample, "pathname");
+ const char *filename = evsel__rawptr(evsel, sample, "pathname");
if (!thread)
goto out;
@@ -2587,7 +2609,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct evsel *evsel,
union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
- u64 runtime = perf_evsel__intval(evsel, sample, "runtime");
+ u64 runtime = evsel__intval(evsel, sample, "runtime");
double runtime_ms = (double)runtime / NSEC_PER_MSEC;
struct thread *thread = machine__findnew_thread(trace->host,
sample->pid,
@@ -2606,10 +2628,10 @@ out_put:
out_dump:
fprintf(trace->output, "%s: comm=%s,pid=%u,runtime=%" PRIu64 ",vruntime=%" PRIu64 ")\n",
evsel->name,
- perf_evsel__strval(evsel, sample, "comm"),
- (pid_t)perf_evsel__intval(evsel, sample, "pid"),
+ evsel__strval(evsel, sample, "comm"),
+ (pid_t)evsel__intval(evsel, sample, "pid"),
runtime,
- perf_evsel__intval(evsel, sample, "vruntime"));
+ evsel__intval(evsel, sample, "vruntime"));
goto out_put;
}
@@ -2774,7 +2796,7 @@ static int trace__event_handler(struct trace *trace, struct evsel *evsel,
fprintf(trace->output, "%s(", evsel->name);
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
bpf_output__fprintf(trace, sample);
} else if (evsel->tp_format) {
if (strncmp(evsel->tp_format->name, "sys_enter_", 10) ||
@@ -2795,7 +2817,7 @@ newline:
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
++trace->nr_events_printed;
@@ -2890,7 +2912,7 @@ static int trace__pgfault(struct trace *trace,
if (callchain_ret > 0)
trace__fprintf_callchain(trace, sample);
else if (callchain_ret < 0)
- pr_err("Problem processing %s callchain, skipping...\n", perf_evsel__name(evsel));
+ pr_err("Problem processing %s callchain, skipping...\n", evsel__name(evsel));
++trace->nr_events_printed;
out:
@@ -3032,10 +3054,10 @@ static bool evlist__add_vfs_getname(struct evlist *evlist)
}
evlist__for_each_entry_safe(evlist, evsel, tmp) {
- if (!strstarts(perf_evsel__name(evsel), "probe:vfs_getname"))
+ if (!strstarts(evsel__name(evsel), "probe:vfs_getname"))
continue;
- if (perf_evsel__field(evsel, "pathname")) {
+ if (evsel__field(evsel, "pathname")) {
evsel->handler = trace__vfs_getname;
found = true;
continue;
@@ -3049,7 +3071,7 @@ static bool evlist__add_vfs_getname(struct evlist *evlist)
return found;
}
-static struct evsel *perf_evsel__new_pgfault(u64 config)
+static struct evsel *evsel__new_pgfault(u64 config)
{
struct evsel *evsel;
struct perf_event_attr attr = {
@@ -3093,7 +3115,7 @@ static void trace__handle_event(struct trace *trace, union perf_event *event, st
if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT &&
sample->raw_data == NULL) {
fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
- perf_evsel__name(evsel), sample->tid,
+ evsel__name(evsel), sample->tid,
sample->cpu, sample->raw_size);
} else {
tracepoint_handler handler = evsel->handler;
@@ -3124,8 +3146,8 @@ static int trace__add_syscall_newtp(struct trace *trace)
if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret))
goto out_delete_sys_exit;
- perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
- perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
+ evsel__config_callchain(sys_enter, &trace->opts, &callchain_param);
+ evsel__config_callchain(sys_exit, &trace->opts, &callchain_param);
evlist__add(evlist, sys_enter);
evlist__add(evlist, sys_exit);
@@ -3164,10 +3186,9 @@ static int trace__set_ev_qualifier_tp_filter(struct trace *trace)
if (filter == NULL)
goto out_enomem;
- if (!perf_evsel__append_tp_filter(trace->syscalls.events.sys_enter,
- filter)) {
+ if (!evsel__append_tp_filter(trace->syscalls.events.sys_enter, filter)) {
sys_exit = trace->syscalls.events.sys_exit;
- err = perf_evsel__append_tp_filter(sys_exit, filter);
+ err = evsel__append_tp_filter(sys_exit, filter);
}
free(filter);
@@ -3179,6 +3200,26 @@ out_enomem:
}
#ifdef HAVE_LIBBPF_SUPPORT
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
+{
+ if (trace->bpf_obj == NULL)
+ return NULL;
+
+ return bpf_object__find_map_by_name(trace->bpf_obj, name);
+}
+
+static void trace__set_bpf_map_filtered_pids(struct trace *trace)
+{
+ trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
+}
+
+static void trace__set_bpf_map_syscalls(struct trace *trace)
+{
+ trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
+ trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
+ trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
+}
+
static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace, const char *name)
{
if (trace->bpf_obj == NULL)
@@ -3517,6 +3558,20 @@ static void trace__delete_augmented_syscalls(struct trace *trace)
trace->bpf_obj = NULL;
}
#else // HAVE_LIBBPF_SUPPORT
+static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace __maybe_unused,
+ const char *name __maybe_unused)
+{
+ return NULL;
+}
+
+static void trace__set_bpf_map_filtered_pids(struct trace *trace __maybe_unused)
+{
+}
+
+static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
+{
+}
+
static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
{
return 0;
@@ -3695,7 +3750,7 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
return __trace__deliver_event(trace, event->event);
}
-static struct syscall_arg_fmt *perf_evsel__syscall_arg_fmt(struct evsel *evsel, char *arg)
+static struct syscall_arg_fmt *evsel__find_syscall_arg_fmt_by_name(struct evsel *evsel, char *arg)
{
struct tep_format_field *field;
struct syscall_arg_fmt *fmt = __evsel__syscall_arg_fmt(evsel);
@@ -3750,7 +3805,7 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel
scnprintf(arg, sizeof(arg), "%.*s", left_size, left);
- fmt = perf_evsel__syscall_arg_fmt(evsel, arg);
+ fmt = evsel__find_syscall_arg_fmt_by_name(evsel, arg);
if (fmt == NULL) {
pr_err("\"%s\" not found in \"%s\", can't set filter \"%s\"\n",
arg, evsel->name, evsel->filter);
@@ -3801,7 +3856,7 @@ static int trace__expand_filter(struct trace *trace __maybe_unused, struct evsel
if (new_filter != evsel->filter) {
pr_debug("New filter for %s: %s\n", evsel->name, new_filter);
- perf_evsel__set_filter(evsel, new_filter);
+ evsel__set_filter(evsel, new_filter);
free(new_filter);
}
@@ -3846,18 +3901,18 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
}
if ((trace->trace_pgfaults & TRACE_PFMAJ)) {
- pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
+ pgfault_maj = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ);
if (pgfault_maj == NULL)
goto out_error_mem;
- perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
+ evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param);
evlist__add(evlist, pgfault_maj);
}
if ((trace->trace_pgfaults & TRACE_PFMIN)) {
- pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
+ pgfault_min = evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN);
if (pgfault_min == NULL)
goto out_error_mem;
- perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
+ evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param);
evlist__add(evlist, pgfault_min);
}
@@ -4108,7 +4163,7 @@ out_error:
out_error_apply_filters:
fprintf(trace->output,
"Failed to set filter \"%s\" on event %s with %d (%s)\n",
- evsel->filter, perf_evsel__name(evsel), errno,
+ evsel->filter, evsel__name(evsel), errno,
str_error_r(errno, errbuf, sizeof(errbuf)));
goto out_delete_evlist;
}
@@ -4179,7 +4234,7 @@ static int trace__replay(struct trace *trace)
"syscalls:sys_enter");
if (evsel &&
- (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
+ (evsel__init_raw_syscall_tp(evsel, trace__sys_enter) < 0 ||
perf_evsel__init_sc_tp_ptr_field(evsel, args))) {
pr_err("Error during initialize raw_syscalls:sys_enter event\n");
goto out;
@@ -4191,7 +4246,7 @@ static int trace__replay(struct trace *trace)
evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
"syscalls:sys_exit");
if (evsel &&
- (perf_evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
+ (evsel__init_raw_syscall_tp(evsel, trace__sys_exit) < 0 ||
perf_evsel__init_sc_tp_uint_field(evsel, ret))) {
pr_err("Error during initialize raw_syscalls:sys_exit event\n");
goto out;
@@ -4471,11 +4526,11 @@ static int evlist__set_syscall_tp_fields(struct evlist *evlist)
continue;
if (strcmp(evsel->tp_format->system, "syscalls")) {
- perf_evsel__init_tp_arg_scnprintf(evsel);
+ evsel__init_tp_arg_scnprintf(evsel);
continue;
}
- if (perf_evsel__init_syscall_tp(evsel))
+ if (evsel__init_syscall_tp(evsel))
return -1;
if (!strncmp(evsel->tp_format->name, "sys_enter_", 10)) {
@@ -4605,26 +4660,6 @@ static int trace__parse_cgroups(const struct option *opt, const char *str, int u
return 0;
}
-static struct bpf_map *trace__find_bpf_map_by_name(struct trace *trace, const char *name)
-{
- if (trace->bpf_obj == NULL)
- return NULL;
-
- return bpf_object__find_map_by_name(trace->bpf_obj, name);
-}
-
-static void trace__set_bpf_map_filtered_pids(struct trace *trace)
-{
- trace->filter_pids.map = trace__find_bpf_map_by_name(trace, "pids_filtered");
-}
-
-static void trace__set_bpf_map_syscalls(struct trace *trace)
-{
- trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
- trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
- trace->syscalls.prog_array.sys_exit = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
-}
-
static int trace__config(const char *var, const char *value, void *arg)
{
struct trace *trace = arg;
@@ -4989,7 +5024,7 @@ int cmd_trace(int argc, const char **argv)
*/
if (trace.syscalls.events.augmented) {
evlist__for_each_entry(trace.evlist, evsel) {
- bool raw_syscalls_sys_exit = strcmp(perf_evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
+ bool raw_syscalls_sys_exit = strcmp(evsel__name(evsel), "raw_syscalls:sys_exit") == 0;
if (raw_syscalls_sys_exit) {
trace.raw_augmented_syscalls = true;
@@ -4997,10 +5032,10 @@ int cmd_trace(int argc, const char **argv)
}
if (trace.syscalls.events.augmented->priv == NULL &&
- strstr(perf_evsel__name(evsel), "syscalls:sys_enter")) {
+ strstr(evsel__name(evsel), "syscalls:sys_enter")) {
struct evsel *augmented = trace.syscalls.events.augmented;
- if (perf_evsel__init_augmented_syscall_tp(augmented, evsel) ||
- perf_evsel__init_augmented_syscall_tp_args(augmented))
+ if (evsel__init_augmented_syscall_tp(augmented, evsel) ||
+ evsel__init_augmented_syscall_tp_args(augmented))
goto out;
/*
* Augmented is __augmented_syscalls__ BPF_OUTPUT event
@@ -5014,16 +5049,16 @@ int cmd_trace(int argc, const char **argv)
* as not to filter it, then we'll handle it just like we would
* for the BPF_OUTPUT one:
*/
- if (perf_evsel__init_augmented_syscall_tp(evsel, evsel) ||
- perf_evsel__init_augmented_syscall_tp_args(evsel))
+ if (evsel__init_augmented_syscall_tp(evsel, evsel) ||
+ evsel__init_augmented_syscall_tp_args(evsel))
goto out;
evsel->handler = trace__sys_enter;
}
- if (strstarts(perf_evsel__name(evsel), "syscalls:sys_exit_")) {
+ if (strstarts(evsel__name(evsel), "syscalls:sys_exit_")) {
struct syscall_tp *sc;
init_augmented_syscall_tp:
- if (perf_evsel__init_augmented_syscall_tp(evsel, evsel))
+ if (evsel__init_augmented_syscall_tp(evsel, evsel))
goto out;
sc = __evsel__syscall_tp(evsel);
/*
@@ -5047,7 +5082,7 @@ init_augmented_syscall_tp:
*/
if (trace.raw_augmented_syscalls)
trace.raw_augmented_syscalls_args_size = (6 + 1) * sizeof(long) + sc->id.offset;
- perf_evsel__init_augmented_syscall_tp_ret(evsel);
+ evsel__init_augmented_syscall_tp_ret(evsel);
evsel->handler = trace__sys_exit;
}
}
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index bfb21d049e6c..94c2bc22c2bb 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -22,7 +22,9 @@ include/uapi/linux/usbdevice_fs.h
include/uapi/linux/vhost.h
include/uapi/sound/asound.h
include/linux/bits.h
+include/vdso/bits.h
include/linux/const.h
+include/vdso/const.h
include/linux/hash.h
include/uapi/linux/hw_breakpoint.h
arch/x86/include/asm/disabled-features.h
@@ -115,6 +117,7 @@ check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/ex
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"'
check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
+check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
check include/linux/ctype.h '-I "isdigit("'
check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
@@ -125,4 +128,8 @@ check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in
# diff non-symmetric files
check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
+# check duplicated library files
+check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h
+check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c
+
cd tools/perf
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 0453ba26cdbd..a42fab308ff6 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -258,7 +258,8 @@ gets schedule to. Per task counters can be created by any user, for
their own tasks.
A 'pid == -1' and 'cpu == x' counter is a per CPU counter that counts
-all events on CPU-x. Per CPU counters need CAP_SYS_ADMIN privilege.
+all events on CPU-x. Per CPU counters need CAP_PERFMON or CAP_SYS_ADMIN
+privilege.
The 'flags' parameter is currently unused and must be zero.
diff --git a/tools/perf/jvmti/libjvmti.c b/tools/perf/jvmti/libjvmti.c
index c441a34cb1c0..fcca275e5bf9 100644
--- a/tools/perf/jvmti/libjvmti.c
+++ b/tools/perf/jvmti/libjvmti.c
@@ -32,34 +32,41 @@ static void print_error(jvmtiEnv *jvmti, const char *msg, jvmtiError ret)
#ifdef HAVE_JVMTI_CMLR
static jvmtiError
-do_get_line_numbers(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
- jvmti_line_info_t *tab, jint *nr)
+do_get_line_number(jvmtiEnv *jvmti, void *pc, jmethodID m, jint bci,
+ jvmti_line_info_t *tab)
{
- jint i, lines = 0;
- jint nr_lines = 0;
+ jint i, nr_lines = 0;
jvmtiLineNumberEntry *loc_tab = NULL;
jvmtiError ret;
+ jint src_line = -1;
ret = (*jvmti)->GetLineNumberTable(jvmti, m, &nr_lines, &loc_tab);
- if (ret != JVMTI_ERROR_NONE) {
+ if (ret == JVMTI_ERROR_ABSENT_INFORMATION || ret == JVMTI_ERROR_NATIVE_METHOD) {
+ /* No debug information for this method */
+ return ret;
+ } else if (ret != JVMTI_ERROR_NONE) {
print_error(jvmti, "GetLineNumberTable", ret);
return ret;
}
- for (i = 0; i < nr_lines; i++) {
- if (loc_tab[i].start_location < bci) {
- tab[lines].pc = (unsigned long)pc;
- tab[lines].line_number = loc_tab[i].line_number;
- tab[lines].discrim = 0; /* not yet used */
- tab[lines].methodID = m;
- lines++;
- } else {
- break;
- }
+ for (i = 0; i < nr_lines && loc_tab[i].start_location <= bci; i++) {
+ src_line = i;
+ }
+
+ if (src_line != -1) {
+ tab->pc = (unsigned long)pc;
+ tab->line_number = loc_tab[src_line].line_number;
+ tab->discrim = 0; /* not yet used */
+ tab->methodID = m;
+
+ ret = JVMTI_ERROR_NONE;
+ } else {
+ ret = JVMTI_ERROR_ABSENT_INFORMATION;
}
+
(*jvmti)->Deallocate(jvmti, (unsigned char *)loc_tab);
- *nr = lines;
- return JVMTI_ERROR_NONE;
+
+ return ret;
}
static jvmtiError
@@ -67,9 +74,8 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
{
const jvmtiCompiledMethodLoadRecordHeader *hdr;
jvmtiCompiledMethodLoadInlineRecord *rec;
- jvmtiLineNumberEntry *lne = NULL;
PCStackInfo *c;
- jint nr, ret;
+ jint ret;
int nr_total = 0;
int i, lines_total = 0;
@@ -82,21 +88,7 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
for (hdr = compile_info; hdr != NULL; hdr = hdr->next) {
if (hdr->kind == JVMTI_CMLR_INLINE_INFO) {
rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
- for (i = 0; i < rec->numpcs; i++) {
- c = rec->pcinfo + i;
- nr = 0;
- /*
- * unfortunately, need a tab to get the number of lines!
- */
- ret = (*jvmti)->GetLineNumberTable(jvmti, c->methods[0], &nr, &lne);
- if (ret == JVMTI_ERROR_NONE) {
- /* free what was allocated for nothing */
- (*jvmti)->Deallocate(jvmti, (unsigned char *)lne);
- nr_total += (int)nr;
- } else {
- print_error(jvmti, "GetLineNumberTable", ret);
- }
- }
+ nr_total += rec->numpcs;
}
}
@@ -115,14 +107,17 @@ get_line_numbers(jvmtiEnv *jvmti, const void *compile_info, jvmti_line_info_t **
rec = (jvmtiCompiledMethodLoadInlineRecord *)hdr;
for (i = 0; i < rec->numpcs; i++) {
c = rec->pcinfo + i;
- nr = 0;
- ret = do_get_line_numbers(jvmti, c->pc,
- c->methods[0],
- c->bcis[0],
- *tab + lines_total,
- &nr);
+ /*
+ * c->methods is the stack of inlined method calls
+ * at c->pc. [0] is the leaf method. Caller frames
+ * are ignored at the moment.
+ */
+ ret = do_get_line_number(jvmti, c->pc,
+ c->methods[0],
+ c->bcis[0],
+ *tab + lines_total);
if (ret == JVMTI_ERROR_NONE)
- lines_total += nr;
+ lines_total++;
}
}
}
@@ -246,8 +241,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
char *class_sign = NULL;
char *func_name = NULL;
char *func_sign = NULL;
- char *file_name = NULL;
- char fn[PATH_MAX];
uint64_t addr = (uint64_t)(uintptr_t)code_addr;
jvmtiError ret;
int nr_lines = 0; /* in line_tab[] */
@@ -264,7 +257,9 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
if (has_line_numbers && map && map_length) {
ret = get_line_numbers(jvmti, compile_info, &line_tab, &nr_lines);
if (ret != JVMTI_ERROR_NONE) {
- warnx("jvmti: cannot get line table for method");
+ if (ret != JVMTI_ERROR_NOT_FOUND) {
+ warnx("jvmti: cannot get line table for method");
+ }
nr_lines = 0;
} else if (nr_lines > 0) {
line_file_names = malloc(sizeof(char*) * nr_lines);
@@ -282,12 +277,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
}
}
- ret = (*jvmti)->GetSourceFileName(jvmti, decl_class, &file_name);
- if (ret != JVMTI_ERROR_NONE) {
- print_error(jvmti, "GetSourceFileName", ret);
- goto error;
- }
-
ret = (*jvmti)->GetClassSignature(jvmti, decl_class,
&class_sign, NULL);
if (ret != JVMTI_ERROR_NONE) {
@@ -302,8 +291,6 @@ compiled_method_load_cb(jvmtiEnv *jvmti,
goto error;
}
- copy_class_filename(class_sign, file_name, fn, PATH_MAX);
-
/*
* write source line info record if we have it
*/
@@ -323,7 +310,6 @@ error:
(*jvmti)->Deallocate(jvmti, (unsigned char *)func_name);
(*jvmti)->Deallocate(jvmti, (unsigned char *)func_sign);
(*jvmti)->Deallocate(jvmti, (unsigned char *)class_sign);
- (*jvmti)->Deallocate(jvmti, (unsigned char *)file_name);
free(line_tab);
while (line_file_names && (nr_lines > 0)) {
if (line_file_names[nr_lines - 1]) {
diff --git a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json
index bffb2d4a6420..fc4aa6c2ddc9 100644
--- a/tools/perf/pmu-events/arch/powerpc/power8/metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power8/metrics.json
@@ -169,7 +169,7 @@
},
{
"BriefDescription": "Cycles GCT empty where dispatch was held",
- "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL)",
+ "MetricExpr": "(PM_GCT_NOSLOT_DISP_HELD_MAP + PM_GCT_NOSLOT_DISP_HELD_SRQ + PM_GCT_NOSLOT_DISP_HELD_ISSQ + PM_GCT_NOSLOT_DISP_HELD_OTHER) / PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "gct_empty_disp_held_cpi"
},
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
index 811c2a8c1c9e..80816d6402e9 100644
--- a/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
+++ b/tools/perf/pmu-events/arch/powerpc/power9/metrics.json
@@ -208,6 +208,84 @@
"MetricName": "fxu_stall_cpi"
},
{
+ "BriefDescription": "Instruction Completion Table empty for this thread due to branch mispred",
+ "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_br_mpred_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to Icache Miss and branch mispred",
+ "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_br_mpred_icmiss_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table other stalls",
+ "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_cyc_other_cpi"
+ },
+ {
+ "BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason",
+ "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF",
+ "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_hb_full_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full",
+ "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_issq_cpi"
+ },
+ {
+ "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI",
+ "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_other_cpi"
+ },
+ {
+ "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch",
+ "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_sync_cpi"
+ },
+ {
+ "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch",
+ "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_disp_held_tbegin_cpi"
+ },
+ {
+ "BriefDescription": "ICT_NOSLOT_IC_L2_CPI",
+ "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_ic_l2_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to icache misses that were sourced from the local L3",
+ "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_ic_l3_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache",
+ "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_ic_l3miss_cpi"
+ },
+ {
+ "BriefDescription": "Instruction Completion Table empty for this thread due to Icache Miss",
+ "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL",
+ "MetricGroup": "cpi_breakdown",
+ "MetricName": "ict_noslot_ic_miss_cpi"
+ },
+ {
"MetricExpr": "(PM_NTC_ISSUE_HELD_DARQ_FULL + PM_NTC_ISSUE_HELD_ARB + PM_NTC_ISSUE_HELD_OTHER)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "issue_hold_cpi"
@@ -313,7 +391,7 @@
"MetricName": "nested_tend_stall_cpi"
},
{
- "BriefDescription": "Number of cycles the ICT has no itags assigned to this thread",
+ "BriefDescription": "Number of cycles the Instruction Completion Table has no itags assigned to this thread",
"MetricExpr": "PM_ICT_NOSLOT_CYC/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "nothing_dispatched_cpi"
@@ -362,7 +440,7 @@
},
{
"BriefDescription": "Completion stall for other reasons",
- "MetricExpr": "PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL",
+ "MetricExpr": "(PM_CMPLU_STALL - PM_CMPLU_STALL_NTC_DISP_FIN - PM_CMPLU_STALL_NTC_FLUSH - PM_CMPLU_STALL_LSU - PM_CMPLU_STALL_EXEC_UNIT - PM_CMPLU_STALL_BRU)/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "other_stall_cpi"
},
@@ -425,7 +503,7 @@
"MetricName": "st_fwd_stall_cpi"
},
{
- "BriefDescription": "Nothing completed and ICT not empty",
+ "BriefDescription": "Nothing completed and Instruction Completion Table not empty",
"MetricExpr": "PM_CMPLU_STALL/PM_RUN_INST_CMPL",
"MetricGroup": "cpi_breakdown",
"MetricName": "stall_cpi"
@@ -1820,71 +1898,6 @@
"MetricName": "fxu_all_idle"
},
{
- "BriefDescription": "Ict empty for this thread due to branch mispred",
- "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_br_mpred_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to Icache Miss and branch mispred",
- "MetricExpr": "PM_ICT_NOSLOT_BR_MPRED_ICMISS/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_br_mpred_icmiss_cpi"
- },
- {
- "BriefDescription": "ICT other stalls",
- "MetricExpr": "(PM_ICT_NOSLOT_CYC - PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_BR_MPRED_ICMISS - PM_ICT_NOSLOT_BR_MPRED - PM_ICT_NOSLOT_DISP_HELD)/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_cyc_other_cpi"
- },
- {
- "BriefDescription": "Cycles in which the NTC instruciton is held at dispatch for any reason",
- "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF",
- "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_hb_full_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to dispatch hold on this thread due to Issue q full, BRQ full, XVCF Full, Count cache, Link, Tar full",
- "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_ISSQ/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_issq_cpi"
- },
- {
- "BriefDescription": "ICT_NOSLOT_DISP_HELD_OTHER_CPI",
- "MetricExpr": "(PM_ICT_NOSLOT_DISP_HELD - PM_ICT_NOSLOT_DISP_HELD_HB_FULL - PM_ICT_NOSLOT_DISP_HELD_SYNC - PM_ICT_NOSLOT_DISP_HELD_TBEGIN - PM_ICT_NOSLOT_DISP_HELD_ISSQ)/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_other_cpi"
- },
- {
- "BriefDescription": "Dispatch held due to a synchronizing instruction at dispatch",
- "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_SYNC/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_sync_cpi"
- },
- {
- "BriefDescription": "the NTC instruction is being held at dispatch because it is a tbegin instruction and there is an older tbegin in the pipeline that must complete before the younger tbegin can dispatch",
- "MetricExpr": "PM_ICT_NOSLOT_DISP_HELD_TBEGIN/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_disp_held_tbegin_cpi"
- },
- {
- "BriefDescription": "ICT_NOSLOT_IC_L2_CPI",
- "MetricExpr": "(PM_ICT_NOSLOT_IC_MISS - PM_ICT_NOSLOT_IC_L3 - PM_ICT_NOSLOT_IC_L3MISS)/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_ic_l2_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from the local L3",
- "MetricExpr": "PM_ICT_NOSLOT_IC_L3/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_ic_l3_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to icache misses that were sourced from beyond the local L3. The source could be local/remote/distant memory or another core's cache",
- "MetricExpr": "PM_ICT_NOSLOT_IC_L3MISS/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_ic_l3miss_cpi"
- },
- {
- "BriefDescription": "Ict empty for this thread due to Icache Miss",
- "MetricExpr": "PM_ICT_NOSLOT_IC_MISS/PM_RUN_INST_CMPL",
- "MetricName": "ict_noslot_ic_miss_cpi"
- },
- {
"BriefDescription": "Rate of IERAT reloads from L2",
"MetricExpr": "PM_IPTEG_FROM_L2 * 100 / PM_RUN_INST_CMPL",
"MetricName": "ipteg_from_l2_rate_percent"
diff --git a/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
new file mode 100644
index 000000000000..c121e526442a
--- /dev/null
+++ b/tools/perf/pmu-events/arch/powerpc/power9/nest_metrics.json
@@ -0,0 +1,19 @@
+[
+ {
+ "MetricExpr": "(hv_24x7@PM_MCS01_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_RD_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_RD_DISP_PORT23\\,chip\\=?@)",
+ "MetricName": "Memory_RD_BW_Chip",
+ "MetricGroup": "Memory_BW",
+ "ScaleUnit": "1.6e-2MB"
+ },
+ {
+ "MetricExpr": "(hv_24x7@PM_MCS01_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS01_128B_WR_DISP_PORT23\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT01\\,chip\\=?@ + hv_24x7@PM_MCS23_128B_WR_DISP_PORT23\\,chip\\=?@ )",
+ "MetricName": "Memory_WR_BW_Chip",
+ "MetricGroup": "Memory_BW",
+ "ScaleUnit": "1.6e-2MB"
+ },
+ {
+ "MetricExpr": "(hv_24x7@PM_PB_CYC\\,chip\\=?@ )",
+ "MetricName": "PowerBUS_Frequency",
+ "ScaleUnit": "2.5e-7GHz"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
index 5e36bc2468d0..c998e4f1d1d2 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/crypto6.json
@@ -4,27 +4,27 @@
"EventCode": "80",
"EventName": "ECC_FUNCTION_COUNT",
"BriefDescription": "ECC Function Count",
- "PublicDescription": "Long ECC function Count"
+ "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions issued by the CPU."
},
{
"Unit": "CPU-M-CF",
"EventCode": "81",
"EventName": "ECC_CYCLES_COUNT",
"BriefDescription": "ECC Cycles Count",
- "PublicDescription": "Long ECC Function cycles count"
+ "PublicDescription": "This counter counts the total number of CPU cycles when the ECC coprocessor is busy performing the elliptic-curve cryptography (ECC) functions issued by the CPU."
},
{
"Unit": "CPU-M-CF",
"EventCode": "82",
"EventName": "ECC_BLOCKED_FUNCTION_COUNT",
"BriefDescription": "Ecc Blocked Function Count",
- "PublicDescription": "Long ECC blocked function count"
+ "PublicDescription": "This counter counts the total number of the elliptic-curve cryptography (ECC) functions that are issued by the CPU and are blocked because the ECC coprocessor is busy performing a function issued by another CPU."
},
{
"Unit": "CPU-M-CF",
"EventCode": "83",
"EventName": "ECC_BLOCKED_CYCLES_COUNT",
"BriefDescription": "ECC Blocked Cycles Count",
- "PublicDescription": "Long ECC blocked cycles count"
+ "PublicDescription": "This counter counts the total number of CPU cycles blocked for the elliptic-curve cryptography (ECC) functions issued by the CPU because the ECC coprocessor is busy performing a function issued by another CPU."
},
]
diff --git a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
index 89e070727e1b..2df2e231e9ee 100644
--- a/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
+++ b/tools/perf/pmu-events/arch/s390/cf_z15/extended.json
@@ -25,7 +25,7 @@
"EventCode": "131",
"EventName": "DTLB2_HPAGE_WRITES",
"BriefDescription": "DTLB2 One-Megabyte Page Writes",
- "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page or a Last Host Translation was done"
+ "PublicDescription": "A translation entry was written into the Combined Region and Segment Table Entry array in the Level-2 TLB for a one-megabyte page"
},
{
"Unit": "CPU-M-CF",
@@ -358,6 +358,34 @@
},
{
"Unit": "CPU-M-CF",
+ "EventCode": "247",
+ "EventName": "DFLT_ACCESS",
+ "BriefDescription": "Cycles CPU spent obtaining access to Deflate unit",
+ "PublicDescription": "Cycles CPU spent obtaining access to Deflate unit"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "252",
+ "EventName": "DFLT_CYCLES",
+ "BriefDescription": "Cycles CPU is using Deflate unit",
+ "PublicDescription": "Cycles CPU is using Deflate unit"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "264",
+ "EventName": "DFLT_CC",
+ "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed",
+ "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed"
+ },
+ {
+ "Unit": "CPU-M-CF",
+ "EventCode": "265",
+ "EventName": "DFLT_CCERROR",
+ "BriefDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2",
+ "PublicDescription": "Increments by one for every DEFLATE CONVERSION CALL instruction executed that ended in Condition Codes 0, 1 or 2"
+ },
+ {
+ "Unit": "CPU-M-CF",
"EventCode": "448",
"EventName": "MT_DIAG_CYCLES_ONE_THR_ACTIVE",
"BriefDescription": "Cycle count with one thread active",
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json b/tools/perf/pmu-events/arch/test/test_cpu/branch.json
index 93ddfd8053ca..93ddfd8053ca 100644
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/branch.json
+++ b/tools/perf/pmu-events/arch/test/test_cpu/branch.json
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/other.json b/tools/perf/pmu-events/arch/test/test_cpu/other.json
new file mode 100644
index 000000000000..7d53d7ecd723
--- /dev/null
+++ b/tools/perf/pmu-events/arch/test/test_cpu/other.json
@@ -0,0 +1,26 @@
+[
+ {
+ "EventCode": "0x6",
+ "Counter": "0,1",
+ "UMask": "0x80",
+ "EventName": "SEGMENT_REG_LOADS.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of segment register loads."
+ },
+ {
+ "EventCode": "0x9",
+ "Counter": "0,1",
+ "UMask": "0x20",
+ "EventName": "DISPATCH_BLOCKED.ANY",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Memory cluster signals to block micro-op dispatch for any reason"
+ },
+ {
+ "EventCode": "0x3A",
+ "Counter": "0,1",
+ "UMask": "0x0",
+ "EventName": "EIST_TRANS",
+ "SampleAfterValue": "200000",
+ "BriefDescription": "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions"
+ }
+] \ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/test/test_cpu/uncore.json b/tools/perf/pmu-events/arch/test/test_cpu/uncore.json
new file mode 100644
index 000000000000..d0a890cc814d
--- /dev/null
+++ b/tools/perf/pmu-events/arch/test/test_cpu/uncore.json
@@ -0,0 +1,21 @@
+[
+ {
+ "EventCode": "0x02",
+ "EventName": "uncore_hisi_ddrc.flux_wcmd",
+ "BriefDescription": "DDRC write commands",
+ "PublicDescription": "DDRC write commands",
+ "Unit": "hisi_sccl,ddrc"
+ },
+ {
+ "Unit": "CBO",
+ "EventCode": "0x22",
+ "UMask": "0x81",
+ "EventName": "UNC_CBO_XSNP_RESPONSE.MISS_EVICTION",
+ "BriefDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "PublicDescription": "A cross-core snoop resulted from L3 Eviction which misses in some processor core.",
+ "Counter": "0,1",
+ "CounterMask": "0",
+ "Invert": "0",
+ "EdgeDetect": "0"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json b/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json
deleted file mode 100644
index 6221a840fcea..000000000000
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/cache.json
+++ /dev/null
@@ -1,329 +0,0 @@
-[
- {
- "EventName": "ic_fw32",
- "EventCode": "0x80",
- "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)."
- },
- {
- "EventName": "ic_fw32_miss",
- "EventCode": "0x81",
- "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag."
- },
- {
- "EventName": "ic_cache_fill_l2",
- "EventCode": "0x82",
- "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache."
- },
- {
- "EventName": "ic_cache_fill_sys",
- "EventCode": "0x83",
- "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache."
- },
- {
- "EventName": "bp_l1_tlb_miss_l2_hit",
- "EventCode": "0x84",
- "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB."
- },
- {
- "EventName": "bp_l1_tlb_miss_l2_miss",
- "EventCode": "0x85",
- "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs."
- },
- {
- "EventName": "bp_snp_re_sync",
- "EventCode": "0x86",
- "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event."
- },
- {
- "EventName": "ic_fetch_stall.ic_stall_any",
- "EventCode": "0x87",
- "BriefDescription": "IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
- "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
- "UMask": "0x4"
- },
- {
- "EventName": "ic_fetch_stall.ic_stall_dq_empty",
- "EventCode": "0x87",
- "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
- "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
- "UMask": "0x2"
- },
- {
- "EventName": "ic_fetch_stall.ic_stall_back_pressure",
- "EventCode": "0x87",
- "BriefDescription": "IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
- "PublicDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
- "UMask": "0x1"
- },
- {
- "EventName": "ic_cache_inval.l2_invalidating_probe",
- "EventCode": "0x8c",
- "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS).",
- "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to L2 invalidating probe (external or LS).",
- "UMask": "0x2"
- },
- {
- "EventName": "ic_cache_inval.fill_invalidated",
- "EventCode": "0x8c",
- "BriefDescription": "IC line invalidated due to overwriting fill response.",
- "PublicDescription": "The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core. IC line invalidated due to overwriting fill response.",
- "UMask": "0x1"
- },
- {
- "EventName": "bp_tlb_rel",
- "EventCode": "0x99",
- "BriefDescription": "The number of ITLB reload requests."
- },
- {
- "EventName": "l2_request_g1.rd_blk_l",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x80"
- },
- {
- "EventName": "l2_request_g1.rd_blk_x",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x40"
- },
- {
- "EventName": "l2_request_g1.ls_rd_blk_c_s",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x20"
- },
- {
- "EventName": "l2_request_g1.cacheable_ic_read",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x10"
- },
- {
- "EventName": "l2_request_g1.change_to_x",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x8"
- },
- {
- "EventName": "l2_request_g1.prefetch_l2",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x4"
- },
- {
- "EventName": "l2_request_g1.l2_hw_pf",
- "EventCode": "0x60",
- "BriefDescription": "Requests to L2 Group1.",
- "PublicDescription": "Requests to L2 Group1.",
- "UMask": "0x2"
- },
- {
- "EventName": "l2_request_g1.other_requests",
- "EventCode": "0x60",
- "BriefDescription": "Events covered by l2_request_g2.",
- "PublicDescription": "Requests to L2 Group1. Events covered by l2_request_g2.",
- "UMask": "0x1"
- },
- {
- "EventName": "l2_request_g2.group1",
- "EventCode": "0x61",
- "BriefDescription": "All Group 1 commands not in unit0.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. All Group 1 commands not in unit0.",
- "UMask": "0x80"
- },
- {
- "EventName": "l2_request_g2.ls_rd_sized",
- "EventCode": "0x61",
- "BriefDescription": "RdSized, RdSized32, RdSized64.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSized, RdSized32, RdSized64.",
- "UMask": "0x40"
- },
- {
- "EventName": "l2_request_g2.ls_rd_sized_nc",
- "EventCode": "0x61",
- "BriefDescription": "RdSizedNC, RdSized32NC, RdSized64NC.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous. RdSizedNC, RdSized32NC, RdSized64NC.",
- "UMask": "0x20"
- },
- {
- "EventName": "l2_request_g2.ic_rd_sized",
- "EventCode": "0x61",
- "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "UMask": "0x10"
- },
- {
- "EventName": "l2_request_g2.ic_rd_sized_nc",
- "EventCode": "0x61",
- "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "UMask": "0x8"
- },
- {
- "EventName": "l2_request_g2.smc_inval",
- "EventCode": "0x61",
- "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "UMask": "0x4"
- },
- {
- "EventName": "l2_request_g2.bus_locks_originator",
- "EventCode": "0x61",
- "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "UMask": "0x2"
- },
- {
- "EventName": "l2_request_g2.bus_locks_responses",
- "EventCode": "0x61",
- "BriefDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "PublicDescription": "Multi-events in that LS and IF requests can be received simultaneous.",
- "UMask": "0x1"
- },
- {
- "EventName": "l2_latency.l2_cycles_waiting_on_fills",
- "EventCode": "0x62",
- "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
- "PublicDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
- "UMask": "0x1"
- },
- {
- "EventName": "l2_wcb_req.wcb_write",
- "EventCode": "0x63",
- "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.",
- "BriefDescription": "LS to L2 WCB write requests.",
- "UMask": "0x40"
- },
- {
- "EventName": "l2_wcb_req.wcb_close",
- "EventCode": "0x63",
- "BriefDescription": "LS to L2 WCB close requests.",
- "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.",
- "UMask": "0x20"
- },
- {
- "EventName": "l2_wcb_req.zero_byte_store",
- "EventCode": "0x63",
- "BriefDescription": "LS to L2 WCB zero byte store requests.",
- "PublicDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.",
- "UMask": "0x4"
- },
- {
- "EventName": "l2_wcb_req.cl_zero",
- "EventCode": "0x63",
- "PublicDescription": "LS to L2 WCB cache line zeroing requests.",
- "BriefDescription": "LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.",
- "UMask": "0x1"
- },
- {
- "EventName": "l2_cache_req_stat.ls_rd_blk_cs",
- "EventCode": "0x64",
- "BriefDescription": "LS ReadBlock C/S Hit.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS ReadBlock C/S Hit.",
- "UMask": "0x80"
- },
- {
- "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x",
- "EventCode": "0x64",
- "BriefDescription": "LS Read Block L Hit X.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block L Hit X.",
- "UMask": "0x40"
- },
- {
- "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s",
- "EventCode": "0x64",
- "BriefDescription": "LsRdBlkL Hit Shared.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkL Hit Shared.",
- "UMask": "0x20"
- },
- {
- "EventName": "l2_cache_req_stat.ls_rd_blk_x",
- "EventCode": "0x64",
- "BriefDescription": "LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LsRdBlkX/ChgToX Hit X. Count RdBlkX finding Shared as a Miss.",
- "UMask": "0x10"
- },
- {
- "EventName": "l2_cache_req_stat.ls_rd_blk_c",
- "EventCode": "0x64",
- "BriefDescription": "LS Read Block C S L X Change to X Miss.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. LS Read Block C S L X Change to X Miss.",
- "UMask": "0x8"
- },
- {
- "EventName": "l2_cache_req_stat.ic_fill_hit_x",
- "EventCode": "0x64",
- "BriefDescription": "IC Fill Hit Exclusive Stale.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Exclusive Stale.",
- "UMask": "0x4"
- },
- {
- "EventName": "l2_cache_req_stat.ic_fill_hit_s",
- "EventCode": "0x64",
- "BriefDescription": "IC Fill Hit Shared.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Hit Shared.",
- "UMask": "0x2"
- },
- {
- "EventName": "l2_cache_req_stat.ic_fill_miss",
- "EventCode": "0x64",
- "BriefDescription": "IC Fill Miss.",
- "PublicDescription": "This event does not count accesses to the L2 cache by the L2 prefetcher, but it does count accesses by the L1 prefetcher. IC Fill Miss.",
- "UMask": "0x1"
- },
- {
- "EventName": "l2_fill_pending.l2_fill_busy",
- "EventCode": "0x6d",
- "BriefDescription": "Total cycles spent with one or more fill requests in flight from L2.",
- "PublicDescription": "Total cycles spent with one or more fill requests in flight from L2.",
- "UMask": "0x1"
- },
- {
- "EventName": "l3_request_g1.caching_l3_cache_accesses",
- "EventCode": "0x01",
- "BriefDescription": "Caching: L3 cache accesses",
- "UMask": "0x80",
- "Unit": "L3PMC"
- },
- {
- "EventName": "l3_lookup_state.all_l3_req_typs",
- "EventCode": "0x04",
- "BriefDescription": "All L3 Request Types",
- "UMask": "0xff",
- "Unit": "L3PMC"
- },
- {
- "EventName": "l3_comb_clstr_state.other_l3_miss_typs",
- "EventCode": "0x06",
- "BriefDescription": "Other L3 Miss Request Types",
- "UMask": "0xfe",
- "Unit": "L3PMC"
- },
- {
- "EventName": "l3_comb_clstr_state.request_miss",
- "EventCode": "0x06",
- "BriefDescription": "L3 cache misses",
- "UMask": "0x01",
- "Unit": "L3PMC"
- },
- {
- "EventName": "xi_sys_fill_latency",
- "EventCode": "0x90",
- "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.",
- "UMask": "0x00",
- "Unit": "L3PMC"
- },
- {
- "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs",
- "EventCode": "0x9a",
- "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.",
- "UMask": "0x3f",
- "Unit": "L3PMC"
- }
-]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/other.json b/tools/perf/pmu-events/arch/x86/amdfam17h/other.json
deleted file mode 100644
index b26a00d05a2e..000000000000
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/other.json
+++ /dev/null
@@ -1,65 +0,0 @@
-[
- {
- "EventName": "ic_oc_mode_switch.oc_ic_mode_switch",
- "EventCode": "0x28a",
- "BriefDescription": "OC to IC mode switch.",
- "PublicDescription": "OC Mode Switch. OC to IC mode switch.",
- "UMask": "0x2"
- },
- {
- "EventName": "ic_oc_mode_switch.ic_oc_mode_switch",
- "EventCode": "0x28a",
- "BriefDescription": "IC to OC mode switch.",
- "PublicDescription": "OC Mode Switch. IC to OC mode switch.",
- "UMask": "0x1"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "RETIRE Tokens unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.",
- "UMask": "0x40"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "AGSQ Tokens unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.",
- "UMask": "0x20"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "ALU tokens total unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.",
- "UMask": "0x10"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall.",
- "UMask": "0x8"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "ALSQ 3 Tokens unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.",
- "UMask": "0x4"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "ALSQ 2 Tokens unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.",
- "UMask": "0x2"
- },
- {
- "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall",
- "EventCode": "0xaf",
- "BriefDescription": "ALSQ 1 Tokens unavailable.",
- "PublicDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.",
- "UMask": "0x1"
- }
-]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/branch.json b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json
new file mode 100644
index 000000000000..a9943eeb8d6b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/branch.json
@@ -0,0 +1,23 @@
+[
+ {
+ "EventName": "bp_l1_btb_correct",
+ "EventCode": "0x8a",
+ "BriefDescription": "L1 BTB Correction."
+ },
+ {
+ "EventName": "bp_l2_btb_correct",
+ "EventCode": "0x8b",
+ "BriefDescription": "L2 BTB Correction."
+ },
+ {
+ "EventName": "bp_dyn_ind_pred",
+ "EventCode": "0x8e",
+ "BriefDescription": "Dynamic Indirect Predictions.",
+ "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)."
+ },
+ {
+ "EventName": "bp_de_redirect",
+ "EventCode": "0x91",
+ "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/cache.json b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
new file mode 100644
index 000000000000..404d4c569c01
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/cache.json
@@ -0,0 +1,294 @@
+[
+ {
+ "EventName": "ic_fw32",
+ "EventCode": "0x80",
+ "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)."
+ },
+ {
+ "EventName": "ic_fw32_miss",
+ "EventCode": "0x81",
+ "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag."
+ },
+ {
+ "EventName": "ic_cache_fill_l2",
+ "EventCode": "0x82",
+ "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache."
+ },
+ {
+ "EventName": "ic_cache_fill_sys",
+ "EventCode": "0x83",
+ "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache."
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_hit",
+ "EventCode": "0x84",
+ "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB."
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_miss",
+ "EventCode": "0x85",
+ "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs."
+ },
+ {
+ "EventName": "bp_snp_re_sync",
+ "EventCode": "0x86",
+ "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event."
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_any",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_dq_empty",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_back_pressure",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ic_cache_inval.l2_invalidating_probe",
+ "EventCode": "0x8c",
+ "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_cache_inval.fill_invalidated",
+ "EventCode": "0x8c",
+ "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "bp_tlb_rel",
+ "EventCode": "0x99",
+ "BriefDescription": "The number of ITLB reload requests."
+ },
+ {
+ "EventName": "l2_request_g1.rd_blk_l",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_request_g1.rd_blk_x",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_request_g1.ls_rd_blk_c_s",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_request_g1.cacheable_ic_read",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_request_g1.change_to_x",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_request_g1.prefetch_l2_cmd",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_request_g1.l2_hw_pf",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_request_g1.group2",
+ "EventCode": "0x60",
+ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_request_g2.group1",
+ "EventCode": "0x61",
+ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_request_g2.ls_rd_sized",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_request_g2.ls_rd_sized_nc",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_request_g2.ic_rd_sized",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_request_g2.ic_rd_sized_nc",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_request_g2.smc_inval",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_request_g2.bus_locks_originator",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_request_g2.bus_locks_responses",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_latency.l2_cycles_waiting_on_fills",
+ "EventCode": "0x62",
+ "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_wcb_req.wcb_write",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_wcb_req.wcb_close",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_wcb_req.zero_byte_store",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_wcb_req.cl_zero",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_cs",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_c",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_hit_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_hit_s",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_miss",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_fill_pending.l2_fill_busy",
+ "EventCode": "0x6d",
+ "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l3_request_g1.caching_l3_cache_accesses",
+ "EventCode": "0x01",
+ "BriefDescription": "Caching: L3 cache accesses",
+ "UMask": "0x80",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_lookup_state.all_l3_req_typs",
+ "EventCode": "0x04",
+ "BriefDescription": "All L3 Request Types",
+ "UMask": "0xff",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_comb_clstr_state.other_l3_miss_typs",
+ "EventCode": "0x06",
+ "BriefDescription": "Other L3 Miss Request Types",
+ "UMask": "0xfe",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_comb_clstr_state.request_miss",
+ "EventCode": "0x06",
+ "BriefDescription": "L3 cache misses",
+ "UMask": "0x01",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "xi_sys_fill_latency",
+ "EventCode": "0x90",
+ "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.",
+ "UMask": "0x00",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs",
+ "EventCode": "0x9a",
+ "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.",
+ "UMask": "0x3f",
+ "Unit": "L3PMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json b/tools/perf/pmu-events/arch/x86/amdzen1/core.json
index 1079544eeed5..7e1aa8273935 100644
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/core.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/core.json
@@ -62,7 +62,6 @@
"EventName": "ex_ret_brn_ind_misp",
"EventCode": "0xca",
"BriefDescription": "Retired Indirect Branch Instructions Mispredicted.",
- "PublicDescription": "Retired Indirect Branch Instructions Mispredicted."
},
{
"EventName": "ex_ret_mmx_fp_instr.sse_instr",
@@ -91,11 +90,6 @@
"BriefDescription": "Retired Conditional Branch Instructions."
},
{
- "EventName": "ex_ret_cond_misp",
- "EventCode": "0xd2",
- "BriefDescription": "Retired Conditional Branch Instructions Mispredicted."
- },
- {
"EventName": "ex_div_busy",
"EventCode": "0xd3",
"BriefDescription": "Div Cycles Busy count."
@@ -108,22 +102,19 @@
{
"EventName": "ex_tagged_ibs_ops.ibs_count_rollover",
"EventCode": "0x1cf",
- "BriefDescription": "Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
- "PublicDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
+ "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
"UMask": "0x4"
},
{
"EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret",
"EventCode": "0x1cf",
- "BriefDescription": "Number of Ops tagged by IBS that retired.",
- "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.",
+ "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.",
"UMask": "0x2"
},
{
"EventName": "ex_tagged_ibs_ops.ibs_tagged_ops",
"EventCode": "0x1cf",
- "BriefDescription": "Number of Ops tagged by IBS.",
- "PublicDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.",
+ "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.",
"UMask": "0x1"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json
index ea4711983d1d..a35542bd3b36 100644
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/floating-point.json
@@ -2,18 +2,74 @@
{
"EventName": "fpu_pipe_assignment.dual",
"EventCode": "0x00",
- "BriefDescription": "Total number multi-pipe uOps.",
- "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to Pipe 3.",
+ "BriefDescription": "Total number multi-pipe uOps assigned to all pipes.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to all pipes.",
"UMask": "0xf0"
},
{
+ "EventName": "fpu_pipe_assignment.dual3",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number multi-pipe uOps assigned to pipe 3.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 3.",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.dual2",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number multi-pipe uOps assigned to pipe 2.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 2.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.dual1",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number multi-pipe uOps assigned to pipe 1.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 1.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.dual0",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number multi-pipe uOps assigned to pipe 0.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number multi-pipe uOps assigned to pipe 0.",
+ "UMask": "0x10"
+ },
+ {
"EventName": "fpu_pipe_assignment.total",
"EventCode": "0x00",
- "BriefDescription": "Total number uOps.",
- "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to Pipe 3.",
+ "BriefDescription": "Total number uOps assigned to all fpu pipes.",
+ "PublicDescription": "The number of operations (uOps) and dual-pipe uOps dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to all pipes.",
"UMask": "0xf"
},
{
+ "EventName": "fpu_pipe_assignment.total3",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps on pipe 3.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total2",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps on pipe 2.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total1",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps on pipe 1.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total0",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps on pipe 0.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.",
+ "UMask": "0x1"
+ },
+ {
"EventName": "fp_sched_empty",
"EventCode": "0x01",
"BriefDescription": "This is a speculative event. The number of cycles in which the FPU scheduler is empty. Note that some Ops like FP loads bypass the scheduler."
diff --git a/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json
index fa2d60d4def0..b33a3c308019 100644
--- a/tools/perf/pmu-events/arch/x86/amdfam17h/memory.json
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/memory.json
@@ -3,28 +3,24 @@
"EventName": "ls_locks.bus_lock",
"EventCode": "0x25",
"BriefDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.",
- "PublicDescription": "Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type.",
"UMask": "0x1"
},
{
"EventName": "ls_dispatch.ld_st_dispatch",
"EventCode": "0x29",
- "BriefDescription": "Load-op-Stores.",
- "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.",
+ "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed. Load-op-Stores.",
"UMask": "0x4"
},
{
"EventName": "ls_dispatch.store_dispatch",
"EventCode": "0x29",
- "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
- "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
+ "BriefDescription": "Counts the number of stores dispatched to the LS unit. Unit Masks ADDed.",
"UMask": "0x2"
},
{
"EventName": "ls_dispatch.ld_dispatch",
"EventCode": "0x29",
- "BriefDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
- "PublicDescription": "Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
+ "BriefDescription": "Counts the number of loads dispatched to the LS unit. Unit Masks ADDed.",
"UMask": "0x1"
},
{
@@ -38,83 +34,114 @@
"BriefDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event."
},
{
+ "EventName": "ls_mab_alloc.dc_prefetcher",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB allocates by type - DC prefetcher.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_mab_alloc.stores",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB allocates by type - stores.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_mab_alloc.loads",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB allocates by type - loads.",
+ "UMask": "0x01"
+ },
+ {
"EventName": "ls_l1_d_tlb_miss.all",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss or Reload off all sizes.",
- "PublicDescription": "L1 DTLB Miss or Reload off all sizes.",
"UMask": "0xff"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 1G size.",
- "PublicDescription": "L1 DTLB Miss of a page of 1G size.",
"UMask": "0x80"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 2M size.",
- "PublicDescription": "L1 DTLB Miss of a page of 2M size.",
"UMask": "0x40"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 32K size.",
- "PublicDescription": "L1 DTLB Miss of a page of 32K size.",
"UMask": "0x20"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Miss of a page of 4K size.",
- "PublicDescription": "L1 DTLB Miss of a page of 4K size.",
"UMask": "0x10"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 1G size.",
- "PublicDescription": "L1 DTLB Reload of a page of 1G size.",
"UMask": "0x8"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 2M size.",
- "PublicDescription": "L1 DTLB Reload of a page of 2M size.",
"UMask": "0x4"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_32k_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 32K size.",
- "PublicDescription": "L1 DTLB Reload of a page of 32K size.",
"UMask": "0x2"
},
{
"EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit",
"EventCode": "0x45",
"BriefDescription": "L1 DTLB Reload of a page of 4K size.",
- "PublicDescription": "L1 DTLB Reload of a page of 4K size.",
"UMask": "0x1"
},
{
- "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_iside",
+ "EventName": "ls_tablewalker.iside",
"EventCode": "0x46",
- "BriefDescription": "Tablewalker allocation.",
- "PublicDescription": "Tablewalker allocation.",
+ "BriefDescription": "Total Page Table Walks on I-side.",
"UMask": "0xc"
},
{
- "EventName": "ls_tablewalker.perf_mon_tablewalk_alloc_dside",
+ "EventName": "ls_tablewalker.ic_type1",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks IC Type 1.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_tablewalker.ic_type0",
"EventCode": "0x46",
- "BriefDescription": "Tablewalker allocation.",
- "PublicDescription": "Tablewalker allocation.",
+ "BriefDescription": "Total Page Table Walks IC Type 0.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_tablewalker.dside",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks on D-side.",
"UMask": "0x3"
},
{
+ "EventName": "ls_tablewalker.dc_type1",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks DC Type 1.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_tablewalker.dc_type0",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks DC Type 0.",
+ "UMask": "0x1"
+ },
+ {
"EventName": "ls_misal_accesses",
"EventCode": "0x47",
"BriefDescription": "Misaligned loads."
@@ -123,35 +150,30 @@
"EventName": "ls_pref_instr_disp.prefetch_nta",
"EventCode": "0x4b",
"BriefDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.",
- "PublicDescription": "Software Prefetch Instructions (PREFETCHNTA instruction) Dispatched.",
"UMask": "0x4"
},
{
"EventName": "ls_pref_instr_disp.store_prefetch_w",
"EventCode": "0x4b",
"BriefDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.",
- "PublicDescription": "Software Prefetch Instructions (3DNow PREFETCHW instruction) Dispatched.",
"UMask": "0x2"
},
{
"EventName": "ls_pref_instr_disp.load_prefetch_w",
"EventCode": "0x4b",
- "BriefDescription": "Prefetch, Prefetch_T0_T1_T2.",
- "PublicDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.",
+ "BriefDescription": "Software Prefetch Instructions Dispatched. Prefetch, Prefetch_T0_T1_T2.",
"UMask": "0x1"
},
{
"EventName": "ls_inef_sw_pref.mab_mch_cnt",
"EventCode": "0x52",
- "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
- "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
+ "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.",
"UMask": "0x2"
},
{
"EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit",
"EventCode": "0x52",
- "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
- "PublicDescription": "The number of software prefetches that did not fetch data outside of the processor core.",
+ "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.",
"UMask": "0x1"
},
{
diff --git a/tools/perf/pmu-events/arch/x86/amdzen1/other.json b/tools/perf/pmu-events/arch/x86/amdzen1/other.json
new file mode 100644
index 000000000000..ff780098d36e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen1/other.json
@@ -0,0 +1,56 @@
+[
+ {
+ "EventName": "ic_oc_mode_switch.oc_ic_mode_switch",
+ "EventCode": "0x28a",
+ "BriefDescription": "OC Mode Switch. OC to IC mode switch.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_oc_mode_switch.ic_oc_mode_switch",
+ "EventCode": "0x28a",
+ "BriefDescription": "OC Mode Switch. IC to OC mode switch.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3_0 Tokens unavailable.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq3_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 3 Tokens unavailable.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.",
+ "UMask": "0x1"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/branch.json b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json
new file mode 100644
index 000000000000..ef4166a66288
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/branch.json
@@ -0,0 +1,52 @@
+[
+ {
+ "EventName": "bp_l1_btb_correct",
+ "EventCode": "0x8a",
+ "BriefDescription": "L1 Branch Prediction Overrides Existing Prediction (speculative)."
+ },
+ {
+ "EventName": "bp_l2_btb_correct",
+ "EventCode": "0x8b",
+ "BriefDescription": "L2 Branch Prediction Overrides Existing Prediction (speculative)."
+ },
+ {
+ "EventName": "bp_dyn_ind_pred",
+ "EventCode": "0x8e",
+ "BriefDescription": "Dynamic Indirect Predictions.",
+ "PublicDescription": "Indirect Branch Prediction for potential multi-target branch (speculative)."
+ },
+ {
+ "EventName": "bp_de_redirect",
+ "EventCode": "0x91",
+ "BriefDescription": "Decoder Overrides Existing Branch Prediction (speculative)."
+ },
+ {
+ "EventName": "bp_l1_tlb_fetch_hit",
+ "EventCode": "0x94",
+ "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB.",
+ "UMask": "0xFF"
+ },
+ {
+ "EventName": "bp_l1_tlb_fetch_hit.if1g",
+ "EventCode": "0x94",
+ "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 1GB page.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "bp_l1_tlb_fetch_hit.if2m",
+ "EventCode": "0x94",
+ "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 2MB page.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "bp_l1_tlb_fetch_hit.if4k",
+ "EventCode": "0x94",
+ "BriefDescription": "The number of instruction fetches that hit in the L1 ITLB. Instruction fetches to a 4KB page.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "bp_tlb_rel",
+ "EventCode": "0x99",
+ "BriefDescription": "The number of ITLB reload requests."
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/cache.json b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json
new file mode 100644
index 000000000000..1c60bfa0f00b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/cache.json
@@ -0,0 +1,338 @@
+[
+ {
+ "EventName": "l2_request_g1.rd_blk_l",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache reads (including hardware and software prefetch).",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_request_g1.rd_blk_x",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache stores.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_request_g1.ls_rd_blk_c_s",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache shared reads.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_request_g1.cacheable_ic_read",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Instruction cache reads.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_request_g1.change_to_x",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). Data cache state change requests. Request change to writable, check L2 for current state.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_request_g1.prefetch_l2_cmd",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). PrefetchL2Cmd.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_request_g1.l2_hw_pf",
+ "EventCode": "0x60",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 1 - Common). L2 Prefetcher. All prefetches accepted by L2 pipeline, hit or miss. Types of PF and L2 hit/miss broken out in a separate perfmon event.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_request_g1.group2",
+ "EventCode": "0x60",
+ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g2 (PMCx061).",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_request_g2.group1",
+ "EventCode": "0x61",
+ "BriefDescription": "Miscellaneous events covered in more detail by l2_request_g1 (PMCx060).",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_request_g2.ls_rd_sized",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_request_g2.ls_rd_sized_nc",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Data cache read sized non-cacheable.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_request_g2.ic_rd_sized",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_request_g2.ic_rd_sized_nc",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Instruction cache read sized non-cacheable.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_request_g2.smc_inval",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Self-modifying code invalidates.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_request_g2.bus_locks_originator",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus locks.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_request_g2.bus_locks_responses",
+ "EventCode": "0x61",
+ "BriefDescription": "All L2 Cache Requests (Breakdown 2 - Rare). Bus lock response.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_latency.l2_cycles_waiting_on_fills",
+ "EventCode": "0x62",
+ "BriefDescription": "Total cycles spent waiting for L2 fills to complete from L3 or memory, divided by four. Event counts are for both threads. To calculate average latency, the number of fills from both threads must be used.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_wcb_req.wcb_write",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB write requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) write requests.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_wcb_req.wcb_close",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB close requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) close requests.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_wcb_req.zero_byte_store",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB zero byte store requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) zero byte store requests.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_wcb_req.cl_zero",
+ "EventCode": "0x63",
+ "BriefDescription": "LS to L2 WCB cache line zeroing requests. LS (Load/Store unit) to L2 WCB (Write Combining Buffer) cache line zeroing requests.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_cs",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache shared read hit in L2",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit in L2.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_l_hit_s",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache read hit on shared line in L2.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache store or state change hit in L2.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ls_rd_blk_c",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Data cache request miss in L2 (all types).",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_hit_x",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit modifiable line in L2.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_hit_s",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache hit clean line in L2.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "l2_cache_req_stat.ic_fill_miss",
+ "EventCode": "0x64",
+ "BriefDescription": "Core to L2 cacheable request access status (not including L2 Prefetch). Instruction cache request miss in L2.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_fill_pending.l2_fill_busy",
+ "EventCode": "0x6d",
+ "BriefDescription": "Cycles with fill pending from L2. Total cycles spent with one or more fill requests in flight from L2.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l2_pf_hit_l2",
+ "EventCode": "0x70",
+ "BriefDescription": "L2 prefetch hit in L2.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_pf_miss_l2_hit_l3",
+ "EventCode": "0x71",
+ "BriefDescription": "L2 prefetcher hits in L3. Counts all L2 prefetches accepted by the L2 pipeline which miss the L2 cache and hit the L3.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "l2_pf_miss_l2_l3",
+ "EventCode": "0x72",
+ "BriefDescription": "L2 prefetcher misses in L3. All L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "ic_fw32",
+ "EventCode": "0x80",
+ "BriefDescription": "The number of 32B fetch windows transferred from IC pipe to DE instruction decoder (includes non-cacheable and cacheable fill responses)."
+ },
+ {
+ "EventName": "ic_fw32_miss",
+ "EventCode": "0x81",
+ "BriefDescription": "The number of 32B fetch windows tried to read the L1 IC and missed in the full tag."
+ },
+ {
+ "EventName": "ic_cache_fill_l2",
+ "EventCode": "0x82",
+ "BriefDescription": "The number of 64 byte instruction cache line was fulfilled from the L2 cache."
+ },
+ {
+ "EventName": "ic_cache_fill_sys",
+ "EventCode": "0x83",
+ "BriefDescription": "The number of 64 byte instruction cache line fulfilled from system memory or another cache."
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_hit",
+ "EventCode": "0x84",
+ "BriefDescription": "The number of instruction fetches that miss in the L1 ITLB but hit in the L2 ITLB."
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_tlb_miss",
+ "EventCode": "0x85",
+ "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if1g",
+ "EventCode": "0x85",
+ "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 1GB page.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if2m",
+ "EventCode": "0x85",
+ "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 2MB page.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "bp_l1_tlb_miss_l2_tlb_miss.if4k",
+ "EventCode": "0x85",
+ "BriefDescription": "The number of instruction fetches that miss in both the L1 and L2 TLBs. Instruction fetches to a 4KB page.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "bp_snp_re_sync",
+ "EventCode": "0x86",
+ "BriefDescription": "The number of pipeline restarts caused by invalidating probes that hit on the instruction stream currently being executed. This would happen if the active instruction stream was being modified by another processor in an MP system - typically a highly unlikely event."
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_any",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle for any reason (nothing valid in pipe ICM1).",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_dq_empty",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to DQ empty.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_fetch_stall.ic_stall_back_pressure",
+ "EventCode": "0x87",
+ "BriefDescription": "Instruction Pipe Stall. IC pipe was stalled during this clock cycle (including IC to OC fetches) due to back-pressure.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ic_cache_inval.l2_invalidating_probe",
+ "EventCode": "0x8c",
+ "BriefDescription": "IC line invalidated due to L2 invalidating probe (external or LS). The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_cache_inval.fill_invalidated",
+ "EventCode": "0x8c",
+ "BriefDescription": "IC line invalidated due to overwriting fill response. The number of instruction cache lines invalidated. A non-SMC event is CMC (cross modifying code), either from the other thread of the core or another core.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ic_oc_mode_switch.oc_ic_mode_switch",
+ "EventCode": "0x28a",
+ "BriefDescription": "OC Mode Switch. OC to IC mode switch.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ic_oc_mode_switch.ic_oc_mode_switch",
+ "EventCode": "0x28a",
+ "BriefDescription": "OC Mode Switch. IC to OC mode switch.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "l3_request_g1.caching_l3_cache_accesses",
+ "EventCode": "0x01",
+ "BriefDescription": "Caching: L3 cache accesses",
+ "UMask": "0x80",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_lookup_state.all_l3_req_typs",
+ "EventCode": "0x04",
+ "BriefDescription": "All L3 Request Types",
+ "UMask": "0xff",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_comb_clstr_state.other_l3_miss_typs",
+ "EventCode": "0x06",
+ "BriefDescription": "Other L3 Miss Request Types",
+ "UMask": "0xfe",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "l3_comb_clstr_state.request_miss",
+ "EventCode": "0x06",
+ "BriefDescription": "L3 cache misses",
+ "UMask": "0x01",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "xi_sys_fill_latency",
+ "EventCode": "0x90",
+ "BriefDescription": "L3 Cache Miss Latency. Total cycles for all transactions divided by 16. Ignores SliceMask and ThreadMask.",
+ "UMask": "0x00",
+ "Unit": "L3PMC"
+ },
+ {
+ "EventName": "xi_ccx_sdp_req1.all_l3_miss_req_typs",
+ "EventCode": "0x9A",
+ "BriefDescription": "All L3 Miss Request Types. Ignores SliceMask and ThreadMask.",
+ "UMask": "0x3f",
+ "Unit": "L3PMC"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/core.json b/tools/perf/pmu-events/arch/x86/amdzen2/core.json
new file mode 100644
index 000000000000..de89e5a44ff1
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/core.json
@@ -0,0 +1,130 @@
+[
+ {
+ "EventName": "ex_ret_instr",
+ "EventCode": "0xc0",
+ "BriefDescription": "Retired Instructions."
+ },
+ {
+ "EventName": "ex_ret_cops",
+ "EventCode": "0xc1",
+ "BriefDescription": "Retired Uops.",
+ "PublicDescription": "The number of micro-ops retired. This count includes all processor activity (instructions, exceptions, interrupts, microcode assists, etc.). The number of events logged per cycle can vary from 0 to 8."
+ },
+ {
+ "EventName": "ex_ret_brn",
+ "EventCode": "0xc2",
+ "BriefDescription": "Retired Branch Instructions.",
+ "PublicDescription": "The number of branch instructions retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
+ },
+ {
+ "EventName": "ex_ret_brn_misp",
+ "EventCode": "0xc3",
+ "BriefDescription": "Retired Branch Instructions Mispredicted.",
+ "PublicDescription": "The number of branch instructions retired, of any type, that were not correctly predicted. This includes those for which prediction is not attempted (far control transfers, exceptions and interrupts)."
+ },
+ {
+ "EventName": "ex_ret_brn_tkn",
+ "EventCode": "0xc4",
+ "BriefDescription": "Retired Taken Branch Instructions.",
+ "PublicDescription": "The number of taken branches that were retired. This includes all types of architectural control flow changes, including exceptions and interrupts."
+ },
+ {
+ "EventName": "ex_ret_brn_tkn_misp",
+ "EventCode": "0xc5",
+ "BriefDescription": "Retired Taken Branch Instructions Mispredicted.",
+ "PublicDescription": "The number of retired taken branch instructions that were mispredicted."
+ },
+ {
+ "EventName": "ex_ret_brn_far",
+ "EventCode": "0xc6",
+ "BriefDescription": "Retired Far Control Transfers.",
+ "PublicDescription": "The number of far control transfers retired including far call/jump/return, IRET, SYSCALL and SYSRET, plus exceptions and interrupts. Far control transfers are not subject to branch prediction."
+ },
+ {
+ "EventName": "ex_ret_brn_resync",
+ "EventCode": "0xc7",
+ "BriefDescription": "Retired Branch Resyncs.",
+ "PublicDescription": "The number of resync branches. These reflect pipeline restarts due to certain microcode assists and events such as writes to the active instruction stream, among other things. Each occurrence reflects a restart penalty similar to a branch mispredict. This is relatively rare."
+ },
+ {
+ "EventName": "ex_ret_near_ret",
+ "EventCode": "0xc8",
+ "BriefDescription": "Retired Near Returns.",
+ "PublicDescription": "The number of near return instructions (RET or RET Iw) retired."
+ },
+ {
+ "EventName": "ex_ret_near_ret_mispred",
+ "EventCode": "0xc9",
+ "BriefDescription": "Retired Near Returns Mispredicted.",
+ "PublicDescription": "The number of near returns retired that were not correctly predicted by the return address predictor. Each such mispredict incurs the same penalty as a mispredicted conditional branch instruction."
+ },
+ {
+ "EventName": "ex_ret_brn_ind_misp",
+ "EventCode": "0xca",
+ "BriefDescription": "Retired Indirect Branch Instructions Mispredicted."
+ },
+ {
+ "EventName": "ex_ret_mmx_fp_instr.sse_instr",
+ "EventCode": "0xcb",
+ "BriefDescription": "SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
+ "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. SSE instructions (SSE, SSE2, SSE3, SSSE3, SSE4A, SSE41, SSE42, AVX).",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ex_ret_mmx_fp_instr.mmx_instr",
+ "EventCode": "0xcb",
+ "BriefDescription": "MMX instructions.",
+ "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. MMX instructions.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ex_ret_mmx_fp_instr.x87_instr",
+ "EventCode": "0xcb",
+ "BriefDescription": "x87 instructions.",
+ "PublicDescription": "The number of MMX, SSE or x87 instructions retired. The UnitMask allows the selection of the individual classes of instructions as given in the table. Each increment represents one complete instruction. Since this event includes non-numeric instructions it is not suitable for measuring MFLOPS. x87 instructions.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ex_ret_cond",
+ "EventCode": "0xd1",
+ "BriefDescription": "Retired Conditional Branch Instructions."
+ },
+ {
+ "EventName": "ex_ret_cond_misp",
+ "EventCode": "0xd2",
+ "BriefDescription": "Retired Conditional Branch Instructions Mispredicted."
+ },
+ {
+ "EventName": "ex_div_busy",
+ "EventCode": "0xd3",
+ "BriefDescription": "Div Cycles Busy count."
+ },
+ {
+ "EventName": "ex_div_count",
+ "EventCode": "0xd4",
+ "BriefDescription": "Div Op Count."
+ },
+ {
+ "EventName": "ex_tagged_ibs_ops.ibs_count_rollover",
+ "EventCode": "0x1cf",
+ "BriefDescription": "Tagged IBS Ops. Number of times an op could not be tagged by IBS because of a previous tagged op that has not retired.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops_ret",
+ "EventCode": "0x1cf",
+ "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS that retired.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ex_tagged_ibs_ops.ibs_tagged_ops",
+ "EventCode": "0x1cf",
+ "BriefDescription": "Tagged IBS Ops. Number of Ops tagged by IBS.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ex_ret_fus_brnch_inst",
+ "EventCode": "0x1d0",
+ "BriefDescription": "Retired Fused Instructions. The number of fuse-branch instructions retired per cycle. The number of events logged per cycle can vary from 0-8.",
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json
new file mode 100644
index 000000000000..622a0c420e46
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/floating-point.json
@@ -0,0 +1,140 @@
+[
+ {
+ "EventName": "fpu_pipe_assignment.total",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps.",
+ "PublicDescription": "Total number of fp uOps. The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS.",
+ "UMask": "0xf"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total3",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number uOps assigned to pipe 3.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one-cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 3.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total2",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number uOps assigned to pipe 2.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 2.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total1",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number uOps assigned to pipe 1.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 1.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fpu_pipe_assignment.total0",
+ "EventCode": "0x00",
+ "BriefDescription": "Total number of fp uOps on pipe 0.",
+ "PublicDescription": "The number of operations (uOps) dispatched to each of the 4 FPU execution pipelines. This event reflects how busy the FPU pipelines are and may be used for workload characterization. This includes all operations performed by x87, MMX, and SSE instructions, including moves. Each increment represents a one- cycle dispatch event. This event is a speculative event. Since this event includes non-numeric operations it is not suitable for measuring MFLOPS. Total number uOps assigned to pipe 0.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "fp_ret_sse_avx_ops.all",
+ "EventCode": "0x03",
+ "BriefDescription": "All FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "fp_ret_sse_avx_ops.mac_flops",
+ "EventCode": "0x03",
+ "BriefDescription": "Multiply-add FLOPS. Multiply-add counts as 2 FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+ "PublicDescription": "",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fp_ret_sse_avx_ops.div_flops",
+ "EventCode": "0x03",
+ "BriefDescription": "Divide/square root FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fp_ret_sse_avx_ops.mult_flops",
+ "EventCode": "0x03",
+ "BriefDescription": "Multiply FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fp_ret_sse_avx_ops.add_sub_flops",
+ "EventCode": "0x03",
+ "BriefDescription": "Add/subtract FLOPS. This is a retire-based event. The number of retired SSE/AVX FLOPS. The number of events logged per cycle can vary from 0 to 64. This event can count above 15.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "fp_num_mov_elim_scal_op.optimized",
+ "EventCode": "0x04",
+ "BriefDescription": "Number of Scalar Ops optimized. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fp_num_mov_elim_scal_op.opt_potential",
+ "EventCode": "0x04",
+ "BriefDescription": "Number of Ops that are candidates for optimization (have Z-bit either set or pass). This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops_elim",
+ "EventCode": "0x04",
+ "BriefDescription": "Number of SSE Move Ops eliminated. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fp_num_mov_elim_scal_op.sse_mov_ops",
+ "EventCode": "0x04",
+ "BriefDescription": "Number of SSE Move Ops. This is a dispatch based speculative event, and is useful for measuring the effectiveness of the Move elimination and Scalar code optimization schemes.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "fp_retired_ser_ops.sse_bot_ret",
+ "EventCode": "0x05",
+ "BriefDescription": "SSE bottom-executing uOps retired. The number of serializing Ops retired.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fp_retired_ser_ops.sse_ctrl_ret",
+ "EventCode": "0x05",
+ "BriefDescription": "The number of serializing Ops retired. SSE control word mispredict traps due to mispredictions in RC, FTZ or DAZ, or changes in mask bits.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fp_retired_ser_ops.x87_bot_ret",
+ "EventCode": "0x05",
+ "BriefDescription": "x87 bottom-executing uOps retired. The number of serializing Ops retired.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fp_retired_ser_ops.x87_ctrl_ret",
+ "EventCode": "0x05",
+ "BriefDescription": "x87 control word mispredict traps due to mispredictions in RC or PC, or changes in mask bits. The number of serializing Ops retired.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "fp_disp_faults.ymm_spill_fault",
+ "EventCode": "0x0e",
+ "BriefDescription": "Floating Point Dispatch Faults. YMM spill fault.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "fp_disp_faults.ymm_fill_fault",
+ "EventCode": "0x0e",
+ "BriefDescription": "Floating Point Dispatch Faults. YMM fill fault.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "fp_disp_faults.xmm_fill_fault",
+ "EventCode": "0x0e",
+ "BriefDescription": "Floating Point Dispatch Faults. XMM fill fault.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "fp_disp_faults.x87_fill_fault",
+ "EventCode": "0x0e",
+ "BriefDescription": "Floating Point Dispatch Faults. x87 fill fault.",
+ "UMask": "0x1"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/memory.json b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json
new file mode 100644
index 000000000000..715046b339cb
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/memory.json
@@ -0,0 +1,341 @@
+[
+ {
+ "EventName": "ls_bad_status2.stli_other",
+ "EventCode": "0x24",
+ "BriefDescription": "Non-forwardable conflict; used to reduce STLI's via software. All reasons. Store To Load Interlock (STLI) are loads that were unable to complete because of a possible match with an older store, and the older store could not do STLF for some reason.",
+ "PublicDescription" : "Store-to-load conflicts: A load was unable to complete due to a non-forwardable conflict with an older store. Most commonly, a load's address range partially but not completely overlaps with an uncompleted older store. Software can avoid this problem by using same-size and same-alignment loads and stores when accessing the same data. Vector/SIMD code is particularly susceptible to this problem; software should construct wide vector stores by manipulating vector elements in registers using shuffle/blend/swap instructions prior to storing to memory, instead of using narrow element-by-element stores.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_locks.spec_lock_hi_spec",
+ "EventCode": "0x25",
+ "BriefDescription": "Retired lock instructions. High speculative cacheable lock speculation succeeded.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_locks.spec_lock_lo_spec",
+ "EventCode": "0x25",
+ "BriefDescription": "Retired lock instructions. Low speculative cacheable lock speculation succeeded.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_locks.non_spec_lock",
+ "EventCode": "0x25",
+ "BriefDescription": "Retired lock instructions. Non-speculative lock succeeded.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_locks.bus_lock",
+ "EventCode": "0x25",
+ "BriefDescription": "Retired lock instructions. Bus lock when a locked operations crosses a cache boundary or is done on an uncacheable memory type. Comparable to legacy bus lock.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_ret_cl_flush",
+ "EventCode": "0x26",
+ "BriefDescription": "Number of retired CLFLUSH instructions."
+ },
+ {
+ "EventName": "ls_ret_cpuid",
+ "EventCode": "0x27",
+ "BriefDescription": "Number of retired CPUID instructions."
+ },
+ {
+ "EventName": "ls_dispatch.ld_st_dispatch",
+ "EventCode": "0x29",
+ "BriefDescription": "Dispatch of a single op that performs a load from and store to the same memory address. Number of single ops that do load/store to an address.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_dispatch.store_dispatch",
+ "EventCode": "0x29",
+ "BriefDescription": "Number of stores dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_dispatch.ld_dispatch",
+ "EventCode": "0x29",
+ "BriefDescription": "Number of loads dispatched. Counts the number of operations dispatched to the LS unit. Unit Masks ADDed.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_smi_rx",
+ "EventCode": "0x2B",
+ "BriefDescription": "Number of SMIs received."
+ },
+ {
+ "EventName": "ls_int_taken",
+ "EventCode": "0x2C",
+ "BriefDescription": "Number of interrupts taken."
+ },
+ {
+ "EventName": "ls_rdtsc",
+ "EventCode": "0x2D",
+ "BriefDescription": "Number of reads of the TSC (RDTSC instructions). The count is speculative."
+ },
+ {
+ "EventName": "ls_stlf",
+ "EventCode": "0x35",
+ "BriefDescription": "Number of STLF hits."
+ },
+ {
+ "EventName": "ls_st_commit_cancel2.st_commit_cancel_wcb_full",
+ "EventCode": "0x37",
+ "BriefDescription": "A non-cacheable store and the non-cacheable commit buffer is full."
+ },
+ {
+ "EventName": "ls_dc_accesses",
+ "EventCode": "0x40",
+ "BriefDescription": "Number of accesses to the dcache for load/store references.",
+ "PublicDescription": "The number of accesses to the data cache for load and store references. This may include certain microcode scratchpad accesses, although these are generally rare. Each increment represents an eight-byte access, although the instruction may only be accessing a portion of that. This event is a speculative event."
+ },
+ {
+ "EventName": "ls_mab_alloc.dc_prefetcher",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB Allocates by Type. DC prefetcher.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_mab_alloc.stores",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB Allocates by Type. Stores.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_mab_alloc.loads",
+ "EventCode": "0x41",
+ "BriefDescription": "LS MAB Allocates by Type. Loads.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_refills_from_sys.ls_mabresp_rmt_dram",
+ "EventCode": "0x43",
+ "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from different die.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "ls_refills_from_sys.ls_mabresp_rmt_cache",
+ "EventCode": "0x43",
+ "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; Remote CCX and the address's Home Node is on a different die.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "ls_refills_from_sys.ls_mabresp_lcl_dram",
+ "EventCode": "0x43",
+ "BriefDescription": "Demand Data Cache Fills by Data Source. DRAM or IO from this thread's die.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_refills_from_sys.ls_mabresp_lcl_cache",
+ "EventCode": "0x43",
+ "BriefDescription": "Demand Data Cache Fills by Data Source. Hit in cache; local CCX (not Local L2), or Remote CCX and the address's Home Node is on this thread's die.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_refills_from_sys.ls_mabresp_lcl_l2",
+ "EventCode": "0x43",
+ "BriefDescription": "Demand Data Cache Fills by Data Source. Local L2 hit.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.all",
+ "EventCode": "0x45",
+ "BriefDescription": "All L1 DTLB Misses or Reloads.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_miss",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that miss in the L2 TLB.",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_miss",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that miss in the L2 TLB.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_miss",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload coalesced page miss.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_miss",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that miss the L2 TLB.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_1g_l2_hit",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 1G page that hit in the L2 TLB.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_2m_l2_hit",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 2M page that hit in the L2 TLB.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_coalesced_page_hit",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload hit a coalesced page.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_l1_d_tlb_miss.tlb_reload_4k_l2_hit",
+ "EventCode": "0x45",
+ "BriefDescription": "L1 DTLB Miss. DTLB reload to a 4K page that hit in the L2 TLB.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_tablewalker.iside",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks on I-side.",
+ "UMask": "0xc"
+ },
+ {
+ "EventName": "ls_tablewalker.ic_type1",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks IC Type 1.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_tablewalker.ic_type0",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks IC Type 0.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_tablewalker.dside",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks on D-side.",
+ "UMask": "0x3"
+ },
+ {
+ "EventName": "ls_tablewalker.dc_type1",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks DC Type 1.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_tablewalker.dc_type0",
+ "EventCode": "0x46",
+ "BriefDescription": "Total Page Table Walks DC Type 0.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_misal_accesses",
+ "EventCode": "0x47",
+ "BriefDescription": "Misaligned loads."
+ },
+ {
+ "EventName": "ls_pref_instr_disp",
+ "EventCode": "0x4b",
+ "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative).",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "ls_pref_instr_disp.prefetch_nta",
+ "EventCode": "0x4b",
+ "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). PrefetchNTA instruction. See docAPM3 PREFETCHlevel.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "ls_pref_instr_disp.prefetch_w",
+ "EventCode": "0x4b",
+ "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). See docAPM3 PREFETCHW.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_pref_instr_disp.prefetch",
+ "EventCode": "0x4b",
+ "BriefDescription": "Software Prefetch Instructions Dispatched (Speculative). Prefetch_T0_T1_T2. PrefetchT0, T1 and T2 instructions. See docAPM3 PREFETCHlevel.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_inef_sw_pref.mab_mch_cnt",
+ "EventCode": "0x52",
+ "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a match on an already-allocated miss request buffer.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_inef_sw_pref.data_pipe_sw_pf_dc_hit",
+ "EventCode": "0x52",
+ "BriefDescription": "The number of software prefetches that did not fetch data outside of the processor core. Software PREFETCH instruction saw a DC hit.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_dram",
+ "EventCode": "0x59",
+ "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_rmt_cache",
+ "EventCode": "0x59",
+ "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node remote).",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_dram",
+ "EventCode": "0x59",
+ "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. DRAM or IO from this thread's die. From DRAM (home node local).",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_cache",
+ "EventCode": "0x59",
+ "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. From another cache (home node local).",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_sw_pf_dc_fill.ls_mabresp_lcl_l2",
+ "EventCode": "0x59",
+ "BriefDescription": "Software Prefetch Data Cache Fills by Data Source. Local L2 hit.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_dram",
+ "EventCode": "0x5A",
+ "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node remote).",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "ls_hw_pf_dc_fill.ls_mabresp_rmt_cache",
+ "EventCode": "0x5A",
+ "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node remote).",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_dram",
+ "EventCode": "0x5A",
+ "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From DRAM (home node local).",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_cache",
+ "EventCode": "0x5A",
+ "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. From another cache (home node local).",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "ls_hw_pf_dc_fill.ls_mabresp_lcl_l2",
+ "EventCode": "0x5A",
+ "BriefDescription": "Hardware Prefetch Data Cache Fills by Data Source. Local L2 hit.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "ls_not_halted_cyc",
+ "EventCode": "0x76",
+ "BriefDescription": "Cycles not in Halt."
+ },
+ {
+ "EventName": "ls_tlb_flush",
+ "EventCode": "0x78",
+ "BriefDescription": "All TLB Flushes"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/amdzen2/other.json b/tools/perf/pmu-events/arch/x86/amdzen2/other.json
new file mode 100644
index 000000000000..e94994d4a60e
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/amdzen2/other.json
@@ -0,0 +1,115 @@
+[
+ {
+ "EventName": "de_dis_uop_queue_empty_di0",
+ "EventCode": "0xa9",
+ "BriefDescription": "Cycles where the Micro-Op Queue is empty."
+ },
+ {
+ "EventName": "de_dis_uops_from_decoder",
+ "EventCode": "0xaa",
+ "BriefDescription": "Ops dispatched from either the decoders, OpCache or both.",
+ "UMask": "0xff"
+ },
+ {
+ "EventName": "de_dis_uops_from_decoder.opcache_dispatched",
+ "EventCode": "0xaa",
+ "BriefDescription": "Count of dispatched Ops from OpCache.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "de_dis_uops_from_decoder.decoder_dispatched",
+ "EventCode": "0xaa",
+ "BriefDescription": "Count of dispatched Ops from Decoder.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.fp_misc_rsrc_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP Miscellaneous resource unavailable. Applies to the recovery of mispredicts with FP ops.",
+ "UMask": "0x80"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.fp_sch_rsrc_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. FP scheduler resource stall. Applies to ops that use the FP scheduler.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.fp_reg_file_rsrc_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Floating point register file resource stall. Applies to all FP ops that have a destination register.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.taken_branch_buffer_rsrc_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Taken branch buffer resource stall.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.int_sched_misc_token_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Scheduler miscellaneous resource stall.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.store_queue_token_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Store queue resource stall. Applies to all ops with store semantics.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.load_queue_token_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Load queue resource stall. Applies to all ops with load semantics.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls1.int_phy_reg_file_token_stall",
+ "EventCode": "0xae",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. Integer Physical Register File resource stall. Applies to all ops that have an integer destination register.",
+ "UMask": "0x1"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.sc_agu_dispatch_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. SC AGU dispatch stall.",
+ "UMask": "0x40"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.retire_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. RETIRE Tokens unavailable.",
+ "UMask": "0x20"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.agsq_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. AGSQ Tokens unavailable.",
+ "UMask": "0x10"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alu_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALU tokens total unavailable.",
+ "UMask": "0x8"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq3_0_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ3_0_TokenStall.",
+ "UMask": "0x4"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq2_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 2 Tokens unavailable.",
+ "UMask": "0x2"
+ },
+ {
+ "EventName": "de_dis_dispatch_token_stalls0.alsq1_token_stall",
+ "EventCode": "0xaf",
+ "BriefDescription": "Cycles where a dispatch group is valid but does not get dispatched due to a token stall. ALSQ 1 Tokens unavailable.",
+ "UMask": "0x1"
+ }
+]
diff --git a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
index 45a34ce4fe89..8cdc7c13dc2a 100644
--- a/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwell/bdw-metrics.json
@@ -297,7 +297,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
index 961fe4395758..16fd8a7490fc 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellde/bdwde-metrics.json
@@ -115,7 +115,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
index 746734ce09be..1eb0415fa11a 100644
--- a/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/broadwellx/bdx-metrics.json
@@ -297,7 +297,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
index f94653229dd4..d25eebce34c9 100644
--- a/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/cascadelakex/clx-metrics.json
@@ -215,7 +215,8 @@
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
"MetricGroup": "TLB",
- "MetricName": "Page_Walks_Utilization"
+ "MetricName": "Page_Walks_Utilization",
+ "MetricConstraint": "NO_NMI_WATCHDOG"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
@@ -315,7 +316,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
@@ -327,31 +328,31 @@
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
+ "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
"MetricGroup": "Memory_Lat",
"MetricName": "DRAM_Read_Latency"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@",
+ "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@",
"MetricGroup": "Memory_BW",
"MetricName": "DRAM_Parallel_Reads"
},
{
"BriefDescription": "Average latency of data read request to external 3D X-Point memory [in nanoseconds]. Accounts for demand loads and L1/L2 data-read prefetches",
- "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\\\\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ ) if 1 if 0 == 1 else 0 else 0",
+ "MetricExpr": "( 1000000000 * ( imc@event\\=0xe0\\,umask\\=0x1@ / imc@event\\=0xe3@ ) / imc_0@event\\=0x0@ )",
"MetricGroup": "Memory_Lat",
"MetricName": "MEM_PMM_Read_Latency"
},
{
"BriefDescription": "Average 3DXP Memory Bandwidth Use for reads [GB / sec]",
- "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0",
+ "MetricExpr": "( ( 64 * imc@event\\=0xe3@ / 1000000000 ) / duration_time )",
"MetricGroup": "Memory_BW",
"MetricName": "PMM_Read_BW"
},
{
"BriefDescription": "Average 3DXP Memory Bandwidth Use for Writes [GB / sec]",
- "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time ) if 1 if 0 == 1 else 0 else 0",
+ "MetricExpr": "( ( 64 * imc@event\\=0xe7@ / 1000000000 ) / duration_time )",
"MetricGroup": "Memory_BW",
"MetricName": "PMM_Write_BW"
},
diff --git a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
index 5402cd3120f9..f57c5f3506c2 100644
--- a/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswell/hsw-metrics.json
@@ -267,7 +267,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
index 832f3cb40b34..311a005dc35b 100644
--- a/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/haswellx/hsx-metrics.json
@@ -267,7 +267,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
index d69b2a8fc0bc..28e25447d3ef 100644
--- a/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivybridge/ivb-metrics.json
@@ -285,7 +285,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
index 5f465fd81315..db23db2e98be 100644
--- a/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/ivytown/ivt-metrics.json
@@ -285,7 +285,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
index 3e909b306003..dbb33e00b72a 100644
--- a/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/jaketown/jkt-metrics.json
@@ -171,7 +171,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 745ced083844..25b06cf98747 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -36,4 +36,5 @@ GenuineIntel-6-55-[56789ABCDEF],v1,cascadelakex,core
GenuineIntel-6-7D,v1,icelake,core
GenuineIntel-6-7E,v1,icelake,core
GenuineIntel-6-86,v1,tremontx,core
-AuthenticAMD-23-[[:xdigit:]]+,v1,amdfam17h,core
+AuthenticAMD-23-([12][0-9A-F]|[0-9A-F]),v2,amdzen1,core
+AuthenticAMD-23-[[:xdigit:]]+,v1,amdzen2,core
diff --git a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
index 50c053235752..fb2d7b8875f8 100644
--- a/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/sandybridge/snb-metrics.json
@@ -171,7 +171,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
index e7feb60f9fa9..8704efeb8d31 100644
--- a/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylake/skl-metrics.json
@@ -215,7 +215,8 @@
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
"MetricGroup": "TLB",
- "MetricName": "Page_Walks_Utilization"
+ "MetricName": "Page_Walks_Utilization",
+ "MetricConstraint": "NO_NMI_WATCHDOG"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
@@ -303,7 +304,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
diff --git a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
index 21d7a0c2c2e8..390bdab1be9d 100644
--- a/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/skylakex/skx-metrics.json
@@ -215,7 +215,8 @@
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
"MetricExpr": "( ITLB_MISSES.WALK_PENDING + DTLB_LOAD_MISSES.WALK_PENDING + DTLB_STORE_MISSES.WALK_PENDING + EPT.WALK_PENDING ) / ( 2 * cycles )",
"MetricGroup": "TLB",
- "MetricName": "Page_Walks_Utilization"
+ "MetricName": "Page_Walks_Utilization",
+ "MetricConstraint": "NO_NMI_WATCHDOG"
},
{
"BriefDescription": "Utilization of the core's Page Walker(s) serving STLB misses triggered by instruction/Load/Store accesses",
@@ -315,7 +316,7 @@
},
{
"BriefDescription": "Fraction of cycles spent in Kernel mode",
- "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC:k / CPU_CLK_UNHALTED.REF_TSC",
+ "MetricExpr": "CPU_CLK_UNHALTED.THREAD:k / CPU_CLK_UNHALTED.THREAD",
"MetricGroup": "Summary",
"MetricName": "Kernel_Utilization"
},
@@ -327,13 +328,13 @@
},
{
"BriefDescription": "Average latency of data read request to external memory (in nanoseconds). Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "1000000000 * ( cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x35\\\\\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
+ "MetricExpr": "1000000000 * ( cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x35\\,umask\\=0x21@ ) / ( cha_0@event\\=0x0@ / duration_time )",
"MetricGroup": "Memory_Lat",
"MetricName": "DRAM_Read_Latency"
},
{
"BriefDescription": "Average number of parallel data read requests to external memory. Accounts for demand loads and L1/L2 prefetches",
- "MetricExpr": "cha@event\\=0x36\\\\\\,umask\\=0x21@ / cha@event\\=0x36\\\\\\,umask\\=0x21\\\\\\,thresh\\=1@",
+ "MetricExpr": "cha@event\\=0x36\\,umask\\=0x21@ / cha@event\\=0x36\\,umask\\=0x21\\,thresh\\=1@",
"MetricGroup": "Memory_BW",
"MetricName": "DRAM_Parallel_Reads"
},
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 27b4da80f751..fa86c5f997cc 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -323,7 +323,7 @@ static int print_events_table_entry(void *data, char *name, char *event,
char *pmu, char *unit, char *perpkg,
char *metric_expr,
char *metric_name, char *metric_group,
- char *deprecated)
+ char *deprecated, char *metric_constraint)
{
struct perf_entry_data *pd = data;
FILE *outfp = pd->outfp;
@@ -357,6 +357,8 @@ static int print_events_table_entry(void *data, char *name, char *event,
fprintf(outfp, "\t.metric_group = \"%s\",\n", metric_group);
if (deprecated)
fprintf(outfp, "\t.deprecated = \"%s\",\n", deprecated);
+ if (metric_constraint)
+ fprintf(outfp, "\t.metric_constraint = \"%s\",\n", metric_constraint);
fprintf(outfp, "},\n");
return 0;
@@ -375,6 +377,7 @@ struct event_struct {
char *metric_name;
char *metric_group;
char *deprecated;
+ char *metric_constraint;
};
#define ADD_EVENT_FIELD(field) do { if (field) { \
@@ -422,7 +425,7 @@ static int save_arch_std_events(void *data, char *name, char *event,
char *desc, char *long_desc, char *pmu,
char *unit, char *perpkg, char *metric_expr,
char *metric_name, char *metric_group,
- char *deprecated)
+ char *deprecated, char *metric_constraint)
{
struct event_struct *es;
@@ -486,7 +489,7 @@ try_fixup(const char *fn, char *arch_std, char **event, char **desc,
char **name, char **long_desc, char **pmu, char **filter,
char **perpkg, char **unit, char **metric_expr, char **metric_name,
char **metric_group, unsigned long long eventcode,
- char **deprecated)
+ char **deprecated, char **metric_constraint)
{
/* try to find matching event from arch standard values */
struct event_struct *es;
@@ -515,7 +518,7 @@ int json_events(const char *fn,
char *pmu, char *unit, char *perpkg,
char *metric_expr,
char *metric_name, char *metric_group,
- char *deprecated),
+ char *deprecated, char *metric_constraint),
void *data)
{
int err;
@@ -545,6 +548,7 @@ int json_events(const char *fn,
char *metric_name = NULL;
char *metric_group = NULL;
char *deprecated = NULL;
+ char *metric_constraint = NULL;
char *arch_std = NULL;
unsigned long long eventcode = 0;
struct msrmap *msr = NULL;
@@ -629,6 +633,8 @@ int json_events(const char *fn,
addfield(map, &metric_name, "", "", val);
} else if (json_streq(map, field, "MetricGroup")) {
addfield(map, &metric_group, "", "", val);
+ } else if (json_streq(map, field, "MetricConstraint")) {
+ addfield(map, &metric_constraint, "", "", val);
} else if (json_streq(map, field, "MetricExpr")) {
addfield(map, &metric_expr, "", "", val);
for (s = metric_expr; *s; s++)
@@ -670,13 +676,13 @@ int json_events(const char *fn,
&long_desc, &pmu, &filter, &perpkg,
&unit, &metric_expr, &metric_name,
&metric_group, eventcode,
- &deprecated);
+ &deprecated, &metric_constraint);
if (err)
goto free_strings;
}
err = func(data, name, real_event(name, event), desc, long_desc,
pmu, unit, perpkg, metric_expr, metric_name,
- metric_group, deprecated);
+ metric_group, deprecated, metric_constraint);
free_strings:
free(event);
free(desc);
@@ -691,6 +697,7 @@ free_strings:
free(metric_expr);
free(metric_name);
free(metric_group);
+ free(metric_constraint);
free(arch_std);
if (err)
@@ -764,6 +771,19 @@ static void print_mapping_table_suffix(FILE *outfp)
fprintf(outfp, "};\n");
}
+static void print_mapping_test_table(FILE *outfp)
+{
+ /*
+ * Print the terminating, NULL entry.
+ */
+ fprintf(outfp, "{\n");
+ fprintf(outfp, "\t.cpuid = \"testcpu\",\n");
+ fprintf(outfp, "\t.version = \"v1\",\n");
+ fprintf(outfp, "\t.type = \"core\",\n");
+ fprintf(outfp, "\t.table = pme_test_cpu,\n");
+ fprintf(outfp, "},\n");
+}
+
static int process_mapfile(FILE *outfp, char *fpath)
{
int n = 16384;
@@ -841,6 +861,7 @@ static int process_mapfile(FILE *outfp, char *fpath)
}
out:
+ print_mapping_test_table(outfp);
print_mapping_table_suffix(outfp);
fclose(mapfp);
free(line);
@@ -1161,6 +1182,22 @@ int main(int argc, char *argv[])
goto empty_map;
}
+ sprintf(ldirname, "%s/test", start_dirname);
+
+ rc = nftw(ldirname, process_one_file, maxfds, 0);
+ if (rc && verbose) {
+ pr_info("%s: Error walking file tree %s rc=%d for test\n",
+ prog, ldirname, rc);
+ goto empty_map;
+ } else if (rc < 0) {
+ /* Make build fail */
+ free_arch_std_events();
+ ret = 1;
+ goto out_free_mapfile;
+ } else if (rc) {
+ goto empty_map;
+ }
+
if (close_table)
print_events_table_suffix(eventsfp);
diff --git a/tools/perf/pmu-events/jevents.h b/tools/perf/pmu-events/jevents.h
index 5cda49a42143..2afc8304529e 100644
--- a/tools/perf/pmu-events/jevents.h
+++ b/tools/perf/pmu-events/jevents.h
@@ -8,7 +8,7 @@ int json_events(const char *fn,
char *pmu,
char *unit, char *perpkg, char *metric_expr,
char *metric_name, char *metric_group,
- char *deprecated),
+ char *deprecated, char *metric_constraint),
void *data);
char *get_cpu_str(void);
diff --git a/tools/perf/pmu-events/jsmn.h b/tools/perf/pmu-events/jsmn.h
index c7b0f6ea2a31..1bdfd55fff30 100644
--- a/tools/perf/pmu-events/jsmn.h
+++ b/tools/perf/pmu-events/jsmn.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: MIT */
#ifndef __JSMN_H_
#define __JSMN_H_
diff --git a/tools/perf/pmu-events/pmu-events.h b/tools/perf/pmu-events/pmu-events.h
index caeb577d36c9..c8f306b572f4 100644
--- a/tools/perf/pmu-events/pmu-events.h
+++ b/tools/perf/pmu-events/pmu-events.h
@@ -18,6 +18,7 @@ struct pmu_event {
const char *metric_name;
const char *metric_group;
const char *deprecated;
+ const char *metric_constraint;
};
/*
@@ -25,7 +26,7 @@ struct pmu_event {
* Map a CPU to its table of PMU events. The CPU is identified by the
* cpuid field, which is an arch-specific identifier for the CPU.
* The identifier specified in tools/perf/pmu-events/arch/xxx/mapfile
- * must match the get_cpustr() in tools/perf/arch/xxx/util/header.c)
+ * must match the get_cpuid_str() in tools/perf/arch/xxx/util/header.c)
*
* The cpuid can contain any character other than the comma.
*/
diff --git a/tools/perf/scripts/perl/check-perf-trace.pl b/tools/perf/scripts/perl/check-perf-trace.pl
index 4e7076c20616..d307ce8fd6ed 100644
--- a/tools/perf/scripts/perl/check-perf-trace.pl
+++ b/tools/perf/scripts/perl/check-perf-trace.pl
@@ -28,7 +28,7 @@ sub trace_end
sub irq::softirq_entry
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$vec) = @_;
print_header($event_name, $common_cpu, $common_secs, $common_nsecs,
@@ -43,7 +43,7 @@ sub irq::softirq_entry
sub kmem::kmalloc
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$call_site, $ptr, $bytes_req, $bytes_alloc,
$gfp_flags) = @_;
@@ -92,7 +92,7 @@ sub print_unhandled
sub trace_unhandled
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm) = @_;
+ $common_pid, $common_comm, $common_callchain) = @_;
$unhandled{$event_name}++;
}
diff --git a/tools/perf/scripts/perl/failed-syscalls.pl b/tools/perf/scripts/perl/failed-syscalls.pl
index 55e7ae4c5c88..05954a8f363a 100644
--- a/tools/perf/scripts/perl/failed-syscalls.pl
+++ b/tools/perf/scripts/perl/failed-syscalls.pl
@@ -18,7 +18,7 @@ my %failed_syscalls;
sub raw_syscalls::sys_exit
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$id, $ret) = @_;
if ($ret < 0) {
diff --git a/tools/perf/scripts/perl/rw-by-file.pl b/tools/perf/scripts/perl/rw-by-file.pl
index 168fa5e94b44..92a750b8552b 100644
--- a/tools/perf/scripts/perl/rw-by-file.pl
+++ b/tools/perf/scripts/perl/rw-by-file.pl
@@ -28,7 +28,7 @@ my %writes;
sub syscalls::sys_enter_read
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+ $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_;
if ($common_comm eq $for_comm) {
$reads{$fd}{bytes_requested} += $count;
@@ -39,7 +39,7 @@ sub syscalls::sys_enter_read
sub syscalls::sys_enter_write
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm, $nr, $fd, $buf, $count) = @_;
+ $common_pid, $common_comm, $common_callchain, $nr, $fd, $buf, $count) = @_;
if ($common_comm eq $for_comm) {
$writes{$fd}{bytes_written} += $count;
@@ -98,7 +98,7 @@ sub print_unhandled
sub trace_unhandled
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm) = @_;
+ $common_pid, $common_comm, $common_callchain) = @_;
$unhandled{$event_name}++;
}
diff --git a/tools/perf/scripts/perl/rw-by-pid.pl b/tools/perf/scripts/perl/rw-by-pid.pl
index 495698250b2f..d789fe39caab 100644
--- a/tools/perf/scripts/perl/rw-by-pid.pl
+++ b/tools/perf/scripts/perl/rw-by-pid.pl
@@ -24,7 +24,7 @@ my %writes;
sub syscalls::sys_exit_read
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $ret) = @_;
if ($ret > 0) {
@@ -40,7 +40,7 @@ sub syscalls::sys_exit_read
sub syscalls::sys_enter_read
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $fd, $buf, $count) = @_;
$reads{$common_pid}{bytes_requested} += $count;
@@ -51,7 +51,7 @@ sub syscalls::sys_enter_read
sub syscalls::sys_exit_write
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $ret) = @_;
if ($ret <= 0) {
@@ -62,7 +62,7 @@ sub syscalls::sys_exit_write
sub syscalls::sys_enter_write
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $fd, $buf, $count) = @_;
$writes{$common_pid}{bytes_written} += $count;
@@ -178,7 +178,7 @@ sub print_unhandled
sub trace_unhandled
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm) = @_;
+ $common_pid, $common_comm, $common_callchain) = @_;
$unhandled{$event_name}++;
}
diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl
index 6473442568a2..eba4df67af6b 100644
--- a/tools/perf/scripts/perl/rwtop.pl
+++ b/tools/perf/scripts/perl/rwtop.pl
@@ -35,7 +35,7 @@ if (!$interval) {
sub syscalls::sys_exit_read
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $ret) = @_;
print_check();
@@ -53,7 +53,7 @@ sub syscalls::sys_exit_read
sub syscalls::sys_enter_read
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $fd, $buf, $count) = @_;
print_check();
@@ -66,7 +66,7 @@ sub syscalls::sys_enter_read
sub syscalls::sys_exit_write
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $ret) = @_;
print_check();
@@ -79,7 +79,7 @@ sub syscalls::sys_exit_write
sub syscalls::sys_enter_write
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$nr, $fd, $buf, $count) = @_;
print_check();
@@ -197,7 +197,7 @@ sub print_unhandled
sub trace_unhandled
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm) = @_;
+ $common_pid, $common_comm, $common_callchain) = @_;
$unhandled{$event_name}++;
}
diff --git a/tools/perf/scripts/perl/wakeup-latency.pl b/tools/perf/scripts/perl/wakeup-latency.pl
index efcfec5e347a..53444ff4ec7f 100644
--- a/tools/perf/scripts/perl/wakeup-latency.pl
+++ b/tools/perf/scripts/perl/wakeup-latency.pl
@@ -28,7 +28,7 @@ my $total_wakeups = 0;
sub sched::sched_switch
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$prev_comm, $prev_pid, $prev_prio, $prev_state, $next_comm, $next_pid,
$next_prio) = @_;
@@ -51,7 +51,7 @@ sub sched::sched_switch
sub sched::sched_wakeup
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm,
+ $common_pid, $common_comm, $common_callchain,
$comm, $pid, $prio, $success, $target_cpu) = @_;
$last_wakeup{$target_cpu}{ts} = nsecs($common_secs, $common_nsecs);
@@ -101,7 +101,7 @@ sub print_unhandled
sub trace_unhandled
{
my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs,
- $common_pid, $common_comm) = @_;
+ $common_pid, $common_comm, $common_callchain) = @_;
$unhandled{$event_name}++;
}
diff --git a/tools/perf/scripts/python/bin/flamegraph-record b/tools/perf/scripts/python/bin/flamegraph-record
new file mode 100755
index 000000000000..7df5a19c0163
--- /dev/null
+++ b/tools/perf/scripts/python/bin/flamegraph-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -g "$@"
diff --git a/tools/perf/scripts/python/bin/flamegraph-report b/tools/perf/scripts/python/bin/flamegraph-report
new file mode 100755
index 000000000000..53c5dc90c87e
--- /dev/null
+++ b/tools/perf/scripts/python/bin/flamegraph-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: create flame graphs
+perf script -s "$PERF_EXEC_PATH"/scripts/python/flamegraph.py -- "$@"
diff --git a/tools/perf/scripts/python/flamegraph.py b/tools/perf/scripts/python/flamegraph.py
new file mode 100755
index 000000000000..61f3be9add6b
--- /dev/null
+++ b/tools/perf/scripts/python/flamegraph.py
@@ -0,0 +1,124 @@
+# flamegraph.py - create flame graphs from perf samples
+# SPDX-License-Identifier: GPL-2.0
+#
+# Usage:
+#
+# perf record -a -g -F 99 sleep 60
+# perf script report flamegraph
+#
+# Combined:
+#
+# perf script flamegraph -a -F 99 sleep 60
+#
+# Written by Andreas Gerstmayr <agerstmayr@redhat.com>
+# Flame Graphs invented by Brendan Gregg <bgregg@netflix.com>
+# Works in tandem with d3-flame-graph by Martin Spier <mspier@netflix.com>
+
+from __future__ import print_function
+import sys
+import os
+import argparse
+import json
+
+
+class Node:
+ def __init__(self, name, libtype=""):
+ self.name = name
+ self.libtype = libtype
+ self.value = 0
+ self.children = []
+
+ def toJSON(self):
+ return {
+ "n": self.name,
+ "l": self.libtype,
+ "v": self.value,
+ "c": self.children
+ }
+
+
+class FlameGraphCLI:
+ def __init__(self, args):
+ self.args = args
+ self.stack = Node("root")
+
+ if self.args.format == "html" and \
+ not os.path.isfile(self.args.template):
+ print("Flame Graph template {} does not exist. Please install "
+ "the js-d3-flame-graph (RPM) or libjs-d3-flame-graph (deb) "
+ "package, specify an existing flame graph template "
+ "(--template PATH) or another output format "
+ "(--format FORMAT).".format(self.args.template),
+ file=sys.stderr)
+ sys.exit(1)
+
+ def find_or_create_node(self, node, name, dso):
+ libtype = "kernel" if dso == "[kernel.kallsyms]" else ""
+ if name is None:
+ name = "[unknown]"
+
+ for child in node.children:
+ if child.name == name and child.libtype == libtype:
+ return child
+
+ child = Node(name, libtype)
+ node.children.append(child)
+ return child
+
+ def process_event(self, event):
+ node = self.find_or_create_node(self.stack, event["comm"], None)
+ if "callchain" in event:
+ for entry in reversed(event['callchain']):
+ node = self.find_or_create_node(
+ node, entry.get("sym", {}).get("name"), event.get("dso"))
+ else:
+ node = self.find_or_create_node(
+ node, entry.get("symbol"), event.get("dso"))
+ node.value += 1
+
+ def trace_end(self):
+ json_str = json.dumps(self.stack, default=lambda x: x.toJSON())
+
+ if self.args.format == "html":
+ try:
+ with open(self.args.template) as f:
+ output_str = f.read().replace("/** @flamegraph_json **/",
+ json_str)
+ except IOError as e:
+ print("Error reading template file: {}".format(e), file=sys.stderr)
+ sys.exit(1)
+ output_fn = self.args.output or "flamegraph.html"
+ else:
+ output_str = json_str
+ output_fn = self.args.output or "stacks.json"
+
+ if output_fn == "-":
+ sys.stdout.write(output_str)
+ else:
+ print("dumping data to {}".format(output_fn))
+ try:
+ with open(output_fn, "w") as out:
+ out.write(output_str)
+ except IOError as e:
+ print("Error writing output file: {}".format(e), file=sys.stderr)
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Create flame graphs.")
+ parser.add_argument("-f", "--format",
+ default="html", choices=["json", "html"],
+ help="output file format")
+ parser.add_argument("-o", "--output",
+ help="output file name")
+ parser.add_argument("--template",
+ default="/usr/share/d3-flame-graph/d3-flamegraph-base.html",
+ help="path to flamegraph HTML template")
+ parser.add_argument("-i", "--input",
+ help=argparse.SUPPRESS)
+
+ args = parser.parse_args()
+ cli = FlameGraphCLI(args)
+
+ process_event = cli.process_event
+ trace_end = cli.trace_end
diff --git a/tools/perf/tests/.gitignore b/tools/perf/tests/.gitignore
index 8cc30e731c73..d053b325f728 100644
--- a/tools/perf/tests/.gitignore
+++ b/tools/perf/tests/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
llvm-src-base.c
llvm-src-kbuild.c
llvm-src-prologue.c
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 1692529639b0..cd00498a5dce 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -14,6 +14,7 @@ perf-y += evsel-roundtrip-name.o
perf-y += evsel-tp-sched.o
perf-y += fdarray.o
perf-y += pmu.o
+perf-y += pmu-events.o
perf-y += hists_common.o
perf-y += hists_link.o
perf-y += hists_filter.o
@@ -55,6 +56,9 @@ perf-y += mem2node.o
perf-y += maps.o
perf-y += time-utils-test.o
perf-y += genelf.o
+perf-y += api-io.o
+perf-y += demangle-java-test.o
+perf-y += pfm.o
$(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
$(call rule_mkdir)
diff --git a/tools/perf/tests/api-io.c b/tools/perf/tests/api-io.c
new file mode 100644
index 000000000000..2ada86ad6084
--- /dev/null
+++ b/tools/perf/tests/api-io.c
@@ -0,0 +1,304 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "tests.h"
+#include <api/io.h>
+#include <linux/kernel.h>
+
+#define TEMPL "/tmp/perf-test-XXXXXX"
+
+#define EXPECT_EQUAL(val, expected) \
+do { \
+ if (val != expected) { \
+ pr_debug("%s:%d: %d != %d\n", \
+ __FILE__, __LINE__, val, expected); \
+ ret = -1; \
+ } \
+} while (0)
+
+#define EXPECT_EQUAL64(val, expected) \
+do { \
+ if (val != expected) { \
+ pr_debug("%s:%d: %lld != %lld\n", \
+ __FILE__, __LINE__, val, expected); \
+ ret = -1; \
+ } \
+} while (0)
+
+static int make_test_file(char path[PATH_MAX], const char *contents)
+{
+ ssize_t contents_len = strlen(contents);
+ int fd;
+
+ strcpy(path, TEMPL);
+ fd = mkstemp(path);
+ if (fd < 0) {
+ pr_debug("mkstemp failed");
+ return -1;
+ }
+ if (write(fd, contents, contents_len) < contents_len) {
+ pr_debug("short write");
+ close(fd);
+ unlink(path);
+ return -1;
+ }
+ close(fd);
+ return 0;
+}
+
+static int setup_test(char path[PATH_MAX], const char *contents,
+ size_t buf_size, struct io *io)
+{
+ if (make_test_file(path, contents))
+ return -1;
+
+ io->fd = open(path, O_RDONLY);
+ if (io->fd < 0) {
+ pr_debug("Failed to open '%s'\n", path);
+ unlink(path);
+ return -1;
+ }
+ io->buf = malloc(buf_size);
+ if (io->buf == NULL) {
+ pr_debug("Failed to allocate memory");
+ close(io->fd);
+ unlink(path);
+ return -1;
+ }
+ io__init(io, io->fd, io->buf, buf_size);
+ return 0;
+}
+
+static void cleanup_test(char path[PATH_MAX], struct io *io)
+{
+ free(io->buf);
+ close(io->fd);
+ unlink(path);
+}
+
+static int do_test_get_char(const char *test_string, size_t buf_size)
+{
+ char path[PATH_MAX];
+ struct io io;
+ int ch, ret = 0;
+ size_t i;
+
+ if (setup_test(path, test_string, buf_size, &io))
+ return -1;
+
+ for (i = 0; i < strlen(test_string); i++) {
+ ch = io__get_char(&io);
+
+ EXPECT_EQUAL(ch, test_string[i]);
+ EXPECT_EQUAL(io.eof, false);
+ }
+ ch = io__get_char(&io);
+ EXPECT_EQUAL(ch, -1);
+ EXPECT_EQUAL(io.eof, true);
+
+ cleanup_test(path, &io);
+ return ret;
+}
+
+static int test_get_char(void)
+{
+ int i, ret = 0;
+ size_t j;
+
+ static const char *const test_strings[] = {
+ "12345678abcdef90",
+ "a\nb\nc\nd\n",
+ "\a\b\t\v\f\r",
+ };
+ for (i = 0; i <= 10; i++) {
+ for (j = 0; j < ARRAY_SIZE(test_strings); j++) {
+ if (do_test_get_char(test_strings[j], 1 << i))
+ ret = -1;
+ }
+ }
+ return ret;
+}
+
+static int do_test_get_hex(const char *test_string,
+ __u64 val1, int ch1,
+ __u64 val2, int ch2,
+ __u64 val3, int ch3,
+ bool end_eof)
+{
+ char path[PATH_MAX];
+ struct io io;
+ int ch, ret = 0;
+ __u64 hex;
+
+ if (setup_test(path, test_string, 4, &io))
+ return -1;
+
+ ch = io__get_hex(&io, &hex);
+ EXPECT_EQUAL64(hex, val1);
+ EXPECT_EQUAL(ch, ch1);
+
+ ch = io__get_hex(&io, &hex);
+ EXPECT_EQUAL64(hex, val2);
+ EXPECT_EQUAL(ch, ch2);
+
+ ch = io__get_hex(&io, &hex);
+ EXPECT_EQUAL64(hex, val3);
+ EXPECT_EQUAL(ch, ch3);
+
+ EXPECT_EQUAL(io.eof, end_eof);
+
+ cleanup_test(path, &io);
+ return ret;
+}
+
+static int test_get_hex(void)
+{
+ int ret = 0;
+
+ if (do_test_get_hex("12345678abcdef90",
+ 0x12345678abcdef90, -1,
+ 0, -1,
+ 0, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_hex("1\n2\n3\n",
+ 1, '\n',
+ 2, '\n',
+ 3, '\n',
+ false))
+ ret = -1;
+
+ if (do_test_get_hex("12345678ABCDEF90;a;b",
+ 0x12345678abcdef90, ';',
+ 0xa, ';',
+ 0xb, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_hex("0x1x2x",
+ 0, 'x',
+ 1, 'x',
+ 2, 'x',
+ false))
+ ret = -1;
+
+ if (do_test_get_hex("x1x",
+ 0, -2,
+ 1, 'x',
+ 0, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_hex("10000000000000000000000000000abcdefgh99i",
+ 0xabcdef, 'g',
+ 0, -2,
+ 0x99, 'i',
+ false))
+ ret = -1;
+
+ return ret;
+}
+
+static int do_test_get_dec(const char *test_string,
+ __u64 val1, int ch1,
+ __u64 val2, int ch2,
+ __u64 val3, int ch3,
+ bool end_eof)
+{
+ char path[PATH_MAX];
+ struct io io;
+ int ch, ret = 0;
+ __u64 dec;
+
+ if (setup_test(path, test_string, 4, &io))
+ return -1;
+
+ ch = io__get_dec(&io, &dec);
+ EXPECT_EQUAL64(dec, val1);
+ EXPECT_EQUAL(ch, ch1);
+
+ ch = io__get_dec(&io, &dec);
+ EXPECT_EQUAL64(dec, val2);
+ EXPECT_EQUAL(ch, ch2);
+
+ ch = io__get_dec(&io, &dec);
+ EXPECT_EQUAL64(dec, val3);
+ EXPECT_EQUAL(ch, ch3);
+
+ EXPECT_EQUAL(io.eof, end_eof);
+
+ cleanup_test(path, &io);
+ return ret;
+}
+
+static int test_get_dec(void)
+{
+ int ret = 0;
+
+ if (do_test_get_dec("12345678abcdef90",
+ 12345678, 'a',
+ 0, -2,
+ 0, -2,
+ false))
+ ret = -1;
+
+ if (do_test_get_dec("1\n2\n3\n",
+ 1, '\n',
+ 2, '\n',
+ 3, '\n',
+ false))
+ ret = -1;
+
+ if (do_test_get_dec("12345678;1;2",
+ 12345678, ';',
+ 1, ';',
+ 2, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_dec("0x1x2x",
+ 0, 'x',
+ 1, 'x',
+ 2, 'x',
+ false))
+ ret = -1;
+
+ if (do_test_get_dec("x1x",
+ 0, -2,
+ 1, 'x',
+ 0, -1,
+ true))
+ ret = -1;
+
+ if (do_test_get_dec("10000000000000000000000000000000000000000000000000000000000123456789ab99c",
+ 123456789, 'a',
+ 0, -2,
+ 99, 'c',
+ false))
+ ret = -1;
+
+ return ret;
+}
+
+int test__api_io(struct test *test __maybe_unused,
+ int subtest __maybe_unused)
+{
+ int ret = 0;
+
+ if (test_get_char())
+ ret = TEST_FAIL;
+ if (test_get_hex())
+ ret = TEST_FAIL;
+ if (test_get_dec())
+ ret = TEST_FAIL;
+ return ret;
+}
diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy
new file mode 100644
index 000000000000..eba723cc0d38
--- /dev/null
+++ b/tools/perf/tests/attr/system-wide-dummy
@@ -0,0 +1,50 @@
+# Event added by system-wide or CPU perf-record to handle the race of
+# processes starting while /proc is processed.
+[event]
+fd=1
+group_fd=-1
+cpu=*
+pid=-1
+flags=8
+type=1
+size=120
+config=9
+sample_period=4000
+sample_type=455
+read_format=4
+# Event will be enabled right away.
+disabled=0
+inherit=1
+pinned=0
+exclusive=0
+exclude_user=0
+exclude_kernel=0
+exclude_hv=0
+exclude_idle=0
+mmap=1
+comm=1
+freq=1
+inherit_stat=0
+enable_on_exec=0
+task=1
+watermark=0
+precise_ip=0
+mmap_data=0
+sample_id_all=1
+exclude_host=0
+exclude_guest=0
+exclude_callchain_kernel=0
+exclude_callchain_user=0
+mmap2=1
+comm_exec=1
+context_switch=0
+write_backward=0
+namespaces=0
+use_clockid=0
+wakeup_events=0
+bp_type=0
+config1=0
+config2=0
+branch_sample_type=0
+sample_regs_user=0
+sample_stack_user=0
diff --git a/tools/perf/tests/attr/test-record-C0 b/tools/perf/tests/attr/test-record-C0
index 93818054ae20..317730b906dd 100644
--- a/tools/perf/tests/attr/test-record-C0
+++ b/tools/perf/tests/attr/test-record-C0
@@ -9,6 +9,14 @@ cpu=0
# no enable on exec for CPU attached
enable_on_exec=0
-# PERF_SAMPLE_IP | PERF_SAMPLE_TID PERF_SAMPLE_TIME | # PERF_SAMPLE_PERIOD
+# PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_TIME |
+# PERF_SAMPLE_ID | PERF_SAMPLE_PERIOD
# + PERF_SAMPLE_CPU added by -C 0
-sample_type=391
+sample_type=455
+
+# Dummy event handles mmaps, comm and task.
+mmap=0
+comm=0
+task=0
+
+[event:system-wide-dummy]
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 5f05db75cdd8..da5b6cc23f25 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -73,6 +73,17 @@ static struct test generic_tests[] = {
.func = test__pmu,
},
{
+ .desc = "PMU events",
+ .func = test__pmu_events,
+ .subtest = {
+ .skip_if_fail = false,
+ .get_nr = test__pmu_events_subtest_get_nr,
+ .get_desc = test__pmu_events_subtest_get_desc,
+ .skip_reason = test__pmu_events_subtest_skip_reason,
+ },
+
+ },
+ {
.desc = "DSO data read",
.func = test__dso_data,
},
@@ -306,10 +317,27 @@ static struct test generic_tests[] = {
.func = test__jit_write_elf,
},
{
+ .desc = "Test libpfm4 support",
+ .func = test__pfm,
+ .subtest = {
+ .skip_if_fail = true,
+ .get_nr = test__pfm_subtest_get_nr,
+ .get_desc = test__pfm_subtest_get_desc,
+ }
+ },
+ {
+ .desc = "Test api io",
+ .func = test__api_io,
+ },
+ {
.desc = "maps__merge_in",
.func = test__maps__merge_in,
},
{
+ .desc = "Demangle Java",
+ .func = test__demangle_java,
+ },
+ {
.func = NULL,
},
};
@@ -319,7 +347,7 @@ static struct test *tests[] = {
arch_tests,
};
-static bool perf_test__matches(struct test *test, int curr, int argc, const char *argv[])
+static bool perf_test__matches(const char *desc, int curr, int argc, const char *argv[])
{
int i;
@@ -336,7 +364,7 @@ static bool perf_test__matches(struct test *test, int curr, int argc, const char
continue;
}
- if (strcasestr(test->desc, argv[i]))
+ if (strcasestr(desc, argv[i]))
return true;
}
@@ -421,8 +449,15 @@ static int test_and_print(struct test *t, bool force_skip, int subtest)
case TEST_OK:
pr_info(" Ok\n");
break;
- case TEST_SKIP:
- color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
+ case TEST_SKIP: {
+ const char *skip_reason = NULL;
+ if (t->subtest.skip_reason)
+ skip_reason = t->subtest.skip_reason(subtest);
+ if (skip_reason)
+ color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip (%s)\n", skip_reason);
+ else
+ color_fprintf(stderr, PERF_COLOR_YELLOW, " Skip\n");
+ }
break;
case TEST_FAIL:
default:
@@ -543,8 +578,11 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width)
return -1;
dir = opendir(st.dir);
- if (!dir)
+ if (!dir) {
+ pr_err("failed to open shell test directory: %s\n",
+ st.dir);
return -1;
+ }
for_each_shell_test(dir, st.dir, ent) {
int curr = i++;
@@ -555,7 +593,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width)
.priv = &st,
};
- if (!perf_test__matches(&test, curr, argc, argv))
+ if (!perf_test__matches(test.desc, curr, argc, argv))
continue;
st.file = ent->d_name;
@@ -583,9 +621,25 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
for_each_test(j, t) {
int curr = i++, err;
+ int subi;
- if (!perf_test__matches(t, curr, argc, argv))
- continue;
+ if (!perf_test__matches(t->desc, curr, argc, argv)) {
+ bool skip = true;
+ int subn;
+
+ if (!t->subtest.get_nr)
+ continue;
+
+ subn = t->subtest.get_nr();
+
+ for (subi = 0; subi < subn; subi++) {
+ if (perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv))
+ skip = false;
+ }
+
+ if (skip)
+ continue;
+ }
if (t->is_supported && !t->is_supported()) {
pr_debug("%2d: %-*s: Disabled\n", i, width, t->desc);
@@ -613,7 +667,6 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
*/
int subw = width > 2 ? width - 2 : width;
bool skip = false;
- int subi;
if (subn <= 0) {
color_fprintf(stderr, PERF_COLOR_YELLOW,
@@ -630,6 +683,9 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
}
for (subi = 0; subi < subn; subi++) {
+ if (!perf_test__matches(t->subtest.get_desc(subi), curr, argc, argv))
+ continue;
+
pr_info("%2d.%1d: %-*s:", i, subi + 1, subw,
t->subtest.get_desc(subi));
err = test_and_print(t, skip, subi);
@@ -663,7 +719,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i)
.desc = shell_test__description(bf, sizeof(bf), path, ent->d_name),
};
- if (!perf_test__matches(&t, curr, argc, argv))
+ if (!perf_test__matches(t.desc, curr, argc, argv))
continue;
pr_info("%2d: %s\n", i, t.desc);
@@ -682,7 +738,7 @@ static int perf_test__list(int argc, const char **argv)
for_each_test(j, t) {
int curr = i++;
- if (!perf_test__matches(t, curr, argc, argv) ||
+ if (!perf_test__matches(t->desc, curr, argc, argv) ||
(t->is_supported && !t->is_supported()))
continue;
diff --git a/tools/perf/tests/demangle-java-test.c b/tools/perf/tests/demangle-java-test.c
new file mode 100644
index 000000000000..8f3b90832fb0
--- /dev/null
+++ b/tools/perf/tests/demangle-java-test.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include "tests.h"
+#include "session.h"
+#include "debug.h"
+#include "demangle-java.h"
+
+int test__demangle_java(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+ int ret = TEST_OK;
+ char *buf = NULL;
+ size_t i;
+
+ struct {
+ const char *mangled, *demangled;
+ } test_cases[] = {
+ { "Ljava/lang/StringLatin1;equals([B[B)Z",
+ "boolean java.lang.StringLatin1.equals(byte[], byte[])" },
+ { "Ljava/util/zip/ZipUtils;CENSIZ([BI)J",
+ "long java.util.zip.ZipUtils.CENSIZ(byte[], int)" },
+ { "Ljava/util/regex/Pattern$BmpCharProperty;match(Ljava/util/regex/Matcher;ILjava/lang/CharSequence;)Z",
+ "boolean java.util.regex.Pattern$BmpCharProperty.match(java.util.regex.Matcher, int, java.lang.CharSequence)" },
+ { "Ljava/lang/AbstractStringBuilder;appendChars(Ljava/lang/String;II)V",
+ "void java.lang.AbstractStringBuilder.appendChars(java.lang.String, int, int)" },
+ { "Ljava/lang/Object;<init>()V",
+ "void java.lang.Object<init>()" },
+ };
+
+ for (i = 0; i < sizeof(test_cases) / sizeof(test_cases[0]); i++) {
+ buf = java_demangle_sym(test_cases[i].mangled, 0);
+ if (strcmp(buf, test_cases[i].demangled)) {
+ pr_debug("FAILED: %s: %s != %s\n", test_cases[i].mangled,
+ buf, test_cases[i].demangled);
+ ret = TEST_FAIL;
+ }
+ free(buf);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 779ce280a0e9..2491d167bf76 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -37,6 +37,7 @@ static int init_live_machine(struct machine *machine)
union perf_event event;
pid_t pid = getpid();
+ memset(&event, 0, sizeof(event));
return perf_event__synthesize_mmap_events(NULL, &event, pid, pid,
mmap_handler, machine, true);
}
@@ -94,7 +95,7 @@ static int unwind_entry(struct unwind_entry *entry, void *arg)
return strcmp((const char *) symbol, funcs[idx]);
}
-noinline int test_dwarf_unwind__thread(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__thread(struct thread *thread)
{
struct perf_sample sample;
unsigned long cnt = 0;
@@ -125,7 +126,7 @@ noinline int test_dwarf_unwind__thread(struct thread *thread)
static int global_unwind_retval = -INT_MAX;
-noinline int test_dwarf_unwind__compare(void *p1, void *p2)
+__no_tail_call noinline int test_dwarf_unwind__compare(void *p1, void *p2)
{
/* Any possible value should be 'thread' */
struct thread *thread = *(struct thread **)p1;
@@ -144,7 +145,7 @@ noinline int test_dwarf_unwind__compare(void *p1, void *p2)
return p1 - p2;
}
-noinline int test_dwarf_unwind__krava_3(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__krava_3(struct thread *thread)
{
struct thread *array[2] = {thread, thread};
void *fp = &bsearch;
@@ -163,12 +164,12 @@ noinline int test_dwarf_unwind__krava_3(struct thread *thread)
return global_unwind_retval;
}
-noinline int test_dwarf_unwind__krava_2(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__krava_2(struct thread *thread)
{
return test_dwarf_unwind__krava_3(thread);
}
-noinline int test_dwarf_unwind__krava_1(struct thread *thread)
+__no_tail_call noinline int test_dwarf_unwind__krava_1(struct thread *thread)
{
return test_dwarf_unwind__krava_2(thread);
}
diff --git a/tools/perf/tests/event-times.c b/tools/perf/tests/event-times.c
index 1e8a9f5c356d..db68894a6f40 100644
--- a/tools/perf/tests/event-times.c
+++ b/tools/perf/tests/event-times.c
@@ -72,7 +72,7 @@ static int attach__current_disabled(struct evlist *evlist)
evsel->core.attr.disabled = 1;
- err = perf_evsel__open_per_thread(evsel, threads);
+ err = evsel__open_per_thread(evsel, threads);
if (err) {
pr_debug("Failed to open event cpu-clock:u\n");
return err;
@@ -96,7 +96,7 @@ static int attach__current_enabled(struct evlist *evlist)
return -1;
}
- err = perf_evsel__open_per_thread(evsel, threads);
+ err = evsel__open_per_thread(evsel, threads);
perf_thread_map__put(threads);
return err == 0 ? TEST_OK : TEST_FAIL;
@@ -125,7 +125,7 @@ static int attach__cpu_disabled(struct evlist *evlist)
evsel->core.attr.disabled = 1;
- err = perf_evsel__open_per_cpu(evsel, cpus, -1);
+ err = evsel__open_per_cpu(evsel, cpus, -1);
if (err) {
if (err == -EACCES)
return TEST_SKIP;
@@ -152,7 +152,7 @@ static int attach__cpu_enabled(struct evlist *evlist)
return -1;
}
- err = perf_evsel__open_per_cpu(evsel, cpus, -1);
+ err = evsel__open_per_cpu(evsel, cpus, -1);
if (err == -EACCES)
return TEST_SKIP;
diff --git a/tools/perf/tests/event_update.c b/tools/perf/tests/event_update.c
index c727379cf20e..bdcf032f8516 100644
--- a/tools/perf/tests/event_update.c
+++ b/tools/perf/tests/event_update.c
@@ -109,7 +109,7 @@ int test__event_update(struct test *test __maybe_unused, int subtest __maybe_unu
TEST_ASSERT_VAL("failed to synthesize attr update scale",
!perf_event__synthesize_event_update_scale(NULL, evsel, process_event_scale));
- tmp.name = perf_evsel__name(evsel);
+ tmp.name = evsel__name(evsel);
TEST_ASSERT_VAL("failed to synthesize attr update name",
!perf_event__synthesize_event_update_name(&tmp.tool, evsel, process_event_name));
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index 956205bf9326..f7f3e5b4c180 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -20,12 +20,11 @@ static int perf_evsel__roundtrip_cache_name_test(void)
for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
/* skip invalid cache type */
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
continue;
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- __perf_evsel__hw_cache_type_op_res_name(type, op, i,
- name, sizeof(name));
+ __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
err = parse_events(evlist, name, NULL);
if (err)
ret = err;
@@ -39,23 +38,22 @@ static int perf_evsel__roundtrip_cache_name_test(void)
for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
/* skip invalid cache type */
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
continue;
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- __perf_evsel__hw_cache_type_op_res_name(type, op, i,
- name, sizeof(name));
+ __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
if (evsel->idx != idx)
continue;
++idx;
- if (strcmp(perf_evsel__name(evsel), name)) {
- pr_debug("%s != %s\n", perf_evsel__name(evsel), name);
+ if (strcmp(evsel__name(evsel), name)) {
+ pr_debug("%s != %s\n", evsel__name(evsel), name);
ret = -1;
}
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
}
}
}
@@ -84,9 +82,9 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
err = 0;
evlist__for_each_entry(evlist, evsel) {
- if (strcmp(perf_evsel__name(evsel), names[evsel->idx])) {
+ if (strcmp(evsel__name(evsel), names[evsel->idx])) {
--err;
- pr_debug("%s != %s\n", perf_evsel__name(evsel), names[evsel->idx]);
+ pr_debug("%s != %s\n", evsel__name(evsel), names[evsel->idx]);
}
}
@@ -102,12 +100,11 @@ int test__perf_evsel__roundtrip_name_test(struct test *test __maybe_unused, int
{
int err = 0, ret = 0;
- err = perf_evsel__name_array_test(perf_evsel__hw_names);
+ err = perf_evsel__name_array_test(evsel__hw_names);
if (err)
ret = err;
- err = __perf_evsel__name_array_test(perf_evsel__sw_names,
- PERF_COUNT_SW_DUMMY + 1);
+ err = __perf_evsel__name_array_test(evsel__sw_names, PERF_COUNT_SW_DUMMY + 1);
if (err)
ret = err;
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 261e6eaaee99..0e224a0a55d9 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -8,7 +8,7 @@
static int perf_evsel__test_field(struct evsel *evsel, const char *name,
int size, bool should_be_signed)
{
- struct tep_format_field *field = perf_evsel__field(evsel, name);
+ struct tep_format_field *field = evsel__field(evsel, name);
int is_signed;
int ret = 0;
@@ -35,11 +35,11 @@ static int perf_evsel__test_field(struct evsel *evsel, const char *name,
int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtest __maybe_unused)
{
- struct evsel *evsel = perf_evsel__newtp("sched", "sched_switch");
+ struct evsel *evsel = evsel__newtp("sched", "sched_switch");
int ret = 0;
if (IS_ERR(evsel)) {
- pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel));
+ pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel));
return -1;
}
@@ -66,10 +66,10 @@ int test__perf_evsel__tp_sched_test(struct test *test __maybe_unused, int subtes
evsel__delete(evsel);
- evsel = perf_evsel__newtp("sched", "sched_wakeup");
+ evsel = evsel__newtp("sched", "sched_wakeup");
if (IS_ERR(evsel)) {
- pr_debug("perf_evsel__newtp failed with %ld\n", PTR_ERR(evsel));
+ pr_debug("evsel__newtp failed with %ld\n", PTR_ERR(evsel));
return -1;
}
diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 87843af4c118..1cb02ca2b15f 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -6,11 +6,11 @@
#include <string.h>
#include <linux/zalloc.h>
-static int test(struct parse_ctx *ctx, const char *e, double val2)
+static int test(struct expr_parse_ctx *ctx, const char *e, double val2)
{
double val;
- if (expr__parse(&val, ctx, &e))
+ if (expr__parse(&val, ctx, e, 1))
TEST_ASSERT_VAL("parse test failed", 0);
TEST_ASSERT_VAL("unexpected value", val == val2);
return 0;
@@ -19,15 +19,13 @@ static int test(struct parse_ctx *ctx, const char *e, double val2)
int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
{
const char *p;
- const char **other;
- double val;
- int i, ret;
- struct parse_ctx ctx;
- int num_other;
+ double val, *val_ptr;
+ int ret;
+ struct expr_parse_ctx ctx;
expr__ctx_init(&ctx);
- expr__add_id(&ctx, "FOO", 1);
- expr__add_id(&ctx, "BAR", 2);
+ expr__add_id(&ctx, strdup("FOO"), 1);
+ expr__add_id(&ctx, strdup("BAR"), 2);
ret = test(&ctx, "1+1", 2);
ret |= test(&ctx, "FOO+BAR", 3);
@@ -39,29 +37,43 @@ int test__expr(struct test *t __maybe_unused, int subtest __maybe_unused)
ret |= test(&ctx, "min(1,2) + 1", 2);
ret |= test(&ctx, "max(1,2) + 1", 3);
ret |= test(&ctx, "1+1 if 3*4 else 0", 2);
+ ret |= test(&ctx, "1.1 + 2.1", 3.2);
+ ret |= test(&ctx, ".1 + 2.", 2.1);
if (ret)
return ret;
p = "FOO/0";
- ret = expr__parse(&val, &ctx, &p);
- TEST_ASSERT_VAL("division by zero", ret == 1);
+ ret = expr__parse(&val, &ctx, p, 1);
+ TEST_ASSERT_VAL("division by zero", ret == -1);
p = "BAR/";
- ret = expr__parse(&val, &ctx, &p);
- TEST_ASSERT_VAL("missing operand", ret == 1);
+ ret = expr__parse(&val, &ctx, p, 1);
+ TEST_ASSERT_VAL("missing operand", ret == -1);
+
+ expr__ctx_clear(&ctx);
+ TEST_ASSERT_VAL("find other",
+ expr__find_other("FOO + BAR + BAZ + BOZO", "FOO",
+ &ctx, 1) == 0);
+ TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 3);
+ TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAR",
+ (void **)&val_ptr));
+ TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BAZ",
+ (void **)&val_ptr));
+ TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "BOZO",
+ (void **)&val_ptr));
+ expr__ctx_clear(&ctx);
TEST_ASSERT_VAL("find other",
- expr__find_other("FOO + BAR + BAZ + BOZO", "FOO", &other, &num_other) == 0);
- TEST_ASSERT_VAL("find other", num_other == 3);
- TEST_ASSERT_VAL("find other", !strcmp(other[0], "BAR"));
- TEST_ASSERT_VAL("find other", !strcmp(other[1], "BAZ"));
- TEST_ASSERT_VAL("find other", !strcmp(other[2], "BOZO"));
- TEST_ASSERT_VAL("find other", other[3] == NULL);
+ expr__find_other("EVENT1\\,param\\=?@ + EVENT2\\,param\\=?@",
+ NULL, &ctx, 3) == 0);
+ TEST_ASSERT_VAL("find other", hashmap__size(&ctx.ids) == 2);
+ TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT1,param=3/",
+ (void **)&val_ptr));
+ TEST_ASSERT_VAL("find other", hashmap__find(&ctx.ids, "EVENT2,param=3/",
+ (void **)&val_ptr));
- for (i = 0; i < num_other; i++)
- zfree(&other[i]);
- free((void *)other);
+ expr__ctx_clear(&ctx);
return 0;
}
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 6367c8f6ca22..3f2e1a581247 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -190,7 +190,7 @@ static int do_test(struct hists *hists, struct result *expected, size_t nr_expec
* function since TEST_ASSERT_VAL() returns in case of failure.
*/
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(hists_to_evsel(hists), NULL);
+ evsel__output_resort(hists_to_evsel(hists), NULL);
if (verbose > 2) {
pr_info("use callchain: %d, cumulate callchain: %d\n",
@@ -280,7 +280,7 @@ static int test1(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = false;
- perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ evsel__reset_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
callchain_register_param(&callchain_param);
@@ -427,7 +427,7 @@ static int test2(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = false;
- perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ evsel__set_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
callchain_register_param(&callchain_param);
@@ -485,7 +485,7 @@ static int test3(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = false;
symbol_conf.cumulate_callchain = true;
- perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ evsel__reset_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
callchain_register_param(&callchain_param);
@@ -669,7 +669,7 @@ static int test4(struct evsel *evsel, struct machine *machine)
symbol_conf.use_callchain = true;
symbol_conf.cumulate_callchain = true;
- perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ evsel__set_sample_bit(evsel, CALLCHAIN);
setup_sorting(NULL);
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 618b51ffcc01..123e07d35b55 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -142,7 +142,7 @@ int test__hists_filter(struct test *test __maybe_unused, int subtest __maybe_unu
struct hists *hists = evsel__hists(evsel);
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("Normal histogram\n");
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index 38f804ff6452..8973f35df604 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -155,7 +155,7 @@ static int test1(struct evsel *evsel, struct machine *machine)
goto out;
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -255,7 +255,7 @@ static int test2(struct evsel *evsel, struct machine *machine)
goto out;
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -309,7 +309,7 @@ static int test3(struct evsel *evsel, struct machine *machine)
goto out;
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -387,7 +387,7 @@ static int test4(struct evsel *evsel, struct machine *machine)
goto out;
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
@@ -490,7 +490,7 @@ static int test5(struct evsel *evsel, struct machine *machine)
goto out;
hists__collapse_resort(hists, NULL);
- perf_evsel__output_resort(evsel, NULL);
+ evsel__output_resort(evsel, NULL);
if (verbose > 2) {
pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index c850d1664c56..9b651dfe0a6b 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -28,9 +28,13 @@ endif
PARALLEL_OPT=
ifeq ($(SET_PARALLEL),1)
- cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
- ifeq ($(cores),0)
- cores := 1
+ ifeq ($(JOBS),)
+ cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+ ifeq ($(cores),0)
+ cores := 1
+ endif
+ else
+ cores=$(JOBS)
endif
PARALLEL_OPT="-j$(cores)"
endif
@@ -80,10 +84,13 @@ make_no_libaudit := NO_LIBAUDIT=1
make_no_libbionic := NO_LIBBIONIC=1
make_no_auxtrace := NO_AUXTRACE=1
make_no_libbpf := NO_LIBBPF=1
+make_no_libbpf_DEBUG := NO_LIBBPF=1 DEBUG=1
make_no_libcrypto := NO_LIBCRYPTO=1
make_with_babeltrace:= LIBBABELTRACE=1
make_no_sdt := NO_SDT=1
+make_no_syscall_tbl := NO_SYSCALL_TABLE=1
make_with_clangllvm := LIBCLANGLLVM=1
+make_with_libpfm4 := LIBPFM4=1
make_tags := tags
make_cscope := cscope
make_help := help
@@ -108,7 +115,7 @@ make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 NO_LIBBPF=1
make_minimal += NO_LIBCRYPTO=1 NO_SDT=1 NO_JVMTI=1 NO_LIBZSTD=1
-make_minimal += NO_LIBCAP=1
+make_minimal += NO_LIBCAP=1 NO_SYSCALL_TABLE=1
# $(run) contains all available tests
run := make_pure
@@ -140,8 +147,13 @@ run += make_no_libaudit
run += make_no_libbionic
run += make_no_auxtrace
run += make_no_libbpf
+run += make_no_libbpf_DEBUG
+run += make_no_libcrypto
+run += make_no_sdt
+run += make_no_syscall_tbl
run += make_with_babeltrace
run += make_with_clangllvm
+run += make_with_libpfm4
run += make_help
run += make_doc
run += make_perf_o
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 5f4c0dbb4715..7b0dbfc0e17d 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -79,14 +79,14 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse
char name[64];
snprintf(name, sizeof(name), "sys_enter_%s", syscall_names[i]);
- evsels[i] = perf_evsel__newtp("syscalls", name);
+ evsels[i] = evsel__newtp("syscalls", name);
if (IS_ERR(evsels[i])) {
- pr_debug("perf_evsel__new(%s)\n", name);
+ pr_debug("evsel__new(%s)\n", name);
goto out_delete_evlist;
}
evsels[i]->core.attr.wakeup_events = 1;
- perf_evsel__set_sample_id(evsels[i], false);
+ evsel__set_sample_id(evsels[i], false);
evlist__add(evlist, evsels[i]);
@@ -150,7 +150,7 @@ out_init:
if (nr_events[evsel->idx] != expected_nr_events[evsel->idx]) {
pr_debug("expected %d %s events, got %d\n",
expected_nr_events[evsel->idx],
- perf_evsel__name(evsel), nr_events[evsel->idx]);
+ evsel__name(evsel), nr_events[evsel->idx]);
err = -1;
goto out_delete_evlist;
}
diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index 93c176523e38..71f85e2cc127 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -44,7 +44,7 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
CPU_ZERO(&cpu_set);
- evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+ evsel = evsel__newtp("syscalls", "sys_enter_openat");
if (IS_ERR(evsel)) {
tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
pr_debug("%s\n", errbuf);
@@ -90,8 +90,8 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
* we use the auto allocation it will allocate just for 1 cpu,
* as we start by cpu 0.
*/
- if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
- pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
+ if (evsel__alloc_counts(evsel, cpus->nr, 1) < 0) {
+ pr_debug("evsel__alloc_counts(ncpus=%d)\n", cpus->nr);
goto out_close_fd;
}
@@ -103,21 +103,21 @@ int test__openat_syscall_event_on_all_cpus(struct test *test __maybe_unused, int
if (cpus->map[cpu] >= CPU_SETSIZE)
continue;
- if (perf_evsel__read_on_cpu(evsel, cpu, 0) < 0) {
- pr_debug("perf_evsel__read_on_cpu\n");
+ if (evsel__read_on_cpu(evsel, cpu, 0) < 0) {
+ pr_debug("evsel__read_on_cpu\n");
err = -1;
break;
}
expected = nr_openat_calls + cpu;
if (perf_counts(evsel->counts, cpu, 0)->val != expected) {
- pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
+ pr_debug("evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val);
err = -1;
}
}
- perf_evsel__free_counts(evsel);
+ evsel__free_counts(evsel);
out_close_fd:
perf_evsel__close_fd(&evsel->core);
out_evsel_delete:
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index c6b2d7aab608..1f5f5e79ae25 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -46,9 +46,9 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
goto out;
}
- evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+ evsel = evsel__newtp("syscalls", "sys_enter_openat");
if (IS_ERR(evsel)) {
- pr_debug("%s: perf_evsel__newtp\n", __func__);
+ pr_debug("%s: evsel__newtp\n", __func__);
goto out_delete_evlist;
}
@@ -60,7 +60,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
goto out_delete_evlist;
}
- perf_evsel__config(evsel, &opts, NULL);
+ evsel__config(evsel, &opts, NULL);
perf_thread_map__set_pid(evlist->core.threads, 0, getpid());
@@ -108,13 +108,13 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest
continue;
}
- err = perf_evsel__parse_sample(evsel, event, &sample);
+ err = evsel__parse_sample(evsel, event, &sample);
if (err) {
pr_debug("Can't parse sample, err = %d\n", err);
goto out_delete_evlist;
}
- tp_flags = perf_evsel__intval(evsel, &sample, "flags");
+ tp_flags = evsel__intval(evsel, &sample, "flags");
if (flags != tp_flags) {
pr_debug("%s: Expected flags=%#x, got %#x\n",
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
index 5ebffae18605..85a8f0fe7aea 100644
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -27,14 +27,14 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m
return -1;
}
- evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
+ evsel = evsel__newtp("syscalls", "sys_enter_openat");
if (IS_ERR(evsel)) {
tracing_path__strerror_open_tp(errno, errbuf, sizeof(errbuf), "syscalls", "sys_enter_openat");
pr_debug("%s\n", errbuf);
goto out_thread_map_delete;
}
- if (perf_evsel__open_per_thread(evsel, threads) < 0) {
+ if (evsel__open_per_thread(evsel, threads) < 0) {
pr_debug("failed to open counter: %s, "
"tweak /proc/sys/kernel/perf_event_paranoid?\n",
str_error_r(errno, sbuf, sizeof(sbuf)));
@@ -46,13 +46,13 @@ int test__openat_syscall_event(struct test *test __maybe_unused, int subtest __m
close(fd);
}
- if (perf_evsel__read_on_cpu(evsel, 0, 0) < 0) {
- pr_debug("perf_evsel__read_on_cpu\n");
+ if (evsel__read_on_cpu(evsel, 0, 0) < 0) {
+ pr_debug("evsel__read_on_cpu\n");
goto out_close_fd;
}
if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) {
- pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
+ pr_debug("evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val);
goto out_close_fd;
}
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 091c3aeccc27..895188b63f96 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -371,7 +371,7 @@ static int test__checkevent_breakpoint_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:u"));
+ !strcmp(evsel__name(evsel), "mem:0:u"));
return test__checkevent_breakpoint(evlist);
}
@@ -385,7 +385,7 @@ static int test__checkevent_breakpoint_x_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:x:k"));
+ !strcmp(evsel__name(evsel), "mem:0:x:k"));
return test__checkevent_breakpoint_x(evlist);
}
@@ -399,7 +399,7 @@ static int test__checkevent_breakpoint_r_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", !evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:r:hp"));
+ !strcmp(evsel__name(evsel), "mem:0:r:hp"));
return test__checkevent_breakpoint_r(evlist);
}
@@ -413,7 +413,7 @@ static int test__checkevent_breakpoint_w_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:w:up"));
+ !strcmp(evsel__name(evsel), "mem:0:w:up"));
return test__checkevent_breakpoint_w(evlist);
}
@@ -427,7 +427,7 @@ static int test__checkevent_breakpoint_rw_modifier(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "mem:0:rw:kp"));
+ !strcmp(evsel__name(evsel), "mem:0:rw:kp"));
return test__checkevent_breakpoint_rw(evlist);
}
@@ -468,7 +468,7 @@ static int test__checkevent_list(struct evlist *evlist)
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
/* syscalls:sys_enter_openat:k */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong sample_type",
PERF_TP_SAMPLE_TYPE == evsel->core.attr.sample_type);
@@ -479,7 +479,7 @@ static int test__checkevent_list(struct evlist *evlist)
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
/* 1:1:hp */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong exclude_user", evsel->core.attr.exclude_user);
@@ -498,15 +498,15 @@ static int test__checkevent_pmu_name(struct evlist *evlist)
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 1 == evsel->core.attr.config);
- TEST_ASSERT_VAL("wrong name", !strcmp(perf_evsel__name(evsel), "krava"));
+ TEST_ASSERT_VAL("wrong name", !strcmp(evsel__name(evsel), "krava"));
/* cpu/config=2/u" */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong name",
- !strcmp(perf_evsel__name(evsel), "cpu/config=2/u"));
+ !strcmp(evsel__name(evsel), "cpu/config=2/u"));
return 0;
}
@@ -529,7 +529,7 @@ static int test__checkevent_pmu_partial_time_callgraph(struct evlist *evlist)
TEST_ASSERT_VAL("wrong time", !(PERF_SAMPLE_TIME & evsel->core.attr.sample_type));
/* cpu/config=2,call-graph=no,time=0,period=2000/ */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 2 == evsel->core.attr.config);
/*
@@ -577,7 +577,7 @@ static int test__checkevent_pmu_events_mix(struct evlist *evlist)
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
/* cpu/pmu-event/u*/
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong number of entries", 2 == evlist->core.nr_entries);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_RAW == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong exclude_user",
@@ -652,13 +652,13 @@ static int test__group1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cycles:upp */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -670,7 +670,7 @@ static int test__group1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -694,13 +694,13 @@ static int test__group2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cache-references + :u modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_REFERENCES == evsel->core.attr.config);
@@ -711,11 +711,11 @@ static int test__group2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cycles:k */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -725,7 +725,7 @@ static int test__group2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -750,15 +750,15 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong group name",
!strcmp(leader->group_name, "group1"));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* group1 cycles:kppp */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -771,11 +771,11 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 3);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* group2 cycles + G modifier */
- evsel = leader = perf_evsel__next(evsel);
+ evsel = leader = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -785,15 +785,15 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong group name",
!strcmp(leader->group_name, "group2"));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* group2 1:3 + G modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", 1 == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config", 3 == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
@@ -803,11 +803,11 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions:u */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -817,7 +817,7 @@ static int test__group3(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -843,13 +843,13 @@ static int test__group4(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 1);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions:kp + p */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -861,7 +861,7 @@ static int test__group4(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", evsel->core.attr.precise_ip == 2);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
return 0;
@@ -886,13 +886,13 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions + G */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -903,11 +903,11 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* cycles:G */
- evsel = leader = perf_evsel__next(evsel);
+ evsel = leader = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -918,13 +918,13 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
TEST_ASSERT_VAL("wrong sample_read", !evsel->sample_read);
/* instructions:G */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_INSTRUCTIONS == evsel->core.attr.config);
@@ -935,10 +935,10 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
/* cycles */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CPU_CYCLES == evsel->core.attr.config);
@@ -948,7 +948,7 @@ static int test__group5(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
return 0;
}
@@ -972,12 +972,12 @@ static int test__group_gh1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:G + :H group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -988,7 +988,7 @@ static int test__group_gh1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1012,12 +1012,12 @@ static int test__group_gh2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:H + :G group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1028,7 +1028,7 @@ static int test__group_gh2(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1052,12 +1052,12 @@ static int test__group_gh3(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:H + :u group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1068,7 +1068,7 @@ static int test__group_gh3(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1092,12 +1092,12 @@ static int test__group_gh4(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong group name", !evsel->group_name);
- TEST_ASSERT_VAL("wrong leader", perf_evsel__is_group_leader(evsel));
+ TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
TEST_ASSERT_VAL("wrong core.nr_members", evsel->core.nr_members == 2);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 0);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 0);
/* cache-misses:H + :uG group modifier */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1108,7 +1108,7 @@ static int test__group_gh4(struct evlist *evlist)
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel->leader == leader);
- TEST_ASSERT_VAL("wrong group_idx", perf_evsel__group_idx(evsel) == 1);
+ TEST_ASSERT_VAL("wrong group_idx", evsel__group_idx(evsel) == 1);
return 0;
}
@@ -1135,7 +1135,7 @@ static int test__leader_sample1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
/* cache-misses - not sampling */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
@@ -1149,7 +1149,7 @@ static int test__leader_sample1(struct evlist *evlist)
TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
/* branch-misses - not sampling */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
@@ -1188,7 +1188,7 @@ static int test__leader_sample2(struct evlist *evlist __maybe_unused)
TEST_ASSERT_VAL("wrong sample_read", evsel->sample_read);
/* branch-misses - not sampling */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
@@ -1234,14 +1234,14 @@ static int test__pinned_group(struct evlist *evlist)
TEST_ASSERT_VAL("wrong pinned", evsel->core.attr.pinned);
/* cache-misses - can not be pinned, but will go on with the leader */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong type", PERF_TYPE_HARDWARE == evsel->core.attr.type);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_CACHE_MISSES == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
/* branch-misses - ditto */
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
TEST_ASSERT_VAL("wrong config",
PERF_COUNT_HW_BRANCH_MISSES == evsel->core.attr.config);
TEST_ASSERT_VAL("wrong pinned", !evsel->core.attr.pinned);
@@ -1356,6 +1356,16 @@ static int test__checkevent_complex_name(struct evlist *evlist)
return 0;
}
+static int test__checkevent_raw_pmu(struct evlist *evlist)
+{
+ struct evsel *evsel = evlist__first(evlist);
+
+ TEST_ASSERT_VAL("wrong number of entries", 1 == evlist->core.nr_entries);
+ TEST_ASSERT_VAL("wrong type", PERF_TYPE_SOFTWARE == evsel->core.attr.type);
+ TEST_ASSERT_VAL("wrong config", 0x1a == evsel->core.attr.config);
+ return 0;
+}
+
static int test__sym_event_slash(struct evlist *evlist)
{
struct evsel *evsel = evlist__first(evlist);
@@ -1750,7 +1760,12 @@ static struct evlist_test test__events_pmu[] = {
.name = "cpu/name='COMPLEX_CYCLES_NAME:orig=cycles,desc=chip-clock-ticks',period=0x1,event=0x2/ukp",
.check = test__checkevent_complex_name,
.id = 3,
- }
+ },
+ {
+ .name = "software/r1a/",
+ .check = test__checkevent_raw_pmu,
+ .id = 4,
+ },
};
struct terms_test {
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 2195fc205e72..83adfd846ccd 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -106,9 +106,9 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus
* Config the evsels, setting attr->comm on the first one, etc.
*/
evsel = evlist__first(evlist);
- perf_evsel__set_sample_bit(evsel, CPU);
- perf_evsel__set_sample_bit(evsel, TID);
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TID);
+ evsel__set_sample_bit(evsel, TIME);
perf_evlist__config(evlist, &opts, NULL);
err = sched__get_first_possible_cpu(evlist->workload.pid, &cpu_mask);
diff --git a/tools/perf/tests/pfm.c b/tools/perf/tests/pfm.c
new file mode 100644
index 000000000000..76a53126efdf
--- /dev/null
+++ b/tools/perf/tests/pfm.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test support for libpfm4 event encodings.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include "tests.h"
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/pfm.h"
+
+#include <linux/kernel.h>
+
+#ifdef HAVE_LIBPFM
+static int test__pfm_events(void);
+static int test__pfm_group(void);
+#endif
+
+static const struct {
+ int (*func)(void);
+ const char *desc;
+} pfm_testcase_table[] = {
+#ifdef HAVE_LIBPFM
+ {
+ .func = test__pfm_events,
+ .desc = "test of individual --pfm-events",
+ },
+ {
+ .func = test__pfm_group,
+ .desc = "test groups of --pfm-events",
+ },
+#endif
+};
+
+#ifdef HAVE_LIBPFM
+static int count_pfm_events(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+ int count = 0;
+
+ perf_evlist__for_each_entry(evlist, evsel) {
+ count++;
+ }
+ return count;
+}
+
+static int test__pfm_events(void)
+{
+ struct evlist *evlist;
+ struct option opt;
+ size_t i;
+ const struct {
+ const char *events;
+ int nr_events;
+ } table[] = {
+ {
+ .events = "",
+ .nr_events = 0,
+ },
+ {
+ .events = "instructions",
+ .nr_events = 1,
+ },
+ {
+ .events = "instructions,cycles",
+ .nr_events = 2,
+ },
+ {
+ .events = "stereolab",
+ .nr_events = 0,
+ },
+ {
+ .events = "instructions,instructions",
+ .nr_events = 2,
+ },
+ {
+ .events = "stereolab,instructions",
+ .nr_events = 0,
+ },
+ {
+ .events = "instructions,stereolab",
+ .nr_events = 1,
+ },
+ };
+
+ for (i = 0; i < ARRAY_SIZE(table); i++) {
+ evlist = evlist__new();
+ if (evlist == NULL)
+ return -ENOMEM;
+
+ opt.value = evlist;
+ parse_libpfm_events_option(&opt,
+ table[i].events,
+ 0);
+ TEST_ASSERT_EQUAL(table[i].events,
+ count_pfm_events(&evlist->core),
+ table[i].nr_events);
+ TEST_ASSERT_EQUAL(table[i].events,
+ evlist->nr_groups,
+ 0);
+
+ evlist__delete(evlist);
+ }
+ return 0;
+}
+
+static int test__pfm_group(void)
+{
+ struct evlist *evlist;
+ struct option opt;
+ size_t i;
+ const struct {
+ const char *events;
+ int nr_events;
+ int nr_groups;
+ } table[] = {
+ {
+ .events = "{},",
+ .nr_events = 0,
+ .nr_groups = 0,
+ },
+ {
+ .events = "{instructions}",
+ .nr_events = 1,
+ .nr_groups = 1,
+ },
+ {
+ .events = "{instructions},{}",
+ .nr_events = 1,
+ .nr_groups = 1,
+ },
+ {
+ .events = "{},{instructions}",
+ .nr_events = 0,
+ .nr_groups = 0,
+ },
+ {
+ .events = "{instructions},{instructions}",
+ .nr_events = 2,
+ .nr_groups = 2,
+ },
+ {
+ .events = "{instructions,cycles},{instructions,cycles}",
+ .nr_events = 4,
+ .nr_groups = 2,
+ },
+ {
+ .events = "{stereolab}",
+ .nr_events = 0,
+ .nr_groups = 0,
+ },
+ {
+ .events =
+ "{instructions,cycles},{instructions,stereolab}",
+ .nr_events = 3,
+ .nr_groups = 1,
+ },
+ };
+
+ for (i = 0; i < ARRAY_SIZE(table); i++) {
+ evlist = evlist__new();
+ if (evlist == NULL)
+ return -ENOMEM;
+
+ opt.value = evlist;
+ parse_libpfm_events_option(&opt,
+ table[i].events,
+ 0);
+ TEST_ASSERT_EQUAL(table[i].events,
+ count_pfm_events(&evlist->core),
+ table[i].nr_events);
+ TEST_ASSERT_EQUAL(table[i].events,
+ evlist->nr_groups,
+ table[i].nr_groups);
+
+ evlist__delete(evlist);
+ }
+ return 0;
+}
+#endif
+
+const char *test__pfm_subtest_get_desc(int i)
+{
+ if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table))
+ return NULL;
+ return pfm_testcase_table[i].desc;
+}
+
+int test__pfm_subtest_get_nr(void)
+{
+ return (int)ARRAY_SIZE(pfm_testcase_table);
+}
+
+int test__pfm(struct test *test __maybe_unused, int i __maybe_unused)
+{
+#ifdef HAVE_LIBPFM
+ if (i < 0 || i >= (int)ARRAY_SIZE(pfm_testcase_table))
+ return TEST_FAIL;
+ return pfm_testcase_table[i].func();
+#else
+ return TEST_SKIP;
+#endif
+}
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
new file mode 100644
index 000000000000..ab64b4a4e284
--- /dev/null
+++ b/tools/perf/tests/pmu-events.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "math.h"
+#include "parse-events.h"
+#include "pmu.h"
+#include "tests.h"
+#include <errno.h>
+#include <stdio.h>
+#include <linux/kernel.h>
+#include <linux/zalloc.h>
+#include "debug.h"
+#include "../pmu-events/pmu-events.h"
+#include "util/evlist.h"
+#include "util/expr.h"
+#include "util/parse-events.h"
+
+struct perf_pmu_test_event {
+ struct pmu_event event;
+
+ /* extra events for aliases */
+ const char *alias_str;
+
+ /*
+ * Note: For when PublicDescription does not exist in the JSON, we
+ * will have no long_desc in pmu_event.long_desc, but long_desc may
+ * be set in the alias.
+ */
+ const char *alias_long_desc;
+};
+
+static struct perf_pmu_test_event test_cpu_events[] = {
+ {
+ .event = {
+ .name = "bp_l1_btb_correct",
+ .event = "event=0x8a",
+ .desc = "L1 BTB Correction",
+ .topic = "branch",
+ },
+ .alias_str = "event=0x8a",
+ .alias_long_desc = "L1 BTB Correction",
+ },
+ {
+ .event = {
+ .name = "bp_l2_btb_correct",
+ .event = "event=0x8b",
+ .desc = "L2 BTB Correction",
+ .topic = "branch",
+ },
+ .alias_str = "event=0x8b",
+ .alias_long_desc = "L2 BTB Correction",
+ },
+ {
+ .event = {
+ .name = "segment_reg_loads.any",
+ .event = "umask=0x80,period=200000,event=0x6",
+ .desc = "Number of segment register loads",
+ .topic = "other",
+ },
+ .alias_str = "umask=0x80,(null)=0x30d40,event=0x6",
+ .alias_long_desc = "Number of segment register loads",
+ },
+ {
+ .event = {
+ .name = "dispatch_blocked.any",
+ .event = "umask=0x20,period=200000,event=0x9",
+ .desc = "Memory cluster signals to block micro-op dispatch for any reason",
+ .topic = "other",
+ },
+ .alias_str = "umask=0x20,(null)=0x30d40,event=0x9",
+ .alias_long_desc = "Memory cluster signals to block micro-op dispatch for any reason",
+ },
+ {
+ .event = {
+ .name = "eist_trans",
+ .event = "umask=0x0,period=200000,event=0x3a",
+ .desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
+ .topic = "other",
+ },
+ .alias_str = "umask=0,(null)=0x30d40,event=0x3a",
+ .alias_long_desc = "Number of Enhanced Intel SpeedStep(R) Technology (EIST) transitions",
+ },
+ { /* sentinel */
+ .event = {
+ .name = NULL,
+ },
+ },
+};
+
+static struct perf_pmu_test_event test_uncore_events[] = {
+ {
+ .event = {
+ .name = "uncore_hisi_ddrc.flux_wcmd",
+ .event = "event=0x2",
+ .desc = "DDRC write commands. Unit: hisi_sccl,ddrc ",
+ .topic = "uncore",
+ .long_desc = "DDRC write commands",
+ .pmu = "hisi_sccl,ddrc",
+ },
+ .alias_str = "event=0x2",
+ .alias_long_desc = "DDRC write commands",
+ },
+ {
+ .event = {
+ .name = "unc_cbo_xsnp_response.miss_eviction",
+ .event = "umask=0x81,event=0x22",
+ .desc = "Unit: uncore_cbox A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+ .topic = "uncore",
+ .long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+ .pmu = "uncore_cbox",
+ },
+ .alias_str = "umask=0x81,event=0x22",
+ .alias_long_desc = "A cross-core snoop resulted from L3 Eviction which misses in some processor core",
+ },
+ { /* sentinel */
+ .event = {
+ .name = NULL,
+ },
+ }
+};
+
+const int total_test_events_size = ARRAY_SIZE(test_uncore_events);
+
+static bool is_same(const char *reference, const char *test)
+{
+ if (!reference && !test)
+ return true;
+
+ if (reference && !test)
+ return false;
+
+ if (!reference && test)
+ return false;
+
+ return !strcmp(reference, test);
+}
+
+static struct pmu_events_map *__test_pmu_get_events_map(void)
+{
+ struct pmu_events_map *map;
+
+ for (map = &pmu_events_map[0]; map->cpuid; map++) {
+ if (!strcmp(map->cpuid, "testcpu"))
+ return map;
+ }
+
+ pr_err("could not find test events map\n");
+
+ return NULL;
+}
+
+/* Verify generated events from pmu-events.c is as expected */
+static int test_pmu_event_table(void)
+{
+ struct pmu_events_map *map = __test_pmu_get_events_map();
+ struct pmu_event *table;
+ int map_events = 0, expected_events;
+
+ /* ignore 2x sentinels */
+ expected_events = ARRAY_SIZE(test_cpu_events) +
+ ARRAY_SIZE(test_uncore_events) - 2;
+
+ if (!map)
+ return -1;
+
+ for (table = map->table; table->name; table++) {
+ struct perf_pmu_test_event *test;
+ struct pmu_event *te;
+ bool found = false;
+
+ if (table->pmu)
+ test = &test_uncore_events[0];
+ else
+ test = &test_cpu_events[0];
+
+ te = &test->event;
+
+ for (; te->name; test++, te = &test->event) {
+ if (strcmp(table->name, te->name))
+ continue;
+ found = true;
+ map_events++;
+
+ if (!is_same(table->desc, te->desc)) {
+ pr_debug2("testing event table %s: mismatched desc, %s vs %s\n",
+ table->name, table->desc, te->desc);
+ return -1;
+ }
+
+ if (!is_same(table->topic, te->topic)) {
+ pr_debug2("testing event table %s: mismatched topic, %s vs %s\n",
+ table->name, table->topic,
+ te->topic);
+ return -1;
+ }
+
+ if (!is_same(table->long_desc, te->long_desc)) {
+ pr_debug2("testing event table %s: mismatched long_desc, %s vs %s\n",
+ table->name, table->long_desc,
+ te->long_desc);
+ return -1;
+ }
+
+ if (!is_same(table->unit, te->unit)) {
+ pr_debug2("testing event table %s: mismatched unit, %s vs %s\n",
+ table->name, table->unit,
+ te->unit);
+ return -1;
+ }
+
+ if (!is_same(table->perpkg, te->perpkg)) {
+ pr_debug2("testing event table %s: mismatched perpkg, %s vs %s\n",
+ table->name, table->perpkg,
+ te->perpkg);
+ return -1;
+ }
+
+ if (!is_same(table->metric_expr, te->metric_expr)) {
+ pr_debug2("testing event table %s: mismatched metric_expr, %s vs %s\n",
+ table->name, table->metric_expr,
+ te->metric_expr);
+ return -1;
+ }
+
+ if (!is_same(table->metric_name, te->metric_name)) {
+ pr_debug2("testing event table %s: mismatched metric_name, %s vs %s\n",
+ table->name, table->metric_name,
+ te->metric_name);
+ return -1;
+ }
+
+ if (!is_same(table->deprecated, te->deprecated)) {
+ pr_debug2("testing event table %s: mismatched deprecated, %s vs %s\n",
+ table->name, table->deprecated,
+ te->deprecated);
+ return -1;
+ }
+
+ pr_debug("testing event table %s: pass\n", table->name);
+ }
+
+ if (!found) {
+ pr_err("testing event table: could not find event %s\n",
+ table->name);
+ return -1;
+ }
+ }
+
+ if (map_events != expected_events) {
+ pr_err("testing event table: found %d, but expected %d\n",
+ map_events, expected_events);
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct perf_pmu_alias *find_alias(const char *test_event, struct list_head *aliases)
+{
+ struct perf_pmu_alias *alias;
+
+ list_for_each_entry(alias, aliases, list)
+ if (!strcmp(test_event, alias->name))
+ return alias;
+
+ return NULL;
+}
+
+/* Verify aliases are as expected */
+static int __test__pmu_event_aliases(char *pmu_name, int *count)
+{
+ struct perf_pmu_test_event *test;
+ struct pmu_event *te;
+ struct perf_pmu *pmu;
+ LIST_HEAD(aliases);
+ int res = 0;
+ bool use_uncore_table;
+ struct pmu_events_map *map = __test_pmu_get_events_map();
+
+ if (!map)
+ return -1;
+
+ if (is_pmu_core(pmu_name)) {
+ test = &test_cpu_events[0];
+ use_uncore_table = false;
+ } else {
+ test = &test_uncore_events[0];
+ use_uncore_table = true;
+ }
+
+ pmu = zalloc(sizeof(*pmu));
+ if (!pmu)
+ return -1;
+
+ pmu->name = pmu_name;
+
+ pmu_add_cpu_aliases_map(&aliases, pmu, map);
+
+ for (te = &test->event; te->name; test++, te = &test->event) {
+ struct perf_pmu_alias *alias = find_alias(te->name, &aliases);
+
+ if (!alias) {
+ bool uncore_match = pmu_uncore_alias_match(pmu_name,
+ te->pmu);
+
+ if (use_uncore_table && !uncore_match) {
+ pr_debug3("testing aliases PMU %s: skip matching alias %s\n",
+ pmu_name, te->name);
+ continue;
+ }
+
+ pr_debug2("testing aliases PMU %s: no alias, alias_table->name=%s\n",
+ pmu_name, te->name);
+ res = -1;
+ break;
+ }
+
+ if (!is_same(alias->desc, te->desc)) {
+ pr_debug2("testing aliases PMU %s: mismatched desc, %s vs %s\n",
+ pmu_name, alias->desc, te->desc);
+ res = -1;
+ break;
+ }
+
+ if (!is_same(alias->long_desc, test->alias_long_desc)) {
+ pr_debug2("testing aliases PMU %s: mismatched long_desc, %s vs %s\n",
+ pmu_name, alias->long_desc,
+ test->alias_long_desc);
+ res = -1;
+ break;
+ }
+
+ if (!is_same(alias->str, test->alias_str)) {
+ pr_debug2("testing aliases PMU %s: mismatched str, %s vs %s\n",
+ pmu_name, alias->str, test->alias_str);
+ res = -1;
+ break;
+ }
+
+ if (!is_same(alias->topic, te->topic)) {
+ pr_debug2("testing aliases PMU %s: mismatched topic, %s vs %s\n",
+ pmu_name, alias->topic, te->topic);
+ res = -1;
+ break;
+ }
+
+ (*count)++;
+ pr_debug2("testing aliases PMU %s: matched event %s\n",
+ pmu_name, alias->name);
+ }
+
+ free(pmu);
+ return res;
+}
+
+
+static int test_aliases(void)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ int count = 0;
+
+ if (list_empty(&pmu->format)) {
+ pr_debug2("skipping testing PMU %s\n", pmu->name);
+ continue;
+ }
+
+ if (__test__pmu_event_aliases(pmu->name, &count)) {
+ pr_debug("testing PMU %s aliases: failed\n", pmu->name);
+ return -1;
+ }
+
+ if (count == 0)
+ pr_debug3("testing PMU %s aliases: no events to match\n",
+ pmu->name);
+ else
+ pr_debug("testing PMU %s aliases: pass\n", pmu->name);
+ }
+
+ return 0;
+}
+
+static bool is_number(const char *str)
+{
+ char *end_ptr;
+ double v;
+
+ errno = 0;
+ v = strtod(str, &end_ptr);
+ (void)v; // We're not interested in this value, only if it is valid
+ return errno == 0 && end_ptr != str;
+}
+
+static int check_parse_id(const char *id, bool same_cpu, struct pmu_event *pe)
+{
+ struct parse_events_error error;
+ struct evlist *evlist;
+ int ret;
+
+ /* Numbers are always valid. */
+ if (is_number(id))
+ return 0;
+
+ evlist = evlist__new();
+ memset(&error, 0, sizeof(error));
+ ret = parse_events(evlist, id, &error);
+ if (ret && same_cpu) {
+ pr_warning("Parse event failed metric '%s' id '%s' expr '%s'\n",
+ pe->metric_name, id, pe->metric_expr);
+ pr_warning("Error string '%s' help '%s'\n", error.str,
+ error.help);
+ } else if (ret) {
+ pr_debug3("Parse event failed, but for an event that may not be supported by this CPU.\nid '%s' metric '%s' expr '%s'\n",
+ id, pe->metric_name, pe->metric_expr);
+ ret = 0;
+ }
+ evlist__delete(evlist);
+ free(error.str);
+ free(error.help);
+ free(error.first_str);
+ free(error.first_help);
+ return ret;
+}
+
+static void expr_failure(const char *msg,
+ const struct pmu_events_map *map,
+ const struct pmu_event *pe)
+{
+ pr_debug("%s for map %s %s %s\n",
+ msg, map->cpuid, map->version, map->type);
+ pr_debug("On metric %s\n", pe->metric_name);
+ pr_debug("On expression %s\n", pe->metric_expr);
+}
+
+static int test_parsing(void)
+{
+ struct pmu_events_map *cpus_map = perf_pmu__find_map(NULL);
+ struct pmu_events_map *map;
+ struct pmu_event *pe;
+ int i, j, k;
+ int ret = 0;
+ struct expr_parse_ctx ctx;
+ double result;
+
+ i = 0;
+ for (;;) {
+ map = &pmu_events_map[i++];
+ if (!map->table)
+ break;
+ j = 0;
+ for (;;) {
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ pe = &map->table[j++];
+ if (!pe->name && !pe->metric_group && !pe->metric_name)
+ break;
+ if (!pe->metric_expr)
+ continue;
+ expr__ctx_init(&ctx);
+ if (expr__find_other(pe->metric_expr, NULL, &ctx, 0)
+ < 0) {
+ expr_failure("Parse other failed", map, pe);
+ ret++;
+ continue;
+ }
+
+ /*
+ * Add all ids with a made up value. The value may
+ * trigger divide by zero when subtracted and so try to
+ * make them unique.
+ */
+ k = 1;
+ hashmap__for_each_entry((&ctx.ids), cur, bkt)
+ expr__add_id(&ctx, strdup(cur->key), k++);
+
+ hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+ if (check_parse_id(cur->key, map == cpus_map,
+ pe))
+ ret++;
+ }
+
+ if (expr__parse(&result, &ctx, pe->metric_expr, 0)) {
+ expr_failure("Parse failed", map, pe);
+ ret++;
+ }
+ expr__ctx_clear(&ctx);
+ }
+ }
+ /* TODO: fail when not ok */
+ return ret == 0 ? TEST_OK : TEST_SKIP;
+}
+
+static const struct {
+ int (*func)(void);
+ const char *desc;
+} pmu_events_testcase_table[] = {
+ {
+ .func = test_pmu_event_table,
+ .desc = "PMU event table sanity",
+ },
+ {
+ .func = test_aliases,
+ .desc = "PMU event map aliases",
+ },
+ {
+ .func = test_parsing,
+ .desc = "Parsing of PMU event table metrics",
+ },
+};
+
+const char *test__pmu_events_subtest_get_desc(int subtest)
+{
+ if (subtest < 0 ||
+ subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
+ return NULL;
+ return pmu_events_testcase_table[subtest].desc;
+}
+
+const char *test__pmu_events_subtest_skip_reason(int subtest)
+{
+ if (subtest < 0 ||
+ subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
+ return NULL;
+ if (pmu_events_testcase_table[subtest].func != test_parsing)
+ return NULL;
+ return "some metrics failed";
+}
+
+int test__pmu_events_subtest_get_nr(void)
+{
+ return (int)ARRAY_SIZE(pmu_events_testcase_table);
+}
+
+int test__pmu_events(struct test *test __maybe_unused, int subtest)
+{
+ if (subtest < 0 ||
+ subtest >= (int)ARRAY_SIZE(pmu_events_testcase_table))
+ return TEST_FAIL;
+ return pmu_events_testcase_table[subtest].func();
+}
diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index 74379ff1f7fa..5c11fe2b3040 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -156,8 +156,8 @@ int test__pmu(struct test *test __maybe_unused, int subtest __maybe_unused)
if (ret)
break;
- ret = perf_pmu__config_terms(&formats, &attr, terms,
- false, NULL);
+ ret = perf_pmu__config_terms("perf-pmu-test", &formats, &attr,
+ terms, false, NULL);
if (ret)
break;
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 2762e1155238..a0bdaf390ac8 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -99,6 +99,7 @@ static bool samples_same(const struct perf_sample *s1,
if (type & PERF_SAMPLE_BRANCH_STACK) {
COMP(branch_stack->nr);
+ COMP(branch_stack->hw_idx);
for (i = 0; i < s1->branch_stack->nr; i++)
MCOMP(branch_stack->entries[i]);
}
@@ -150,6 +151,9 @@ static bool samples_same(const struct perf_sample *s1,
if (type & PERF_SAMPLE_PHYS_ADDR)
COMP(phys_addr);
+ if (type & PERF_SAMPLE_CGROUP)
+ COMP(cgroup);
+
if (type & PERF_SAMPLE_AUX) {
COMP(aux_sample.size);
if (memcmp(s1->aux_sample.data, s2->aux_sample.data,
@@ -186,7 +190,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
u64 data[64];
} branch_stack = {
/* 1 branch_entry */
- .data = {1, 211, 212, 213},
+ .data = {1, -1ULL, 211, 212, 213},
};
u64 regs[64];
const u64 raw_data[] = {0x123456780a0b0c0dULL, 0x1102030405060708ULL};
@@ -208,6 +212,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.transaction = 112,
.raw_data = (void *)raw_data,
.callchain = &callchain.callchain,
+ .no_hw_idx = false,
.branch_stack = &branch_stack.branch_stack,
.user_regs = {
.abi = PERF_SAMPLE_REGS_ABI_64,
@@ -228,6 +233,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
.regs = regs,
},
.phys_addr = 113,
+ .cgroup = 114,
.aux_sample = {
.size = sizeof(aux_data),
.data = (void *)aux_data,
@@ -244,6 +250,9 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
if (sample_type & PERF_SAMPLE_REGS_INTR)
evsel.core.attr.sample_regs_intr = sample_regs;
+ if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ evsel.core.attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+
for (i = 0; i < sizeof(regs); i++)
*(i + (u8 *)regs) = i & 0xfe;
@@ -287,12 +296,12 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
goto out_free;
}
- evsel.sample_size = __perf_evsel__sample_size(sample_type);
+ evsel.sample_size = __evsel__sample_size(sample_type);
- err = perf_evsel__parse_sample(&evsel, event, &sample_out);
+ err = evsel__parse_sample(&evsel, event, &sample_out);
if (err) {
pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
- "perf_evsel__parse_sample", sample_type, err);
+ "evsel__parse_sample", sample_type, err);
goto out_free;
}
@@ -331,7 +340,7 @@ int test__sample_parsing(struct test *test __maybe_unused, int subtest __maybe_u
* were added. Please actually update the test rather than just change
* the condition below.
*/
- if (PERF_SAMPLE_MAX > PERF_SAMPLE_AUX << 1) {
+ if (PERF_SAMPLE_MAX > PERF_SAMPLE_CGROUP << 1) {
pr_debug("sample format has changed, some new PERF_SAMPLE_ bit was introduced - test needs updating\n");
return -1;
}
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index bfb9986093d8..4b9b731977c8 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -56,7 +56,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
evsel = evsel__new(&attr);
if (evsel == NULL) {
- pr_debug("perf_evsel__new\n");
+ pr_debug("evsel__new\n");
goto out_delete_evlist;
}
evlist__add(evlist, evsel);
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index fcb0d03dba4e..db5e1f70053a 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -135,8 +135,8 @@ static int process_sample_event(struct evlist *evlist,
evsel = perf_evlist__id2evsel(evlist, sample.id);
if (evsel == switch_tracking->switch_evsel) {
- next_tid = perf_evsel__intval(evsel, &sample, "next_pid");
- prev_tid = perf_evsel__intval(evsel, &sample, "prev_pid");
+ next_tid = evsel__intval(evsel, &sample, "next_pid");
+ prev_tid = evsel__intval(evsel, &sample, "prev_pid");
cpu = sample.cpu;
pr_debug3("sched_switch: cpu: %d prev_tid %d next_tid %d\n",
cpu, prev_tid, next_tid);
@@ -394,8 +394,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
switch_evsel = evlist__last(evlist);
- perf_evsel__set_sample_bit(switch_evsel, CPU);
- perf_evsel__set_sample_bit(switch_evsel, TIME);
+ evsel__set_sample_bit(switch_evsel, CPU);
+ evsel__set_sample_bit(switch_evsel, TIME);
switch_evsel->core.system_wide = true;
switch_evsel->no_aux_samples = true;
@@ -412,8 +412,8 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
goto out_err;
}
- perf_evsel__set_sample_bit(cycles_evsel, CPU);
- perf_evsel__set_sample_bit(cycles_evsel, TIME);
+ evsel__set_sample_bit(cycles_evsel, CPU);
+ evsel__set_sample_bit(cycles_evsel, TIME);
/* Fourth event */
err = parse_events(evlist, "dummy:u", NULL);
@@ -429,7 +429,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_
tracking_evsel->core.attr.freq = 0;
tracking_evsel->core.attr.sample_period = 1;
- perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ evsel__set_sample_bit(tracking_evsel, TIME);
/* Config events */
perf_evlist__config(evlist, &opts, NULL);
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 9a160fef47c9..76a4e352eaaf 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -34,6 +34,7 @@ struct test {
bool skip_if_fail;
int (*get_nr)(void);
const char *(*get_desc)(int subtest);
+ const char *(*skip_reason)(int subtest);
} subtest;
bool (*is_supported)(void);
void *priv;
@@ -49,6 +50,10 @@ int test__perf_evsel__roundtrip_name_test(struct test *test, int subtest);
int test__perf_evsel__tp_sched_test(struct test *test, int subtest);
int test__syscall_openat_tp_fields(struct test *test, int subtest);
int test__pmu(struct test *test, int subtest);
+int test__pmu_events(struct test *test, int subtest);
+const char *test__pmu_events_subtest_get_desc(int subtest);
+const char *test__pmu_events_subtest_skip_reason(int subtest);
+int test__pmu_events_subtest_get_nr(void);
int test__attr(struct test *test, int subtest);
int test__dso_data(struct test *test, int subtest);
int test__dso_data_cache(struct test *test, int subtest);
@@ -111,6 +116,11 @@ int test__mem2node(struct test *t, int subtest);
int test__maps__merge_in(struct test *t, int subtest);
int test__time_utils(struct test *t, int subtest);
int test__jit_write_elf(struct test *test, int subtest);
+int test__api_io(struct test *test, int subtest);
+int test__demangle_java(struct test *test, int subtest);
+int test__pfm(struct test *test, int subtest);
+const char *test__pfm_subtest_get_desc(int subtest);
+int test__pfm_subtest_get_nr(void);
bool test__bp_signal_is_supported(void);
bool test__bp_account_is_supported(void);
diff --git a/tools/perf/tests/topology.c b/tools/perf/tests/topology.c
index 4a800499d7c3..22daf2bdf5fa 100644
--- a/tools/perf/tests/topology.c
+++ b/tools/perf/tests/topology.c
@@ -33,10 +33,8 @@ static int session_write_header(char *path)
{
struct perf_session *session;
struct perf_data data = {
- .file = {
- .path = path,
- },
- .mode = PERF_DATA_MODE_WRITE,
+ .path = path,
+ .mode = PERF_DATA_MODE_WRITE,
};
session = perf_session__new(&data, false, NULL);
@@ -63,10 +61,8 @@ static int check_cpu_topology(char *path, struct perf_cpu_map *map)
{
struct perf_session *session;
struct perf_data data = {
- .file = {
- .path = path,
- },
- .mode = PERF_DATA_MODE_READ,
+ .path = path,
+ .mode = PERF_DATA_MODE_READ,
};
int i;
diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh
index 22c9fc900c84..9f9ea45cddc4 100755
--- a/tools/perf/trace/beauty/arch_errno_names.sh
+++ b/tools/perf/trace/beauty/arch_errno_names.sh
@@ -57,7 +57,7 @@ process_arch()
local arch="$1"
local asm_errno=$(asm_errno_file "$arch")
- $gcc $include_path -E -dM -x c $asm_errno \
+ $gcc $CFLAGS $include_path -E -dM -x c $asm_errno \
|grep -hE '^#define[[:blank:]]+(E[^[:blank:]]+)[[:blank:]]+([[:digit:]]+).*' \
|awk '{ print $2","$3; }' \
|sort -t, -k2 -nu \
@@ -91,7 +91,7 @@ EoHEADER
# in tools/perf/arch
archlist=""
for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | grep -v x86 | sort); do
- test -d arch/$arch && archlist="$archlist $arch"
+ test -d $toolsdir/perf/arch/$arch && archlist="$archlist $arch"
done
for arch in x86 $archlist generic; do
diff --git a/tools/perf/trace/beauty/clone.c b/tools/perf/trace/beauty/clone.c
index 062ca849c8fd..f4db894e0af6 100644
--- a/tools/perf/trace/beauty/clone.c
+++ b/tools/perf/trace/beauty/clone.c
@@ -46,6 +46,7 @@ static size_t clone__scnprintf_flags(unsigned long flags, char *bf, size_t size,
P_FLAG(NEWNET);
P_FLAG(IO);
P_FLAG(CLEAR_SIGHAND);
+ P_FLAG(INTO_CGROUP);
#undef P_FLAG
if (flags)
diff --git a/tools/perf/trace/beauty/mmap.c b/tools/perf/trace/beauty/mmap.c
index 9fa771a90d79..862c8331dded 100644
--- a/tools/perf/trace/beauty/mmap.c
+++ b/tools/perf/trace/beauty/mmap.c
@@ -69,6 +69,7 @@ static size_t syscall_arg__scnprintf_mremap_flags(char *bf, size_t size,
P_MREMAP_FLAG(MAYMOVE);
P_MREMAP_FLAG(FIXED);
+ P_MREMAP_FLAG(DONTUNMAP);
#undef P_MREMAP_FLAG
if (flags)
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f36dee499320..f98a118dfc49 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -677,7 +677,7 @@ static int hist_browser__title(struct hist_browser *browser, char *bf, size_t si
return browser->title ? browser->title(browser, bf, size) : 0;
}
-static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_lost_event, char *title, int key)
+static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_lost_event, char *title, size_t size, int key)
{
switch (key) {
case K_TIMER: {
@@ -703,7 +703,7 @@ static int hist_browser__handle_hotkey(struct hist_browser *browser, bool warn_l
ui_browser__warn_lost_events(&browser->b);
}
- hist_browser__title(browser, title, sizeof(title));
+ hist_browser__title(browser, title, size);
ui_browser__show_title(&browser->b, title);
break;
}
@@ -764,13 +764,13 @@ int hist_browser__run(struct hist_browser *browser, const char *help,
if (ui_browser__show(&browser->b, title, "%s", help) < 0)
return -1;
- if (key && hist_browser__handle_hotkey(browser, warn_lost_event, title, key))
+ if (key && hist_browser__handle_hotkey(browser, warn_lost_event, title, sizeof(title), key))
goto out;
while (1) {
key = ui_browser__run(&browser->b, delay_secs);
- if (hist_browser__handle_hotkey(browser, warn_lost_event, title, key))
+ if (hist_browser__handle_hotkey(browser, warn_lost_event, title, sizeof(title), key))
break;
}
out:
@@ -2465,13 +2465,41 @@ do_annotate(struct hist_browser *browser, struct popup_action *act)
return 0;
}
+static struct symbol *symbol__new_unresolved(u64 addr, struct map *map)
+{
+ struct annotated_source *src;
+ struct symbol *sym;
+ char name[64];
+
+ snprintf(name, sizeof(name), "%.*" PRIx64, BITS_PER_LONG / 4, addr);
+
+ sym = symbol__new(addr, ANNOTATION_DUMMY_LEN, 0, 0, name);
+ if (sym) {
+ src = symbol__hists(sym, 1);
+ if (!src) {
+ symbol__delete(sym);
+ return NULL;
+ }
+
+ dso__insert_symbol(map->dso, sym);
+ }
+
+ return sym;
+}
+
static int
add_annotate_opt(struct hist_browser *browser __maybe_unused,
struct popup_action *act, char **optstr,
- struct map_symbol *ms)
+ struct map_symbol *ms,
+ u64 addr)
{
- if (ms->sym == NULL || ms->map->dso->annotate_warned ||
- symbol__annotation(ms->sym)->src == NULL)
+ if (!ms->map || !ms->map->dso || ms->map->dso->annotate_warned)
+ return 0;
+
+ if (!ms->sym)
+ ms->sym = symbol__new_unresolved(addr, ms->map);
+
+ if (ms->sym == NULL || symbol__annotation(ms->sym)->src == NULL)
return 0;
if (asprintf(optstr, "Annotate %s", ms->sym->name) < 0)
@@ -2964,7 +2992,8 @@ static int perf_evsel__hists_browse(struct evsel *evsel, int nr_events,
"s Switch to another data file in PWD\n"
"t Zoom into current Thread\n"
"V Verbose (DSO names in callchains, etc)\n"
- "/ Filter symbol by name";
+ "/ Filter symbol by name\n"
+ "0-9 Sort by event n in group";
static const char top_help[] = HIST_BROWSER_HELP_COMMON
"P Print histograms to perf.hist.N\n"
"t Zoom into current Thread\n"
@@ -3025,6 +3054,31 @@ do_hotkey: // key came straight from options ui__popup_menu()
* go to the next or previous
*/
goto out_free_stack;
+ case '0' ... '9':
+ if (!symbol_conf.event_group ||
+ evsel->core.nr_members < 2) {
+ snprintf(buf, sizeof(buf),
+ "Sort by index only available with group events!");
+ helpline = buf;
+ continue;
+ }
+
+ if (key - '0' == symbol_conf.group_sort_idx)
+ continue;
+
+ symbol_conf.group_sort_idx = key - '0';
+
+ if (symbol_conf.group_sort_idx >= evsel->core.nr_members) {
+ snprintf(buf, sizeof(buf),
+ "Max event group index to sort is %d (index from 0 to %d)",
+ evsel->core.nr_members - 1,
+ evsel->core.nr_members - 1);
+ helpline = buf;
+ continue;
+ }
+
+ key = K_RELOAD;
+ goto out_free_stack;
case 'a':
if (!hists__has(hists, sym)) {
ui_browser__warning(&browser->b, delay_secs * 2,
@@ -3033,21 +3087,45 @@ do_hotkey: // key came straight from options ui__popup_menu()
continue;
}
- if (browser->selection == NULL ||
- browser->selection->sym == NULL ||
- browser->selection->map->dso->annotate_warned)
+ if (!browser->selection ||
+ !browser->selection->map ||
+ !browser->selection->map->dso ||
+ browser->selection->map->dso->annotate_warned) {
continue;
+ }
- if (symbol__annotation(browser->selection->sym)->src == NULL) {
- ui_browser__warning(&browser->b, delay_secs * 2,
- "No samples for the \"%s\" symbol.\n\n"
- "Probably appeared just in a callchain",
- browser->selection->sym->name);
- continue;
+ if (!browser->selection->sym) {
+ if (!browser->he_selection)
+ continue;
+
+ if (sort__mode == SORT_MODE__BRANCH) {
+ bi = browser->he_selection->branch_info;
+ if (!bi || !bi->to.ms.map)
+ continue;
+
+ actions->ms.sym = symbol__new_unresolved(bi->to.al_addr, bi->to.ms.map);
+ actions->ms.map = bi->to.ms.map;
+ } else {
+ actions->ms.sym = symbol__new_unresolved(browser->he_selection->ip,
+ browser->selection->map);
+ actions->ms.map = browser->selection->map;
+ }
+
+ if (!actions->ms.sym)
+ continue;
+ } else {
+ if (symbol__annotation(browser->selection->sym)->src == NULL) {
+ ui_browser__warning(&browser->b, delay_secs * 2,
+ "No samples for the \"%s\" symbol.\n\n"
+ "Probably appeared just in a callchain",
+ browser->selection->sym->name);
+ continue;
+ }
+
+ actions->ms.map = browser->selection->map;
+ actions->ms.sym = browser->selection->sym;
}
- actions->ms.map = browser->selection->map;
- actions->ms.sym = browser->selection->sym;
do_annotate(browser, actions);
continue;
case 'P':
@@ -3219,17 +3297,20 @@ do_hotkey: // key came straight from options ui__popup_menu()
nr_options += add_annotate_opt(browser,
&actions[nr_options],
&options[nr_options],
- &bi->from.ms);
+ &bi->from.ms,
+ bi->from.al_addr);
if (bi->to.ms.sym != bi->from.ms.sym)
nr_options += add_annotate_opt(browser,
&actions[nr_options],
&options[nr_options],
- &bi->to.ms);
+ &bi->to.ms,
+ bi->to.al_addr);
} else {
nr_options += add_annotate_opt(browser,
&actions[nr_options],
&options[nr_options],
- browser->selection);
+ browser->selection,
+ browser->he_selection->ip);
}
skip_annotation:
nr_options += add_thread_opt(browser, &actions[nr_options],
@@ -3335,7 +3416,7 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
struct hists *hists = evsel__hists(evsel);
bool current_entry = ui_browser__is_current_entry(browser, row);
unsigned long nr_events = hists->stats.nr_events[PERF_RECORD_SAMPLE];
- const char *ev_name = perf_evsel__name(evsel);
+ const char *ev_name = evsel__name(evsel);
char bf[256], unit;
const char *warn = " ";
size_t printed;
@@ -3343,10 +3424,10 @@ static void perf_evsel_menu__write(struct ui_browser *browser,
ui_browser__set_color(browser, current_entry ? HE_COLORSET_SELECTED :
HE_COLORSET_NORMAL);
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
struct evsel *pos;
- ev_name = perf_evsel__group_name(evsel);
+ ev_name = evsel__group_name(evsel);
for_each_group_member(pos, evsel) {
struct hists *pos_hists = evsel__hists(pos);
@@ -3431,15 +3512,16 @@ browse_hists:
if (pos->core.node.next == &evlist->core.entries)
pos = evlist__first(evlist);
else
- pos = perf_evsel__next(pos);
+ pos = evsel__next(pos);
goto browse_hists;
case K_UNTAB:
if (pos->core.node.prev == &evlist->core.entries)
pos = evlist__last(evlist);
else
- pos = perf_evsel__prev(pos);
+ pos = evsel__prev(pos);
goto browse_hists;
case K_SWITCH_INPUT_DATA:
+ case K_RELOAD:
case 'q':
case CTRL('c'):
goto out;
@@ -3472,7 +3554,7 @@ static bool filter_group_entries(struct ui_browser *browser __maybe_unused,
{
struct evsel *evsel = list_entry(entry, struct evsel, core.node);
- if (symbol_conf.event_group && !perf_evsel__is_group_leader(evsel))
+ if (symbol_conf.event_group && !evsel__is_group_leader(evsel))
return true;
return false;
@@ -3505,7 +3587,7 @@ static int __perf_evlist__tui_browse_hists(struct evlist *evlist,
ui_helpline__push("Press ESC to exit");
evlist__for_each_entry(evlist, pos) {
- const char *ev_name = perf_evsel__name(pos);
+ const char *ev_name = evsel__name(pos);
size_t line_len = strlen(ev_name) + 7;
if (menu.b.width < line_len)
@@ -3540,7 +3622,7 @@ single_entry:
nr_entries = 0;
evlist__for_each_entry(evlist, pos) {
- if (perf_evsel__is_group_leader(pos))
+ if (evsel__is_group_leader(pos))
nr_entries++;
}
@@ -3558,7 +3640,7 @@ static int block_hists_browser__title(struct hist_browser *browser, char *bf,
size_t size)
{
struct hists *hists = evsel__hists(browser->block_evsel);
- const char *evname = perf_evsel__name(browser->block_evsel);
+ const char *evname = evsel__name(browser->block_evsel);
unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
int ret;
diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c
index 35f9641bf670..a7dff77f2018 100644
--- a/tools/perf/ui/gtk/annotate.c
+++ b/tools/perf/ui/gtk/annotate.c
@@ -130,7 +130,7 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct map_symbol *ms,
gtk_list_store_append(store, &iter);
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
for (i = 0; i < evsel->core.nr_members; i++) {
ret += perf_gtk__get_percent(s + ret,
sizeof(s) - ret,
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index ed1a97b2c4b0..53ef71a1b15d 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -635,18 +635,18 @@ int perf_evlist__gtk_browse_hists(struct evlist *evlist,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- const char *evname = perf_evsel__name(pos);
+ const char *evname = evsel__name(pos);
GtkWidget *scrolled_window;
GtkWidget *tab_label;
char buf[512];
size_t size = sizeof(buf);
if (symbol_conf.event_group) {
- if (!perf_evsel__is_group_leader(pos))
+ if (!evsel__is_group_leader(pos))
continue;
if (pos->core.nr_members > 1) {
- perf_evsel__group_desc(pos, buf, size);
+ evsel__group_desc(pos, buf, size);
evname = buf;
}
}
diff --git a/tools/perf/ui/hist.c b/tools/perf/ui/hist.c
index f73675500061..c1f24d004852 100644
--- a/tools/perf/ui/hist.c
+++ b/tools/perf/ui/hist.c
@@ -43,12 +43,12 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
} else
ret = hpp__call_print_fn(hpp, print_fn, fmt, len, get_field(he));
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
int prev_idx, idx_delta;
struct hist_entry *pair;
int nr_members = evsel->core.nr_members;
- prev_idx = perf_evsel__group_idx(evsel);
+ prev_idx = evsel__group_idx(evsel);
list_for_each_entry(pair, &he->pairs.head, pairs.node) {
u64 period = get_field(pair);
@@ -58,7 +58,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
continue;
evsel = hists_to_evsel(pair->hists);
- idx_delta = perf_evsel__group_idx(evsel) - prev_idx - 1;
+ idx_delta = evsel__group_idx(evsel) - prev_idx - 1;
while (idx_delta--) {
/*
@@ -82,7 +82,7 @@ static int __hpp__fmt(struct perf_hpp *hpp, struct hist_entry *he,
len, period);
}
- prev_idx = perf_evsel__group_idx(evsel);
+ prev_idx = evsel__group_idx(evsel);
}
idx_delta = nr_members - prev_idx - 1;
@@ -151,40 +151,103 @@ static int field_cmp(u64 field_a, u64 field_b)
return 0;
}
+static int hist_entry__new_pair(struct hist_entry *a, struct hist_entry *b,
+ hpp_field_fn get_field, int nr_members,
+ u64 **fields_a, u64 **fields_b)
+{
+ u64 *fa = calloc(nr_members, sizeof(*fa)),
+ *fb = calloc(nr_members, sizeof(*fb));
+ struct hist_entry *pair;
+
+ if (!fa || !fb)
+ goto out_free;
+
+ list_for_each_entry(pair, &a->pairs.head, pairs.node) {
+ struct evsel *evsel = hists_to_evsel(pair->hists);
+ fa[evsel__group_idx(evsel)] = get_field(pair);
+ }
+
+ list_for_each_entry(pair, &b->pairs.head, pairs.node) {
+ struct evsel *evsel = hists_to_evsel(pair->hists);
+ fb[evsel__group_idx(evsel)] = get_field(pair);
+ }
+
+ *fields_a = fa;
+ *fields_b = fb;
+ return 0;
+out_free:
+ free(fa);
+ free(fb);
+ *fields_a = *fields_b = NULL;
+ return -1;
+}
+
+static int __hpp__group_sort_idx(struct hist_entry *a, struct hist_entry *b,
+ hpp_field_fn get_field, int idx)
+{
+ struct evsel *evsel = hists_to_evsel(a->hists);
+ u64 *fields_a, *fields_b;
+ int cmp, nr_members, ret, i;
+
+ cmp = field_cmp(get_field(a), get_field(b));
+ if (!evsel__is_group_event(evsel))
+ return cmp;
+
+ nr_members = evsel->core.nr_members;
+ if (idx < 1 || idx >= nr_members)
+ return cmp;
+
+ ret = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b);
+ if (ret) {
+ ret = cmp;
+ goto out;
+ }
+
+ ret = field_cmp(fields_a[idx], fields_b[idx]);
+ if (ret)
+ goto out;
+
+ for (i = 1; i < nr_members; i++) {
+ if (i != idx) {
+ ret = field_cmp(fields_a[i], fields_b[i]);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ free(fields_a);
+ free(fields_b);
+
+ return ret;
+}
+
static int __hpp__sort(struct hist_entry *a, struct hist_entry *b,
hpp_field_fn get_field)
{
s64 ret;
int i, nr_members;
struct evsel *evsel;
- struct hist_entry *pair;
u64 *fields_a, *fields_b;
+ if (symbol_conf.group_sort_idx && symbol_conf.event_group) {
+ return __hpp__group_sort_idx(a, b, get_field,
+ symbol_conf.group_sort_idx);
+ }
+
ret = field_cmp(get_field(a), get_field(b));
if (ret || !symbol_conf.event_group)
return ret;
evsel = hists_to_evsel(a->hists);
- if (!perf_evsel__is_group_event(evsel))
+ if (!evsel__is_group_event(evsel))
return ret;
nr_members = evsel->core.nr_members;
- fields_a = calloc(nr_members, sizeof(*fields_a));
- fields_b = calloc(nr_members, sizeof(*fields_b));
-
- if (!fields_a || !fields_b)
+ i = hist_entry__new_pair(a, b, get_field, nr_members, &fields_a, &fields_b);
+ if (i)
goto out;
- list_for_each_entry(pair, &a->pairs.head, pairs.node) {
- evsel = hists_to_evsel(pair->hists);
- fields_a[perf_evsel__group_idx(evsel)] = get_field(pair);
- }
-
- list_for_each_entry(pair, &b->pairs.head, pairs.node) {
- evsel = hists_to_evsel(pair->hists);
- fields_b[perf_evsel__group_idx(evsel)] = get_field(pair);
- }
-
for (i = 1; i < nr_members; i++) {
ret = field_cmp(fields_a[i], fields_b[i]);
if (ret)
diff --git a/tools/perf/ui/keysyms.h b/tools/perf/ui/keysyms.h
index fbfac29077f2..04cc4e5c031f 100644
--- a/tools/perf/ui/keysyms.h
+++ b/tools/perf/ui/keysyms.h
@@ -25,5 +25,6 @@
#define K_ERROR -2
#define K_RESIZE -3
#define K_SWITCH_INPUT_DATA -4
+#define K_RELOAD -5
#endif /* _PERF_KEYSYMS_H_ */
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 07da6c790b63..8d18380ecd10 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -10,6 +10,7 @@ perf-y += db-export.o
perf-y += env.o
perf-y += event.o
perf-y += evlist.o
+perf-y += sideband_evlist.o
perf-y += evsel.o
perf-y += evsel_fprintf.o
perf-y += perf_event_attr_fprintf.o
@@ -88,6 +89,7 @@ perf-y += counts.o
perf-y += stat.o
perf-y += stat-shadow.o
perf-y += stat-display.o
+perf-y += perf_api_probe.o
perf-y += record.o
perf-y += srcline.o
perf-y += srccode.o
@@ -104,7 +106,7 @@ perf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
perf-$(CONFIG_AUXTRACE) += intel-pt.o
perf-$(CONFIG_AUXTRACE) += intel-bts.o
perf-$(CONFIG_AUXTRACE) += arm-spe.o
-perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o
+perf-$(CONFIG_AUXTRACE) += arm-spe-decoder/
perf-$(CONFIG_AUXTRACE) += s390-cpumsf.o
ifdef CONFIG_LIBOPENCSD
@@ -121,7 +123,9 @@ perf-y += mem-events.o
perf-y += vsprintf.o
perf-y += units.o
perf-y += time-utils.o
+perf-y += expr-flex.o
perf-y += expr-bison.o
+perf-y += expr.o
perf-y += branch.o
perf-y += mem2node.o
@@ -132,6 +136,10 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o
perf-$(CONFIG_LIBELF) += probe-file.o
perf-$(CONFIG_LIBELF) += probe-event.o
+ifndef CONFIG_LIBBPF
+perf-y += hashmap.o
+endif
+
ifndef CONFIG_LIBELF
perf-y += symbol-minimal.o
endif
@@ -175,6 +183,8 @@ perf-$(CONFIG_LIBBPF) += bpf-event.o
perf-$(CONFIG_CXX) += c++/
+perf-$(CONFIG_LIBPFM4) += pfm.o
+
CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
@@ -189,9 +199,13 @@ $(OUTPUT)util/parse-events-bison.c: util/parse-events.y
$(call rule_mkdir)
$(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+$(OUTPUT)util/expr-flex.c: util/expr.l $(OUTPUT)util/expr-bison.c
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/expr-flex.h $(PARSER_DEBUG_FLEX) util/expr.l
+
$(OUTPUT)util/expr-bison.c: util/expr.y
$(call rule_mkdir)
- $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr__
+ $(Q)$(call echo-cmd,bison)$(BISON) -v util/expr.y -d $(PARSER_DEBUG_BISON) -o $@ -p expr_
$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
$(call rule_mkdir)
@@ -203,12 +217,14 @@ $(OUTPUT)util/pmu-bison.c: util/pmu.y
CFLAGS_parse-events-flex.o += -w
CFLAGS_pmu-flex.o += -w
+CFLAGS_expr-flex.o += -w
CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
CFLAGS_expr-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+$(OUTPUT)util/expr.o: $(OUTPUT)util/expr-flex.c $(OUTPUT)util/expr-bison.c
CFLAGS_bitmap.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_find_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
@@ -216,6 +232,7 @@ CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ET
CFLAGS_libstring.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
CFLAGS_parse-events.o += -Wno-redundant-decls
+CFLAGS_expr.o += -Wno-redundant-decls
CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE
$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 0ea95be84b3b..76bfb4a9d94e 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -41,7 +41,6 @@
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/string.h>
-#include <bpf/libbpf.h>
#include <subcmd/parse-options.h>
#include <subcmd/run-command.h>
@@ -1191,7 +1190,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args)
struct disasm_line *dl = NULL;
int nr = 1;
- if (perf_evsel__is_group_event(args->evsel))
+ if (evsel__is_group_event(args->evsel))
nr = args->evsel->core.nr_members;
dl = zalloc(disasm_line_size(nr));
@@ -1437,7 +1436,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start
if (queue)
return -1;
- if (perf_evsel__is_group_event(evsel))
+ if (evsel__is_group_event(evsel))
width *= evsel->core.nr_members;
if (!*al->line)
@@ -1821,6 +1820,24 @@ static int symbol__disassemble_bpf(struct symbol *sym __maybe_unused,
}
#endif // defined(HAVE_LIBBFD_SUPPORT) && defined(HAVE_LIBBPF_SUPPORT)
+static int
+symbol__disassemble_bpf_image(struct symbol *sym,
+ struct annotate_args *args)
+{
+ struct annotation *notes = symbol__annotation(sym);
+ struct disasm_line *dl;
+
+ args->offset = -1;
+ args->line = strdup("to be implemented");
+ args->line_nr = 0;
+ dl = disasm_line__new(args);
+ if (dl)
+ annotation_line__add(&dl->al, &notes->src->source);
+
+ free(args->line);
+ return 0;
+}
+
/*
* Possibly create a new version of line with tabs expanded. Returns the
* existing or new line, storage is updated if a new line is allocated. If
@@ -1920,6 +1937,8 @@ static int symbol__disassemble(struct symbol *sym, struct annotate_args *args)
if (dso->binary_type == DSO_BINARY_TYPE__BPF_PROG_INFO) {
return symbol__disassemble_bpf(sym, args);
+ } else if (dso->binary_type == DSO_BINARY_TYPE__BPF_IMAGE) {
+ return symbol__disassemble_bpf_image(sym, args);
} else if (dso__is_kcore(dso)) {
kce.kcore_filename = symfs_filename;
kce.addr = map__rip_2objdump(map, sym->start);
@@ -2136,7 +2155,7 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel,
.evsel = evsel,
.options = options,
};
- struct perf_env *env = perf_evsel__env(evsel);
+ struct perf_env *env = evsel__env(evsel);
const char *arch_name = perf_env__arch(env);
struct arch *arch;
int err;
@@ -2324,7 +2343,7 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
struct dso *dso = map->dso;
char *filename;
const char *d_filename;
- const char *evsel_name = perf_evsel__name(evsel);
+ const char *evsel_name = evsel__name(evsel);
struct annotation *notes = symbol__annotation(sym);
struct sym_hist *h = annotation__histogram(notes, evsel->idx);
struct annotation_line *pos, *queue = NULL;
@@ -2348,9 +2367,9 @@ int symbol__annotate_printf(struct map_symbol *ms, struct evsel *evsel,
len = symbol__size(sym);
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
width *= evsel->core.nr_members;
- perf_evsel__group_desc(evsel, buf, sizeof(buf));
+ evsel__group_desc(evsel, buf, sizeof(buf));
evsel_name = buf;
}
@@ -2485,7 +2504,7 @@ static int symbol__annotate_fprintf2(struct symbol *sym, FILE *fp,
int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
struct annotation_options *opts)
{
- const char *ev_name = perf_evsel__name(evsel);
+ const char *ev_name = evsel__name(evsel);
char buf[1024];
char *filename;
int err = -1;
@@ -2498,8 +2517,8 @@ int map_symbol__annotation_dump(struct map_symbol *ms, struct evsel *evsel,
if (fp == NULL)
goto out_free_filename;
- if (perf_evsel__is_group_event(evsel)) {
- perf_evsel__group_desc(evsel, buf, sizeof(buf));
+ if (evsel__is_group_event(evsel)) {
+ evsel__group_desc(evsel, buf, sizeof(buf));
ev_name = buf;
}
@@ -2611,8 +2630,6 @@ void annotation__mark_jump_targets(struct annotation *notes, struct symbol *sym)
if (++al->jump_sources > notes->max_jump_sources)
notes->max_jump_sources = al->jump_sources;
-
- ++notes->nr_jumps;
}
}
@@ -3046,7 +3063,7 @@ int symbol__annotate2(struct map_symbol *ms, struct evsel *evsel,
if (notes->offsets == NULL)
return ENOMEM;
- if (perf_evsel__is_group_event(evsel))
+ if (evsel__is_group_event(evsel))
nr_pcnt = evsel->core.nr_members;
err = symbol__annotate(ms, evsel, options, parch);
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index 001258601a37..0a0cd4f32175 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -74,6 +74,7 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2);
#define ANNOTATION__CYCLES_WIDTH 6
#define ANNOTATION__MINMAX_CYCLES_WIDTH 19
#define ANNOTATION__AVG_IPC_WIDTH 36
+#define ANNOTATION_DUMMY_LEN 256
struct annotation_options {
bool hide_src_code,
@@ -143,7 +144,7 @@ struct annotation_line {
u32 idx;
int idx_asm;
int data_nr;
- struct annotation_data data[0];
+ struct annotation_data data[];
};
struct disasm_line {
@@ -226,7 +227,7 @@ void symbol__calc_percent(struct symbol *sym, struct evsel *evsel);
struct sym_hist {
u64 nr_samples;
u64 period;
- struct sym_hist_entry addr[0];
+ struct sym_hist_entry addr[];
};
struct cyc_hist {
@@ -279,7 +280,6 @@ struct annotation {
struct annotation_options *options;
struct annotation_line **offsets;
int nr_events;
- int nr_jumps;
int max_jump_sources;
int nr_entries;
int nr_asm_entries;
diff --git a/tools/perf/util/arm-spe-decoder/Build b/tools/perf/util/arm-spe-decoder/Build
new file mode 100644
index 000000000000..f8dae13fc876
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/Build
@@ -0,0 +1 @@
+perf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o arm-spe-decoder.o
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
new file mode 100644
index 000000000000..302a14d0aca9
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arm_spe_decoder.c: ARM SPE support
+ */
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <errno.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <linux/zalloc.h>
+
+#include "../auxtrace.h"
+#include "../debug.h"
+#include "../util.h"
+
+#include "arm-spe-decoder.h"
+
+#ifndef BIT
+#define BIT(n) (1UL << (n))
+#endif
+
+static u64 arm_spe_calc_ip(int index, u64 payload)
+{
+ u8 *addr = (u8 *)&payload;
+ int ns, el;
+
+ /* Instruction virtual address or Branch target address */
+ if (index == SPE_ADDR_PKT_HDR_INDEX_INS ||
+ index == SPE_ADDR_PKT_HDR_INDEX_BRANCH) {
+ ns = addr[7] & SPE_ADDR_PKT_NS;
+ el = (addr[7] & SPE_ADDR_PKT_EL_MASK) >> SPE_ADDR_PKT_EL_OFFSET;
+
+ /* Fill highest byte for EL1 or EL2 (VHE) mode */
+ if (ns && (el == SPE_ADDR_PKT_EL1 || el == SPE_ADDR_PKT_EL2))
+ addr[7] = 0xff;
+ /* Clean highest byte for other cases */
+ else
+ addr[7] = 0x0;
+
+ /* Data access virtual address */
+ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT) {
+
+ /* Fill highest byte if bits [48..55] is 0xff */
+ if (addr[6] == 0xff)
+ addr[7] = 0xff;
+ /* Otherwise, cleanup tags */
+ else
+ addr[7] = 0x0;
+
+ /* Data access physical address */
+ } else if (index == SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS) {
+ /* Cleanup byte 7 */
+ addr[7] = 0x0;
+ } else {
+ pr_err("unsupported address packet index: 0x%x\n", index);
+ }
+
+ return payload;
+}
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params)
+{
+ struct arm_spe_decoder *decoder;
+
+ if (!params->get_trace)
+ return NULL;
+
+ decoder = zalloc(sizeof(struct arm_spe_decoder));
+ if (!decoder)
+ return NULL;
+
+ decoder->get_trace = params->get_trace;
+ decoder->data = params->data;
+
+ return decoder;
+}
+
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder)
+{
+ free(decoder);
+}
+
+static int arm_spe_get_data(struct arm_spe_decoder *decoder)
+{
+ struct arm_spe_buffer buffer = { .buf = 0, };
+ int ret;
+
+ pr_debug("Getting more data\n");
+ ret = decoder->get_trace(&buffer, decoder->data);
+ if (ret < 0)
+ return ret;
+
+ decoder->buf = buffer.buf;
+ decoder->len = buffer.len;
+
+ if (!decoder->len)
+ pr_debug("No more data\n");
+
+ return decoder->len;
+}
+
+static int arm_spe_get_next_packet(struct arm_spe_decoder *decoder)
+{
+ int ret;
+
+ do {
+ if (!decoder->len) {
+ ret = arm_spe_get_data(decoder);
+
+ /* Failed to read out trace data */
+ if (ret <= 0)
+ return ret;
+ }
+
+ ret = arm_spe_get_packet(decoder->buf, decoder->len,
+ &decoder->packet);
+ if (ret <= 0) {
+ /* Move forward for 1 byte */
+ decoder->buf += 1;
+ decoder->len -= 1;
+ return -EBADMSG;
+ }
+
+ decoder->buf += ret;
+ decoder->len -= ret;
+ } while (decoder->packet.type == ARM_SPE_PAD);
+
+ return 1;
+}
+
+static int arm_spe_read_record(struct arm_spe_decoder *decoder)
+{
+ int err;
+ int idx;
+ u64 payload, ip;
+
+ memset(&decoder->record, 0x0, sizeof(decoder->record));
+
+ while (1) {
+ err = arm_spe_get_next_packet(decoder);
+ if (err <= 0)
+ return err;
+
+ idx = decoder->packet.index;
+ payload = decoder->packet.payload;
+
+ switch (decoder->packet.type) {
+ case ARM_SPE_TIMESTAMP:
+ decoder->record.timestamp = payload;
+ return 1;
+ case ARM_SPE_END:
+ return 1;
+ case ARM_SPE_ADDRESS:
+ ip = arm_spe_calc_ip(idx, payload);
+ if (idx == SPE_ADDR_PKT_HDR_INDEX_INS)
+ decoder->record.from_ip = ip;
+ else if (idx == SPE_ADDR_PKT_HDR_INDEX_BRANCH)
+ decoder->record.to_ip = ip;
+ break;
+ case ARM_SPE_COUNTER:
+ break;
+ case ARM_SPE_CONTEXT:
+ break;
+ case ARM_SPE_OP_TYPE:
+ break;
+ case ARM_SPE_EVENTS:
+ if (payload & BIT(EV_L1D_REFILL))
+ decoder->record.type |= ARM_SPE_L1D_MISS;
+
+ if (payload & BIT(EV_L1D_ACCESS))
+ decoder->record.type |= ARM_SPE_L1D_ACCESS;
+
+ if (payload & BIT(EV_TLB_WALK))
+ decoder->record.type |= ARM_SPE_TLB_MISS;
+
+ if (payload & BIT(EV_TLB_ACCESS))
+ decoder->record.type |= ARM_SPE_TLB_ACCESS;
+
+ if ((idx == 1 || idx == 2 || idx == 3) &&
+ (payload & BIT(EV_LLC_MISS)))
+ decoder->record.type |= ARM_SPE_LLC_MISS;
+
+ if ((idx == 1 || idx == 2 || idx == 3) &&
+ (payload & BIT(EV_LLC_ACCESS)))
+ decoder->record.type |= ARM_SPE_LLC_ACCESS;
+
+ if ((idx == 1 || idx == 2 || idx == 3) &&
+ (payload & BIT(EV_REMOTE_ACCESS)))
+ decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
+
+ if (payload & BIT(EV_MISPRED))
+ decoder->record.type |= ARM_SPE_BRANCH_MISS;
+
+ break;
+ case ARM_SPE_DATA_SOURCE:
+ break;
+ case ARM_SPE_BAD:
+ break;
+ case ARM_SPE_PAD:
+ break;
+ default:
+ pr_err("Get packet error!\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int arm_spe_decode(struct arm_spe_decoder *decoder)
+{
+ return arm_spe_read_record(decoder);
+}
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
new file mode 100644
index 000000000000..a5111a8d4360
--- /dev/null
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arm_spe_decoder.h: Arm Statistical Profiling Extensions support
+ * Copyright (c) 2019-2020, Arm Ltd.
+ */
+
+#ifndef INCLUDE__ARM_SPE_DECODER_H__
+#define INCLUDE__ARM_SPE_DECODER_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include "arm-spe-pkt-decoder.h"
+
+enum arm_spe_events {
+ EV_EXCEPTION_GEN = 0,
+ EV_RETIRED = 1,
+ EV_L1D_ACCESS = 2,
+ EV_L1D_REFILL = 3,
+ EV_TLB_ACCESS = 4,
+ EV_TLB_WALK = 5,
+ EV_NOT_TAKEN = 6,
+ EV_MISPRED = 7,
+ EV_LLC_ACCESS = 8,
+ EV_LLC_MISS = 9,
+ EV_REMOTE_ACCESS = 10,
+ EV_ALIGNMENT = 11,
+ EV_PARTIAL_PREDICATE = 17,
+ EV_EMPTY_PREDICATE = 18,
+};
+
+enum arm_spe_sample_type {
+ ARM_SPE_L1D_ACCESS = 1 << 0,
+ ARM_SPE_L1D_MISS = 1 << 1,
+ ARM_SPE_LLC_ACCESS = 1 << 2,
+ ARM_SPE_LLC_MISS = 1 << 3,
+ ARM_SPE_TLB_ACCESS = 1 << 4,
+ ARM_SPE_TLB_MISS = 1 << 5,
+ ARM_SPE_BRANCH_MISS = 1 << 6,
+ ARM_SPE_REMOTE_ACCESS = 1 << 7,
+};
+
+struct arm_spe_record {
+ enum arm_spe_sample_type type;
+ int err;
+ u64 from_ip;
+ u64 to_ip;
+ u64 timestamp;
+};
+
+struct arm_spe_insn;
+
+struct arm_spe_buffer {
+ const unsigned char *buf;
+ size_t len;
+ u64 offset;
+ u64 trace_nr;
+};
+
+struct arm_spe_params {
+ int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+ void *data;
+};
+
+struct arm_spe_decoder {
+ int (*get_trace)(struct arm_spe_buffer *buffer, void *data);
+ void *data;
+ struct arm_spe_record record;
+
+ const unsigned char *buf;
+ size_t len;
+
+ struct arm_spe_pkt packet;
+};
+
+struct arm_spe_decoder *arm_spe_decoder_new(struct arm_spe_params *params);
+void arm_spe_decoder_free(struct arm_spe_decoder *decoder);
+
+int arm_spe_decode(struct arm_spe_decoder *decoder);
+
+#endif
diff --git a/tools/perf/util/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index b94001b756c7..b94001b756c7 100644
--- a/tools/perf/util/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
diff --git a/tools/perf/util/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
index d786ef65113f..4c870521b8eb 100644
--- a/tools/perf/util/arm-spe-pkt-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.h
@@ -15,6 +15,8 @@
#define ARM_SPE_NEED_MORE_BYTES -1
#define ARM_SPE_BAD_PACKET -2
+#define ARM_SPE_PKT_MAX_SZ 16
+
enum arm_spe_pkt_type {
ARM_SPE_BAD,
ARM_SPE_PAD,
@@ -34,6 +36,20 @@ struct arm_spe_pkt {
uint64_t payload;
};
+#define SPE_ADDR_PKT_HDR_INDEX_INS (0x0)
+#define SPE_ADDR_PKT_HDR_INDEX_BRANCH (0x1)
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_VIRT (0x2)
+#define SPE_ADDR_PKT_HDR_INDEX_DATA_PHYS (0x3)
+
+#define SPE_ADDR_PKT_NS BIT(7)
+#define SPE_ADDR_PKT_CH BIT(6)
+#define SPE_ADDR_PKT_EL_OFFSET (5)
+#define SPE_ADDR_PKT_EL_MASK (0x3 << SPE_ADDR_PKT_EL_OFFSET)
+#define SPE_ADDR_PKT_EL0 (0)
+#define SPE_ADDR_PKT_EL1 (1)
+#define SPE_ADDR_PKT_EL2 (2)
+#define SPE_ADDR_PKT_EL3 (3)
+
const char *arm_spe_pkt_name(enum arm_spe_pkt_type);
int arm_spe_get_packet(const unsigned char *buf, size_t len,
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 53be12b23ff4..3882a5360ada 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -4,46 +4,85 @@
* Copyright (c) 2017-2018, Arm Ltd.
*/
+#include <byteswap.h>
#include <endian.h>
#include <errno.h>
-#include <byteswap.h>
#include <inttypes.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
#include <linux/bitops.h>
+#include <linux/kernel.h>
#include <linux/log2.h>
+#include <linux/types.h>
#include <linux/zalloc.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include "auxtrace.h"
#include "color.h"
+#include "debug.h"
+#include "evlist.h"
#include "evsel.h"
#include "machine.h"
#include "session.h"
-#include "debug.h"
-#include "auxtrace.h"
+#include "symbol.h"
+#include "thread.h"
+#include "thread-stack.h"
+#include "tool.h"
+#include "util/synthetic-events.h"
+
#include "arm-spe.h"
-#include "arm-spe-pkt-decoder.h"
+#include "arm-spe-decoder/arm-spe-decoder.h"
+#include "arm-spe-decoder/arm-spe-pkt-decoder.h"
+
+#define MAX_TIMESTAMP (~0ULL)
struct arm_spe {
struct auxtrace auxtrace;
struct auxtrace_queues queues;
struct auxtrace_heap heap;
+ struct itrace_synth_opts synth_opts;
u32 auxtrace_type;
struct perf_session *session;
struct machine *machine;
u32 pmu_type;
+
+ u8 timeless_decoding;
+ u8 data_queued;
+
+ u8 sample_flc;
+ u8 sample_llc;
+ u8 sample_tlb;
+ u8 sample_branch;
+ u8 sample_remote_access;
+
+ u64 l1d_miss_id;
+ u64 l1d_access_id;
+ u64 llc_miss_id;
+ u64 llc_access_id;
+ u64 tlb_miss_id;
+ u64 tlb_access_id;
+ u64 branch_miss_id;
+ u64 remote_access_id;
+
+ u64 kernel_start;
+
+ unsigned long num_events;
};
struct arm_spe_queue {
- struct arm_spe *spe;
- unsigned int queue_nr;
- struct auxtrace_buffer *buffer;
- bool on_heap;
- bool done;
- pid_t pid;
- pid_t tid;
- int cpu;
+ struct arm_spe *spe;
+ unsigned int queue_nr;
+ struct auxtrace_buffer *buffer;
+ struct auxtrace_buffer *old_buffer;
+ union perf_event *event_buf;
+ bool on_heap;
+ bool done;
+ pid_t pid;
+ pid_t tid;
+ int cpu;
+ struct arm_spe_decoder *decoder;
+ u64 time;
+ u64 timestamp;
+ struct thread *thread;
};
static void arm_spe_dump(struct arm_spe *spe __maybe_unused,
@@ -92,44 +131,520 @@ static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf,
arm_spe_dump(spe, buf, len);
}
-static int arm_spe_process_event(struct perf_session *session __maybe_unused,
- union perf_event *event __maybe_unused,
- struct perf_sample *sample __maybe_unused,
- struct perf_tool *tool __maybe_unused)
+static int arm_spe_get_trace(struct arm_spe_buffer *b, void *data)
+{
+ struct arm_spe_queue *speq = data;
+ struct auxtrace_buffer *buffer = speq->buffer;
+ struct auxtrace_buffer *old_buffer = speq->old_buffer;
+ struct auxtrace_queue *queue;
+
+ queue = &speq->spe->queues.queue_array[speq->queue_nr];
+
+ buffer = auxtrace_buffer__next(queue, buffer);
+ /* If no more data, drop the previous auxtrace_buffer and return */
+ if (!buffer) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ b->len = 0;
+ return 0;
+ }
+
+ speq->buffer = buffer;
+
+ /* If the aux_buffer doesn't have data associated, try to load it */
+ if (!buffer->data) {
+ /* get the file desc associated with the perf data file */
+ int fd = perf_data__fd(speq->spe->session->data);
+
+ buffer->data = auxtrace_buffer__get_data(buffer, fd);
+ if (!buffer->data)
+ return -ENOMEM;
+ }
+
+ b->len = buffer->size;
+ b->buf = buffer->data;
+
+ if (b->len) {
+ if (old_buffer)
+ auxtrace_buffer__drop_data(old_buffer);
+ speq->old_buffer = buffer;
+ } else {
+ auxtrace_buffer__drop_data(buffer);
+ return arm_spe_get_trace(b, data);
+ }
+
+ return 0;
+}
+
+static struct arm_spe_queue *arm_spe__alloc_queue(struct arm_spe *spe,
+ unsigned int queue_nr)
+{
+ struct arm_spe_params params = { .get_trace = 0, };
+ struct arm_spe_queue *speq;
+
+ speq = zalloc(sizeof(*speq));
+ if (!speq)
+ return NULL;
+
+ speq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
+ if (!speq->event_buf)
+ goto out_free;
+
+ speq->spe = spe;
+ speq->queue_nr = queue_nr;
+ speq->pid = -1;
+ speq->tid = -1;
+ speq->cpu = -1;
+
+ /* params set */
+ params.get_trace = arm_spe_get_trace;
+ params.data = speq;
+
+ /* create new decoder */
+ speq->decoder = arm_spe_decoder_new(&params);
+ if (!speq->decoder)
+ goto out_free;
+
+ return speq;
+
+out_free:
+ zfree(&speq->event_buf);
+ free(speq);
+
+ return NULL;
+}
+
+static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
+{
+ return ip >= spe->kernel_start ?
+ PERF_RECORD_MISC_KERNEL :
+ PERF_RECORD_MISC_USER;
+}
+
+static void arm_spe_prep_sample(struct arm_spe *spe,
+ struct arm_spe_queue *speq,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ struct arm_spe_record *record = &speq->decoder->record;
+
+ if (!spe->timeless_decoding)
+ sample->time = speq->timestamp;
+
+ sample->ip = record->from_ip;
+ sample->cpumode = arm_spe_cpumode(spe, sample->ip);
+ sample->pid = speq->pid;
+ sample->tid = speq->tid;
+ sample->addr = record->to_ip;
+ sample->period = 1;
+ sample->cpu = speq->cpu;
+
+ event->sample.header.type = PERF_RECORD_SAMPLE;
+ event->sample.header.misc = sample->cpumode;
+ event->sample.header.size = sizeof(struct perf_event_header);
+}
+
+static inline int
+arm_spe_deliver_synth_event(struct arm_spe *spe,
+ struct arm_spe_queue *speq __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample)
+{
+ int ret;
+
+ ret = perf_session__deliver_synth_event(spe->session, event, sample);
+ if (ret)
+ pr_err("ARM SPE: failed to deliver event, error %d\n", ret);
+
+ return ret;
+}
+
+static int
+arm_spe_synth_spe_events_sample(struct arm_spe_queue *speq,
+ u64 spe_events_id)
+{
+ struct arm_spe *spe = speq->spe;
+ union perf_event *event = speq->event_buf;
+ struct perf_sample sample = { .ip = 0, };
+
+ arm_spe_prep_sample(spe, speq, event, &sample);
+
+ sample.id = spe_events_id;
+ sample.stream_id = spe_events_id;
+
+ return arm_spe_deliver_synth_event(spe, speq, event, &sample);
+}
+
+static int arm_spe_sample(struct arm_spe_queue *speq)
+{
+ const struct arm_spe_record *record = &speq->decoder->record;
+ struct arm_spe *spe = speq->spe;
+ int err;
+
+ if (spe->sample_flc) {
+ if (record->type & ARM_SPE_L1D_MISS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->l1d_miss_id);
+ if (err)
+ return err;
+ }
+
+ if (record->type & ARM_SPE_L1D_ACCESS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->l1d_access_id);
+ if (err)
+ return err;
+ }
+ }
+
+ if (spe->sample_llc) {
+ if (record->type & ARM_SPE_LLC_MISS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->llc_miss_id);
+ if (err)
+ return err;
+ }
+
+ if (record->type & ARM_SPE_LLC_ACCESS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->llc_access_id);
+ if (err)
+ return err;
+ }
+ }
+
+ if (spe->sample_tlb) {
+ if (record->type & ARM_SPE_TLB_MISS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->tlb_miss_id);
+ if (err)
+ return err;
+ }
+
+ if (record->type & ARM_SPE_TLB_ACCESS) {
+ err = arm_spe_synth_spe_events_sample(
+ speq, spe->tlb_access_id);
+ if (err)
+ return err;
+ }
+ }
+
+ if (spe->sample_branch && (record->type & ARM_SPE_BRANCH_MISS)) {
+ err = arm_spe_synth_spe_events_sample(speq,
+ spe->branch_miss_id);
+ if (err)
+ return err;
+ }
+
+ if (spe->sample_remote_access &&
+ (record->type & ARM_SPE_REMOTE_ACCESS)) {
+ err = arm_spe_synth_spe_events_sample(speq,
+ spe->remote_access_id);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
+{
+ struct arm_spe *spe = speq->spe;
+ int ret;
+
+ if (!spe->kernel_start)
+ spe->kernel_start = machine__kernel_start(spe->machine);
+
+ while (1) {
+ ret = arm_spe_decode(speq->decoder);
+ if (!ret) {
+ pr_debug("No data or all data has been processed.\n");
+ return 1;
+ }
+
+ /*
+ * Error is detected when decode SPE trace data, continue to
+ * the next trace data and find out more records.
+ */
+ if (ret < 0)
+ continue;
+
+ ret = arm_spe_sample(speq);
+ if (ret)
+ return ret;
+
+ if (!spe->timeless_decoding && speq->timestamp >= *timestamp) {
+ *timestamp = speq->timestamp;
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+static int arm_spe__setup_queue(struct arm_spe *spe,
+ struct auxtrace_queue *queue,
+ unsigned int queue_nr)
+{
+ struct arm_spe_queue *speq = queue->priv;
+ struct arm_spe_record *record;
+
+ if (list_empty(&queue->head) || speq)
+ return 0;
+
+ speq = arm_spe__alloc_queue(spe, queue_nr);
+
+ if (!speq)
+ return -ENOMEM;
+
+ queue->priv = speq;
+
+ if (queue->cpu != -1)
+ speq->cpu = queue->cpu;
+
+ if (!speq->on_heap) {
+ int ret;
+
+ if (spe->timeless_decoding)
+ return 0;
+
+retry:
+ ret = arm_spe_decode(speq->decoder);
+
+ if (!ret)
+ return 0;
+
+ if (ret < 0)
+ goto retry;
+
+ record = &speq->decoder->record;
+
+ speq->timestamp = record->timestamp;
+ ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
+ if (ret)
+ return ret;
+ speq->on_heap = true;
+ }
+
+ return 0;
+}
+
+static int arm_spe__setup_queues(struct arm_spe *spe)
+{
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < spe->queues.nr_queues; i++) {
+ ret = arm_spe__setup_queue(spe, &spe->queues.queue_array[i], i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int arm_spe__update_queues(struct arm_spe *spe)
+{
+ if (spe->queues.new_data) {
+ spe->queues.new_data = false;
+ return arm_spe__setup_queues(spe);
+ }
+
+ return 0;
+}
+
+static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
+{
+ struct evsel *evsel;
+ struct evlist *evlist = spe->session->evlist;
+ bool timeless_decoding = true;
+
+ /*
+ * Circle through the list of event and complain if we find one
+ * with the time bit set.
+ */
+ evlist__for_each_entry(evlist, evsel) {
+ if ((evsel->core.attr.sample_type & PERF_SAMPLE_TIME))
+ timeless_decoding = false;
+ }
+
+ return timeless_decoding;
+}
+
+static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
+ struct auxtrace_queue *queue)
+{
+ struct arm_spe_queue *speq = queue->priv;
+ pid_t tid;
+
+ tid = machine__get_current_tid(spe->machine, speq->cpu);
+ if (tid != -1) {
+ speq->tid = tid;
+ thread__zput(speq->thread);
+ } else
+ speq->tid = queue->tid;
+
+ if ((!speq->thread) && (speq->tid != -1)) {
+ speq->thread = machine__find_thread(spe->machine, -1,
+ speq->tid);
+ }
+
+ if (speq->thread) {
+ speq->pid = speq->thread->pid_;
+ if (queue->cpu == -1)
+ speq->cpu = speq->thread->cpu;
+ }
+}
+
+static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
+{
+ unsigned int queue_nr;
+ u64 ts;
+ int ret;
+
+ while (1) {
+ struct auxtrace_queue *queue;
+ struct arm_spe_queue *speq;
+
+ if (!spe->heap.heap_cnt)
+ return 0;
+
+ if (spe->heap.heap_array[0].ordinal >= timestamp)
+ return 0;
+
+ queue_nr = spe->heap.heap_array[0].queue_nr;
+ queue = &spe->queues.queue_array[queue_nr];
+ speq = queue->priv;
+
+ auxtrace_heap__pop(&spe->heap);
+
+ if (spe->heap.heap_cnt) {
+ ts = spe->heap.heap_array[0].ordinal + 1;
+ if (ts > timestamp)
+ ts = timestamp;
+ } else {
+ ts = timestamp;
+ }
+
+ arm_spe_set_pid_tid_cpu(spe, queue);
+
+ ret = arm_spe_run_decoder(speq, &ts);
+ if (ret < 0) {
+ auxtrace_heap__add(&spe->heap, queue_nr, ts);
+ return ret;
+ }
+
+ if (!ret) {
+ ret = auxtrace_heap__add(&spe->heap, queue_nr, ts);
+ if (ret < 0)
+ return ret;
+ } else {
+ speq->on_heap = false;
+ }
+ }
+
+ return 0;
+}
+
+static int arm_spe_process_timeless_queues(struct arm_spe *spe, pid_t tid,
+ u64 time_)
{
+ struct auxtrace_queues *queues = &spe->queues;
+ unsigned int i;
+ u64 ts = 0;
+
+ for (i = 0; i < queues->nr_queues; i++) {
+ struct auxtrace_queue *queue = &spe->queues.queue_array[i];
+ struct arm_spe_queue *speq = queue->priv;
+
+ if (speq && (tid == -1 || speq->tid == tid)) {
+ speq->time = time_;
+ arm_spe_set_pid_tid_cpu(spe, queue);
+ arm_spe_run_decoder(speq, &ts);
+ }
+ }
return 0;
}
+static int arm_spe_process_event(struct perf_session *session,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_tool *tool)
+{
+ int err = 0;
+ u64 timestamp;
+ struct arm_spe *spe = container_of(session->auxtrace,
+ struct arm_spe, auxtrace);
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events) {
+ pr_err("SPE trace requires ordered events\n");
+ return -EINVAL;
+ }
+
+ if (sample->time && (sample->time != (u64) -1))
+ timestamp = sample->time;
+ else
+ timestamp = 0;
+
+ if (timestamp || spe->timeless_decoding) {
+ err = arm_spe__update_queues(spe);
+ if (err)
+ return err;
+ }
+
+ if (spe->timeless_decoding) {
+ if (event->header.type == PERF_RECORD_EXIT) {
+ err = arm_spe_process_timeless_queues(spe,
+ event->fork.tid,
+ sample->time);
+ }
+ } else if (timestamp) {
+ if (event->header.type == PERF_RECORD_EXIT) {
+ err = arm_spe_process_queues(spe, timestamp);
+ if (err)
+ return err;
+ }
+ }
+
+ return err;
+}
+
static int arm_spe_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool __maybe_unused)
{
struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
auxtrace);
- struct auxtrace_buffer *buffer;
- off_t data_offset;
- int fd = perf_data__fd(session->data);
- int err;
- if (perf_data__is_pipe(session->data)) {
- data_offset = 0;
- } else {
- data_offset = lseek(fd, 0, SEEK_CUR);
- if (data_offset == -1)
- return -errno;
- }
+ if (!spe->data_queued) {
+ struct auxtrace_buffer *buffer;
+ off_t data_offset;
+ int fd = perf_data__fd(session->data);
+ int err;
- err = auxtrace_queues__add_event(&spe->queues, session, event,
- data_offset, &buffer);
- if (err)
- return err;
+ if (perf_data__is_pipe(session->data)) {
+ data_offset = 0;
+ } else {
+ data_offset = lseek(fd, 0, SEEK_CUR);
+ if (data_offset == -1)
+ return -errno;
+ }
- /* Dump here now we have copied a piped trace out of the pipe */
- if (dump_trace) {
- if (auxtrace_buffer__get_data(buffer, fd)) {
- arm_spe_dump_event(spe, buffer->data,
- buffer->size);
- auxtrace_buffer__put_data(buffer);
+ err = auxtrace_queues__add_event(&spe->queues, session, event,
+ data_offset, &buffer);
+ if (err)
+ return err;
+
+ /* Dump here now we have copied a piped trace out of the pipe */
+ if (dump_trace) {
+ if (auxtrace_buffer__get_data(buffer, fd)) {
+ arm_spe_dump_event(spe, buffer->data,
+ buffer->size);
+ auxtrace_buffer__put_data(buffer);
+ }
}
}
@@ -139,7 +654,25 @@ static int arm_spe_process_auxtrace_event(struct perf_session *session,
static int arm_spe_flush(struct perf_session *session __maybe_unused,
struct perf_tool *tool __maybe_unused)
{
- return 0;
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe,
+ auxtrace);
+ int ret;
+
+ if (dump_trace)
+ return 0;
+
+ if (!tool->ordered_events)
+ return -EINVAL;
+
+ ret = arm_spe__update_queues(spe);
+ if (ret < 0)
+ return ret;
+
+ if (spe->timeless_decoding)
+ return arm_spe_process_timeless_queues(spe, -1,
+ MAX_TIMESTAMP - 1);
+
+ return arm_spe_process_queues(spe, MAX_TIMESTAMP);
}
static void arm_spe_free_queue(void *priv)
@@ -148,6 +681,9 @@ static void arm_spe_free_queue(void *priv)
if (!speq)
return;
+ thread__zput(speq->thread);
+ arm_spe_decoder_free(speq->decoder);
+ zfree(&speq->event_buf);
free(speq);
}
@@ -176,6 +712,14 @@ static void arm_spe_free(struct perf_session *session)
free(spe);
}
+static bool arm_spe_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, auxtrace);
+
+ return evsel->core.attr.type == spe->pmu_type;
+}
+
static const char * const arm_spe_info_fmts[] = {
[ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n",
};
@@ -188,11 +732,189 @@ static void arm_spe_print_info(__u64 *arr)
fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]);
}
+struct arm_spe_synth {
+ struct perf_tool dummy_tool;
+ struct perf_session *session;
+};
+
+static int arm_spe_event_synth(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct arm_spe_synth *arm_spe_synth =
+ container_of(tool, struct arm_spe_synth, dummy_tool);
+
+ return perf_session__deliver_synth_event(arm_spe_synth->session,
+ event, NULL);
+}
+
+static int arm_spe_synth_event(struct perf_session *session,
+ struct perf_event_attr *attr, u64 id)
+{
+ struct arm_spe_synth arm_spe_synth;
+
+ memset(&arm_spe_synth, 0, sizeof(struct arm_spe_synth));
+ arm_spe_synth.session = session;
+
+ return perf_event__synthesize_attr(&arm_spe_synth.dummy_tool, attr, 1,
+ &id, arm_spe_event_synth);
+}
+
+static void arm_spe_set_event_name(struct evlist *evlist, u64 id,
+ const char *name)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.id && evsel->core.id[0] == id) {
+ if (evsel->name)
+ zfree(&evsel->name);
+ evsel->name = strdup(name);
+ break;
+ }
+ }
+}
+
+static int
+arm_spe_synth_events(struct arm_spe *spe, struct perf_session *session)
+{
+ struct evlist *evlist = session->evlist;
+ struct evsel *evsel;
+ struct perf_event_attr attr;
+ bool found = false;
+ u64 id;
+ int err;
+
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->core.attr.type == spe->pmu_type) {
+ found = true;
+ break;
+ }
+ }
+
+ if (!found) {
+ pr_debug("No selected events with SPE trace data\n");
+ return 0;
+ }
+
+ memset(&attr, 0, sizeof(struct perf_event_attr));
+ attr.size = sizeof(struct perf_event_attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.sample_type = evsel->core.attr.sample_type & PERF_SAMPLE_MASK;
+ attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
+ PERF_SAMPLE_PERIOD;
+ if (spe->timeless_decoding)
+ attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
+ else
+ attr.sample_type |= PERF_SAMPLE_TIME;
+
+ attr.exclude_user = evsel->core.attr.exclude_user;
+ attr.exclude_kernel = evsel->core.attr.exclude_kernel;
+ attr.exclude_hv = evsel->core.attr.exclude_hv;
+ attr.exclude_host = evsel->core.attr.exclude_host;
+ attr.exclude_guest = evsel->core.attr.exclude_guest;
+ attr.sample_id_all = evsel->core.attr.sample_id_all;
+ attr.read_format = evsel->core.attr.read_format;
+
+ /* create new id val to be a fixed offset from evsel id */
+ id = evsel->core.id[0] + 1000000000;
+
+ if (!id)
+ id = 1;
+
+ if (spe->synth_opts.flc) {
+ spe->sample_flc = true;
+
+ /* Level 1 data cache miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->l1d_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "l1d-miss");
+ id += 1;
+
+ /* Level 1 data cache access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->l1d_access_id = id;
+ arm_spe_set_event_name(evlist, id, "l1d-access");
+ id += 1;
+ }
+
+ if (spe->synth_opts.llc) {
+ spe->sample_llc = true;
+
+ /* Last level cache miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->llc_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "llc-miss");
+ id += 1;
+
+ /* Last level cache access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->llc_access_id = id;
+ arm_spe_set_event_name(evlist, id, "llc-access");
+ id += 1;
+ }
+
+ if (spe->synth_opts.tlb) {
+ spe->sample_tlb = true;
+
+ /* TLB miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->tlb_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "tlb-miss");
+ id += 1;
+
+ /* TLB access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->tlb_access_id = id;
+ arm_spe_set_event_name(evlist, id, "tlb-access");
+ id += 1;
+ }
+
+ if (spe->synth_opts.branches) {
+ spe->sample_branch = true;
+
+ /* Branch miss */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->branch_miss_id = id;
+ arm_spe_set_event_name(evlist, id, "branch-miss");
+ id += 1;
+ }
+
+ if (spe->synth_opts.remote_access) {
+ spe->sample_remote_access = true;
+
+ /* Remote access */
+ err = arm_spe_synth_event(session, &attr, id);
+ if (err)
+ return err;
+ spe->remote_access_id = id;
+ arm_spe_set_event_name(evlist, id, "remote-access");
+ id += 1;
+ }
+
+ return 0;
+}
+
int arm_spe_process_auxtrace_info(union perf_event *event,
struct perf_session *session)
{
struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
- size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE;
+ size_t min_sz = sizeof(u64) * ARM_SPE_AUXTRACE_PRIV_MAX;
struct arm_spe *spe;
int err;
@@ -213,17 +935,41 @@ int arm_spe_process_auxtrace_info(union perf_event *event,
spe->auxtrace_type = auxtrace_info->type;
spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE];
+ spe->timeless_decoding = arm_spe__is_timeless_decoding(spe);
spe->auxtrace.process_event = arm_spe_process_event;
spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event;
spe->auxtrace.flush_events = arm_spe_flush;
spe->auxtrace.free_events = arm_spe_free_events;
spe->auxtrace.free = arm_spe_free;
+ spe->auxtrace.evsel_is_auxtrace = arm_spe_evsel_is_auxtrace;
session->auxtrace = &spe->auxtrace;
arm_spe_print_info(&auxtrace_info->priv[0]);
+ if (dump_trace)
+ return 0;
+
+ if (session->itrace_synth_opts && session->itrace_synth_opts->set)
+ spe->synth_opts = *session->itrace_synth_opts;
+ else
+ itrace_synth_opts__set_default(&spe->synth_opts, false);
+
+ err = arm_spe_synth_events(spe, session);
+ if (err)
+ goto err_free_queues;
+
+ err = auxtrace_queues__process_index(&spe->queues, session);
+ if (err)
+ goto err_free_queues;
+
+ if (spe->queues.populated)
+ spe->data_queued = true;
+
return 0;
+err_free_queues:
+ auxtrace_queues__free(&spe->queues);
+ session->auxtrace = NULL;
err_free:
free(spe);
return err;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 3571ce72ca28..25c639ac4ad4 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -33,6 +33,7 @@
#include "evsel.h"
#include "evsel_config.h"
#include "symbol.h"
+#include "util/perf_api_probe.h"
#include "util/synthetic-events.h"
#include "thread_map.h"
#include "asm/bug.h"
@@ -54,29 +55,9 @@
#include "util/mmap.h"
#include <linux/ctype.h>
-#include <linux/kernel.h>
#include "symbol/kallsyms.h"
#include <internal/lib.h>
-static struct perf_pmu *perf_evsel__find_pmu(struct evsel *evsel)
-{
- struct perf_pmu *pmu = NULL;
-
- while ((pmu = perf_pmu__scan(pmu)) != NULL) {
- if (pmu->type == evsel->core.attr.type)
- break;
- }
-
- return pmu;
-}
-
-static bool perf_evsel__is_aux_event(struct evsel *evsel)
-{
- struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
-
- return pmu && pmu->auxtrace;
-}
-
/*
* Make a group from 'leader' to 'last', requiring that the events were not
* already grouped to a different leader.
@@ -88,7 +69,7 @@ static int perf_evlist__regroup(struct evlist *evlist,
struct evsel *evsel;
bool grp;
- if (!perf_evsel__is_group_leader(leader))
+ if (!evsel__is_group_leader(leader))
return -EINVAL;
grp = false;
@@ -703,8 +684,8 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist,
evlist__for_each_entry(evlist, evsel) {
sz = evsel->core.attr.aux_sample_size;
- if (perf_evsel__is_group_leader(evsel)) {
- has_aux_leader = perf_evsel__is_aux_event(evsel);
+ if (evsel__is_group_leader(evsel)) {
+ has_aux_leader = evsel__is_aux_event(evsel);
if (sz) {
if (has_aux_leader)
pr_err("Cannot add AUX area sampling to an AUX area event\n");
@@ -723,10 +704,10 @@ static int auxtrace_validate_aux_sample_size(struct evlist *evlist,
pr_err("Cannot add AUX area sampling because group leader is not an AUX area event\n");
return -EINVAL;
}
- perf_evsel__set_sample_bit(evsel, AUX);
+ evsel__set_sample_bit(evsel, AUX);
opts->auxtrace_sample_mode = true;
} else {
- perf_evsel__reset_sample_bit(evsel, AUX);
+ evsel__reset_sample_bit(evsel, AUX);
}
}
@@ -747,7 +728,7 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr,
struct evlist *evlist,
struct record_opts *opts, const char *str)
{
- struct perf_evsel_config_term *term;
+ struct evsel_config_term *term;
struct evsel *aux_evsel;
bool has_aux_sample_size = false;
bool has_aux_leader = false;
@@ -777,8 +758,8 @@ int auxtrace_parse_sample_options(struct auxtrace_record *itr,
/* Set aux_sample_size based on --aux-sample option */
evlist__for_each_entry(evlist, evsel) {
- if (perf_evsel__is_group_leader(evsel)) {
- has_aux_leader = perf_evsel__is_aux_event(evsel);
+ if (evsel__is_group_leader(evsel)) {
+ has_aux_leader = evsel__is_aux_event(evsel);
} else if (has_aux_leader) {
evsel->core.attr.aux_sample_size = sz;
}
@@ -787,9 +768,9 @@ no_opt:
aux_evsel = NULL;
/* Override with aux_sample_size from config term */
evlist__for_each_entry(evlist, evsel) {
- if (perf_evsel__is_aux_event(evsel))
+ if (evsel__is_aux_event(evsel))
aux_evsel = evsel;
- term = perf_evsel__get_config_term(evsel, AUX_SAMPLE_SIZE);
+ term = evsel__get_config_term(evsel, AUX_SAMPLE_SIZE);
if (term) {
has_aux_sample_size = true;
evsel->core.attr.aux_sample_size = term->val.aux_sample_size;
@@ -1234,29 +1215,79 @@ out_free:
return err;
}
+static void unleader_evsel(struct evlist *evlist, struct evsel *leader)
+{
+ struct evsel *new_leader = NULL;
+ struct evsel *evsel;
+
+ /* Find new leader for the group */
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->leader != leader || evsel == leader)
+ continue;
+ if (!new_leader)
+ new_leader = evsel;
+ evsel->leader = new_leader;
+ }
+
+ /* Update group information */
+ if (new_leader) {
+ zfree(&new_leader->group_name);
+ new_leader->group_name = leader->group_name;
+ leader->group_name = NULL;
+
+ new_leader->core.nr_members = leader->core.nr_members - 1;
+ leader->core.nr_members = 1;
+ }
+}
+
+static void unleader_auxtrace(struct perf_session *session)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ if (auxtrace__evsel_is_auxtrace(session, evsel) &&
+ evsel__is_group_leader(evsel)) {
+ unleader_evsel(session->evlist, evsel);
+ }
+ }
+}
+
int perf_event__process_auxtrace_info(struct perf_session *session,
union perf_event *event)
{
enum auxtrace_type type = event->auxtrace_info.type;
+ int err;
if (dump_trace)
fprintf(stdout, " type: %u\n", type);
switch (type) {
case PERF_AUXTRACE_INTEL_PT:
- return intel_pt_process_auxtrace_info(event, session);
+ err = intel_pt_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_INTEL_BTS:
- return intel_bts_process_auxtrace_info(event, session);
+ err = intel_bts_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_ARM_SPE:
- return arm_spe_process_auxtrace_info(event, session);
+ err = arm_spe_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_CS_ETM:
- return cs_etm__process_auxtrace_info(event, session);
+ err = cs_etm__process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_S390_CPUMSF:
- return s390_cpumsf_process_auxtrace_info(event, session);
+ err = s390_cpumsf_process_auxtrace_info(event, session);
+ break;
case PERF_AUXTRACE_UNKNOWN:
default:
return -EINVAL;
}
+
+ if (err)
+ return err;
+
+ unleader_auxtrace(session);
+
+ return 0;
}
s64 perf_event__process_auxtrace(struct perf_session *session,
@@ -1299,6 +1330,11 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts,
synth_opts->pwr_events = true;
synth_opts->other_events = true;
synth_opts->errors = true;
+ synth_opts->flc = true;
+ synth_opts->llc = true;
+ synth_opts->tlb = true;
+ synth_opts->remote_access = true;
+
if (no_sample) {
synth_opts->period_type = PERF_ITRACE_PERIOD_INSTRUCTIONS;
synth_opts->period = 1;
@@ -1412,8 +1448,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
synth_opts->branches = true;
synth_opts->returns = true;
break;
+ case 'G':
case 'g':
- synth_opts->callchain = true;
+ if (p[-1] == 'G')
+ synth_opts->add_callchain = true;
+ else
+ synth_opts->callchain = true;
synth_opts->callchain_sz =
PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
while (*p == ' ' || *p == ',')
@@ -1428,8 +1468,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
synth_opts->callchain_sz = val;
}
break;
+ case 'L':
case 'l':
- synth_opts->last_branch = true;
+ if (p[-1] == 'L')
+ synth_opts->add_last_branch = true;
+ else
+ synth_opts->last_branch = true;
synth_opts->last_branch_sz =
PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
while (*p == ' ' || *p == ',')
@@ -1451,6 +1495,18 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
goto out_err;
p = endptr;
break;
+ case 'f':
+ synth_opts->flc = true;
+ break;
+ case 'm':
+ synth_opts->llc = true;
+ break;
+ case 't':
+ synth_opts->tlb = true;
+ break;
+ case 'a':
+ synth_opts->remote_access = true;
+ break;
case ' ':
case ',':
break;
@@ -2482,7 +2538,7 @@ static int parse_addr_filter(struct evsel *evsel, const char *filter,
goto out_exit;
}
- if (perf_evsel__append_addr_filter(evsel, new_filter)) {
+ if (evsel__append_addr_filter(evsel, new_filter)) {
err = -ENOMEM;
goto out_exit;
}
@@ -2500,9 +2556,9 @@ out_exit:
return err;
}
-static int perf_evsel__nr_addr_filter(struct evsel *evsel)
+static int evsel__nr_addr_filter(struct evsel *evsel)
{
- struct perf_pmu *pmu = perf_evsel__find_pmu(evsel);
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
int nr_addr_filters = 0;
if (!pmu)
@@ -2521,7 +2577,7 @@ int auxtrace_parse_filters(struct evlist *evlist)
evlist__for_each_entry(evlist, evsel) {
filter = evsel->filter;
- max_nr = perf_evsel__nr_addr_filter(evsel);
+ max_nr = evsel__nr_addr_filter(evsel);
if (!filter || !max_nr)
continue;
evsel->filter = NULL;
@@ -2577,3 +2633,12 @@ void auxtrace__free(struct perf_session *session)
return session->auxtrace->free(session);
}
+
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ if (!session->auxtrace || !session->auxtrace->evsel_is_auxtrace)
+ return false;
+
+ return session->auxtrace->evsel_is_auxtrace(session, evsel);
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index e58ef160b599..142ccf7d34df 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -21,6 +21,7 @@
union perf_event;
struct perf_session;
struct evlist;
+struct evsel;
struct perf_tool;
struct mmap;
struct perf_sample;
@@ -62,6 +63,7 @@ enum itrace_period_type {
* because 'perf inject' will write it out
* @instructions: whether to synthesize 'instructions' events
* @branches: whether to synthesize 'branches' events
+ * (branch misses only for Arm SPE)
* @transactions: whether to synthesize events for transactions
* @ptwrites: whether to synthesize events for ptwrites
* @pwr_events: whether to synthesize power events
@@ -73,8 +75,14 @@ enum itrace_period_type {
* @calls: limit branch samples to calls (can be combined with @returns)
* @returns: limit branch samples to returns (can be combined with @calls)
* @callchain: add callchain to 'instructions' events
+ * @add_callchain: add callchain to existing event records
* @thread_stack: feed branches to the thread_stack
* @last_branch: add branch context to 'instruction' events
+ * @add_last_branch: add branch context to existing event records
+ * @flc: whether to synthesize first level cache events
+ * @llc: whether to synthesize last level cache events
+ * @tlb: whether to synthesize TLB events
+ * @remote_access: whether to synthesize remote access events
* @callchain_sz: maximum callchain size
* @last_branch_sz: branch context size
* @period: 'instructions' events period
@@ -100,8 +108,14 @@ struct itrace_synth_opts {
bool calls;
bool returns;
bool callchain;
+ bool add_callchain;
bool thread_stack;
bool last_branch;
+ bool add_last_branch;
+ bool flc;
+ bool llc;
+ bool tlb;
+ bool remote_access;
unsigned int callchain_sz;
unsigned int last_branch_sz;
unsigned long long period;
@@ -166,6 +180,8 @@ struct auxtrace {
struct perf_tool *tool);
void (*free_events)(struct perf_session *session);
void (*free)(struct perf_session *session);
+ bool (*evsel_is_auxtrace)(struct perf_session *session,
+ struct evsel *evsel);
};
/**
@@ -584,10 +600,12 @@ void auxtrace__dump_auxtrace_sample(struct perf_session *session,
int auxtrace__flush_events(struct perf_session *session, struct perf_tool *tool);
void auxtrace__free_events(struct perf_session *session);
void auxtrace__free(struct perf_session *session);
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel);
#define ITRACE_HELP \
" i: synthesize instructions events\n" \
-" b: synthesize branches events\n" \
+" b: synthesize branches events (branch misses for Arm SPE)\n" \
" c: synthesize branches events (calls only)\n" \
" r: synthesize branches events (returns only)\n" \
" x: synthesize transactions events\n" \
@@ -595,6 +613,10 @@ void auxtrace__free(struct perf_session *session);
" p: synthesize power events\n" \
" e: synthesize error events\n" \
" d: create a debug log\n" \
+" f: synthesize first level cache events\n" \
+" m: synthesize last level cache events\n" \
+" t: synthesize TLB events\n" \
+" a: synthesize remote access events\n" \
" g[len]: synthesize a call chain (use with i or x)\n" \
" l[len]: synthesize last branch entries (use with i or x)\n" \
" sNUMBER: skip initial number of events\n" \
@@ -750,6 +772,13 @@ void auxtrace_index__free(struct list_head *head __maybe_unused)
}
static inline
+bool auxtrace__evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+ struct evsel *evsel __maybe_unused)
+{
+ return false;
+}
+
+static inline
int auxtrace_parse_filters(struct evlist *evlist __maybe_unused)
{
return 0;
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index fbbb6d640dad..423ec69bda6c 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -65,8 +65,7 @@ struct block_info *block_info__new(void)
return bi;
}
-int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
- struct hist_entry *left, struct hist_entry *right)
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right)
{
struct block_info *bi_l = left->block_info;
struct block_info *bi_r = right->block_info;
@@ -74,30 +73,27 @@ int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
if (!bi_l->sym || !bi_r->sym) {
if (!bi_l->sym && !bi_r->sym)
- return 0;
+ return -1;
else if (!bi_l->sym)
return -1;
else
return 1;
}
- if (bi_l->sym == bi_r->sym) {
- if (bi_l->start == bi_r->start) {
- if (bi_l->end == bi_r->end)
- return 0;
- else
- return (int64_t)(bi_r->end - bi_l->end);
- } else
- return (int64_t)(bi_r->start - bi_l->start);
- } else {
- cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+ cmp = strcmp(bi_l->sym->name, bi_r->sym->name);
+ if (cmp)
return cmp;
- }
- if (bi_l->sym->start != bi_r->sym->start)
- return (int64_t)(bi_r->sym->start - bi_l->sym->start);
+ if (bi_l->start != bi_r->start)
+ return (int64_t)(bi_r->start - bi_l->start);
- return (int64_t)(bi_r->sym->end - bi_l->sym->end);
+ return (int64_t)(bi_r->end - bi_l->end);
+}
+
+int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
+ struct hist_entry *left, struct hist_entry *right)
+{
+ return __block_info__cmp(left, right);
}
static void init_block_info(struct block_info *bi, struct symbol *sym,
@@ -185,6 +181,17 @@ static int block_column_width(struct perf_hpp_fmt *fmt,
return block_fmt->width;
}
+static int color_pct(struct perf_hpp *hpp, int width, double pct)
+{
+#ifdef HAVE_SLANG_SUPPORT
+ if (use_browser) {
+ return __hpp__slsmg_color_printf(hpp, "%*.2f%%",
+ width - 1, pct);
+ }
+#endif
+ return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, pct);
+}
+
static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct perf_hpp *hpp,
struct hist_entry *he)
@@ -192,14 +199,11 @@ static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct block_fmt *block_fmt = container_of(fmt, struct block_fmt, fmt);
struct block_info *bi = he->block_info;
double ratio = 0.0;
- char buf[16];
if (block_fmt->total_cycles)
ratio = (double)bi->cycles / (double)block_fmt->total_cycles;
- sprintf(buf, "%.2f%%", 100.0 * ratio);
-
- return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+ return color_pct(hpp, block_fmt->width, 100.0 * ratio);
}
static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt,
@@ -252,16 +256,13 @@ static int block_cycles_pct_entry(struct perf_hpp_fmt *fmt,
struct block_info *bi = he->block_info;
double ratio = 0.0;
u64 avg;
- char buf[16];
if (block_fmt->block_cycles && bi->num_aggr) {
avg = bi->cycles_aggr / bi->num_aggr;
ratio = (double)avg / (double)block_fmt->block_cycles;
}
- sprintf(buf, "%.2f%%", 100.0 * ratio);
-
- return scnprintf(hpp->buf, hpp->size, "%*s", block_fmt->width, buf);
+ return color_pct(hpp, block_fmt->width, 100.0 * ratio);
}
static int block_avg_cycles_entry(struct perf_hpp_fmt *fmt,
@@ -349,7 +350,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
switch (idx) {
case PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT:
- fmt->entry = block_total_cycles_pct_entry;
+ fmt->color = block_total_cycles_pct_entry;
fmt->cmp = block_info__cmp;
fmt->sort = block_total_cycles_pct_sort;
break;
@@ -357,7 +358,7 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
fmt->entry = block_cycles_lbr_entry;
break;
case PERF_HPP_REPORT__BLOCK_CYCLES_PCT:
- fmt->entry = block_cycles_pct_entry;
+ fmt->color = block_cycles_pct_entry;
break;
case PERF_HPP_REPORT__BLOCK_AVG_CYCLES:
fmt->entry = block_avg_cycles_entry;
@@ -377,33 +378,41 @@ static void hpp_register(struct block_fmt *block_fmt, int idx,
}
static void register_block_columns(struct perf_hpp_list *hpp_list,
- struct block_fmt *block_fmts)
+ struct block_fmt *block_fmts,
+ int *block_hpps, int nr_hpps)
{
- for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++)
- hpp_register(&block_fmts[i], i, hpp_list);
+ for (int i = 0; i < nr_hpps; i++)
+ hpp_register(&block_fmts[i], block_hpps[i], hpp_list);
}
-static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts)
+static void init_block_hist(struct block_hist *bh, struct block_fmt *block_fmts,
+ int *block_hpps, int nr_hpps)
{
__hists__init(&bh->block_hists, &bh->block_list);
perf_hpp_list__init(&bh->block_list);
bh->block_list.nr_header_lines = 1;
- register_block_columns(&bh->block_list, block_fmts);
+ register_block_columns(&bh->block_list, block_fmts,
+ block_hpps, nr_hpps);
- perf_hpp_list__register_sort_field(&bh->block_list,
- &block_fmts[PERF_HPP_REPORT__BLOCK_TOTAL_CYCLES_PCT].fmt);
+ /* Sort by the first fmt */
+ perf_hpp_list__register_sort_field(&bh->block_list, &block_fmts[0].fmt);
}
-static void process_block_report(struct hists *hists,
- struct block_report *block_report,
- u64 total_cycles)
+static int process_block_report(struct hists *hists,
+ struct block_report *block_report,
+ u64 total_cycles, int *block_hpps,
+ int nr_hpps)
{
struct rb_node *next = rb_first_cached(&hists->entries);
struct block_hist *bh = &block_report->hist;
struct hist_entry *he;
- init_block_hist(bh, block_report->fmts);
+ if (nr_hpps > PERF_HPP_REPORT__BLOCK_MAX_INDEX)
+ return -1;
+
+ block_report->nr_fmts = nr_hpps;
+ init_block_hist(bh, block_report->fmts, block_hpps, nr_hpps);
while (next) {
he = rb_entry(next, struct hist_entry, rb_node);
@@ -412,16 +421,19 @@ static void process_block_report(struct hists *hists,
next = rb_next(&he->rb_node);
}
- for (int i = 0; i < PERF_HPP_REPORT__BLOCK_MAX_INDEX; i++) {
+ for (int i = 0; i < nr_hpps; i++) {
block_report->fmts[i].total_cycles = total_cycles;
block_report->fmts[i].block_cycles = block_report->cycles;
}
hists__output_resort(&bh->block_hists, NULL);
+ return 0;
}
struct block_report *block_info__create_report(struct evlist *evlist,
- u64 total_cycles)
+ u64 total_cycles,
+ int *block_hpps, int nr_hpps,
+ int *nr_reps)
{
struct block_report *block_reports;
int nr_hists = evlist->core.nr_entries, i = 0;
@@ -434,13 +446,23 @@ struct block_report *block_info__create_report(struct evlist *evlist,
evlist__for_each_entry(evlist, pos) {
struct hists *hists = evsel__hists(pos);
- process_block_report(hists, &block_reports[i], total_cycles);
+ process_block_report(hists, &block_reports[i], total_cycles,
+ block_hpps, nr_hpps);
i++;
}
+ *nr_reps = nr_hists;
return block_reports;
}
+void block_info__free_report(struct block_report *reps, int nr_reps)
+{
+ for (int i = 0; i < nr_reps; i++)
+ hists__delete_entries(&reps[i].hist.block_hists);
+
+ free(reps);
+}
+
int report__browse_block_hists(struct block_hist *bh, float min_percent,
struct evsel *evsel, struct perf_env *env,
struct annotation_options *annotation_opts)
@@ -452,13 +474,11 @@ int report__browse_block_hists(struct block_hist *bh, float min_percent,
symbol_conf.report_individual_block = true;
hists__fprintf(&bh->block_hists, true, 0, 0, min_percent,
stdout, true);
- hists__delete_entries(&bh->block_hists);
return 0;
case 1:
symbol_conf.report_individual_block = true;
ret = block_hists_tui_browse(bh, evsel, min_percent,
env, annotation_opts);
- hists__delete_entries(&bh->block_hists);
return ret;
default:
return -1;
diff --git a/tools/perf/util/block-info.h b/tools/perf/util/block-info.h
index bef0d75e9819..42e9dcc4cf0a 100644
--- a/tools/perf/util/block-info.h
+++ b/tools/perf/util/block-info.h
@@ -45,6 +45,7 @@ struct block_report {
struct block_hist hist;
u64 cycles;
struct block_fmt fmts[PERF_HPP_REPORT__BLOCK_MAX_INDEX];
+ int nr_fmts;
};
struct block_hist;
@@ -61,6 +62,8 @@ static inline void __block_info__zput(struct block_info **bi)
#define block_info__zput(bi) __block_info__zput(&bi)
+int64_t __block_info__cmp(struct hist_entry *left, struct hist_entry *right);
+
int64_t block_info__cmp(struct perf_hpp_fmt *fmt __maybe_unused,
struct hist_entry *left, struct hist_entry *right);
@@ -68,7 +71,11 @@ int block_info__process_sym(struct hist_entry *he, struct block_hist *bh,
u64 *block_cycles_aggr, u64 total_cycles);
struct block_report *block_info__create_report(struct evlist *evlist,
- u64 total_cycles);
+ u64 total_cycles,
+ int *block_hpps, int nr_hpps,
+ int *nr_reps);
+
+void block_info__free_report(struct block_report *reps, int nr_reps);
int report__browse_block_hists(struct block_hist *bh, float min_percent,
struct evsel *evsel, struct perf_env *env,
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index a3207d900339..3742511a08d1 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -6,6 +6,9 @@
#include <bpf/libbpf.h>
#include <linux/btf.h>
#include <linux/err.h>
+#include <linux/string.h>
+#include <internal/lib.h>
+#include <symbol/kallsyms.h>
#include "bpf-event.h"
#include "debug.h"
#include "dso.h"
@@ -290,11 +293,82 @@ out:
return err ? -1 : 0;
}
+struct kallsyms_parse {
+ union perf_event *event;
+ perf_event__handler_t process;
+ struct machine *machine;
+ struct perf_tool *tool;
+};
+
+static int
+process_bpf_image(char *name, u64 addr, struct kallsyms_parse *data)
+{
+ struct machine *machine = data->machine;
+ union perf_event *event = data->event;
+ struct perf_record_ksymbol *ksymbol;
+ int len;
+
+ ksymbol = &event->ksymbol;
+
+ *ksymbol = (struct perf_record_ksymbol) {
+ .header = {
+ .type = PERF_RECORD_KSYMBOL,
+ .size = offsetof(struct perf_record_ksymbol, name),
+ },
+ .addr = addr,
+ .len = page_size,
+ .ksym_type = PERF_RECORD_KSYMBOL_TYPE_BPF,
+ .flags = 0,
+ };
+
+ len = scnprintf(ksymbol->name, KSYM_NAME_LEN, "%s", name);
+ ksymbol->header.size += PERF_ALIGN(len + 1, sizeof(u64));
+ memset((void *) event + event->header.size, 0, machine->id_hdr_size);
+ event->header.size += machine->id_hdr_size;
+
+ return perf_tool__process_synth_event(data->tool, event, machine,
+ data->process);
+}
+
+static int
+kallsyms_process_symbol(void *data, const char *_name,
+ char type __maybe_unused, u64 start)
+{
+ char disp[KSYM_NAME_LEN];
+ char *module, *name;
+ unsigned long id;
+ int err = 0;
+
+ module = strchr(_name, '\t');
+ if (!module)
+ return 0;
+
+ /* We are going after [bpf] module ... */
+ if (strcmp(module + 1, "[bpf]"))
+ return 0;
+
+ name = memdup(_name, (module - _name) + 1);
+ if (!name)
+ return -ENOMEM;
+
+ name[module - _name] = 0;
+
+ /* .. and only for trampolines and dispatchers */
+ if ((sscanf(name, "bpf_trampoline_%lu", &id) == 1) ||
+ (sscanf(name, "bpf_dispatcher_%s", disp) == 1))
+ err = process_bpf_image(name, start, data);
+
+ free(name);
+ return err;
+}
+
int perf_event__synthesize_bpf_events(struct perf_session *session,
perf_event__handler_t process,
struct machine *machine,
struct record_opts *opts)
{
+ const char *kallsyms_filename = "/proc/kallsyms";
+ struct kallsyms_parse arg;
union perf_event *event;
__u32 id = 0;
int err;
@@ -303,6 +377,8 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
event = malloc(sizeof(event->bpf) + KSYM_NAME_LEN + machine->id_hdr_size);
if (!event)
return -1;
+
+ /* Synthesize all the bpf programs in system. */
while (true) {
err = bpf_prog_get_next_id(id, &id);
if (err) {
@@ -335,6 +411,23 @@ int perf_event__synthesize_bpf_events(struct perf_session *session,
break;
}
}
+
+ /* Synthesize all the bpf images - trampolines/dispatchers. */
+ if (symbol_conf.kallsyms_name != NULL)
+ kallsyms_filename = symbol_conf.kallsyms_name;
+
+ arg = (struct kallsyms_parse) {
+ .event = event,
+ .process = process,
+ .machine = machine,
+ .tool = session->tool,
+ };
+
+ if (kallsyms__parse(kallsyms_filename, &arg, kallsyms_process_symbol)) {
+ pr_err("%s: failed to synthesize bpf images: %s\n",
+ __func__, strerror(errno));
+ }
+
free(event);
return err;
}
@@ -416,8 +509,7 @@ static int bpf_event__sb_cb(union perf_event *event, void *data)
return 0;
}
-int bpf_event__add_sb_event(struct evlist **evlist,
- struct perf_env *env)
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
diff --git a/tools/perf/util/bpf-event.h b/tools/perf/util/bpf-event.h
index 81fdc88e6c1a..68f315c3df5b 100644
--- a/tools/perf/util/bpf-event.h
+++ b/tools/perf/util/bpf-event.h
@@ -33,8 +33,7 @@ struct btf_node {
#ifdef HAVE_LIBBPF_SUPPORT
int machine__process_bpf(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
-int bpf_event__add_sb_event(struct evlist **evlist,
- struct perf_env *env);
+int evlist__add_bpf_sb_event(struct evlist *evlist, struct perf_env *env);
void bpf_event__print_bpf_prog_info(struct bpf_prog_info *info,
struct perf_env *env,
FILE *fp);
@@ -46,8 +45,8 @@ static inline int machine__process_bpf(struct machine *machine __maybe_unused,
return 0;
}
-static inline int bpf_event__add_sb_event(struct evlist **evlist __maybe_unused,
- struct perf_env *env __maybe_unused)
+static inline int evlist__add_bpf_sb_event(struct evlist *evlist __maybe_unused,
+ struct perf_env *env __maybe_unused)
{
return 0;
}
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index 10c187b8b8ea..2feb751516ab 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -1225,7 +1225,7 @@ bpf__obj_config_map(struct bpf_object *obj,
out:
free(map_name);
if (!err)
- key_scan_pos += strlen(map_opt);
+ *key_scan_pos += strlen(map_opt);
return err;
}
@@ -1430,7 +1430,7 @@ apply_config_evsel_for_key(const char *name, int map_fd, void *pkey,
return -BPF_LOADER_ERRNO__OBJCONF_MAP_EVTINH;
}
- if (perf_evsel__is_bpf_output(evsel))
+ if (evsel__is_bpf_output(evsel))
check_pass = true;
if (attr->type == PERF_TYPE_RAW)
check_pass = true;
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 88e00d268f6f..17b2ccc61094 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -12,15 +12,21 @@
#include <linux/stddef.h>
#include <linux/perf_event.h>
#include <linux/types.h>
+#include "event.h"
struct branch_flags {
- u64 mispred:1;
- u64 predicted:1;
- u64 in_tx:1;
- u64 abort:1;
- u64 cycles:16;
- u64 type:4;
- u64 reserved:40;
+ union {
+ u64 value;
+ struct {
+ u64 mispred:1;
+ u64 predicted:1;
+ u64 in_tx:1;
+ u64 abort:1;
+ u64 cycles:16;
+ u64 type:4;
+ u64 reserved:40;
+ };
+ };
};
struct branch_info {
@@ -39,9 +45,30 @@ struct branch_entry {
struct branch_stack {
u64 nr;
- struct branch_entry entries[0];
+ u64 hw_idx;
+ struct branch_entry entries[];
};
+/*
+ * The hw_idx is only available when PERF_SAMPLE_BRANCH_HW_INDEX is applied.
+ * Otherwise, the output format of a sample with branch stack is
+ * struct branch_stack {
+ * u64 nr;
+ * struct branch_entry entries[0];
+ * }
+ * Check whether the hw_idx is available,
+ * and return the corresponding pointer of entries[0].
+ */
+static inline struct branch_entry *perf_sample__branch_entries(struct perf_sample *sample)
+{
+ u64 *entry = (u64 *)sample->branch_stack;
+
+ entry++;
+ if (sample->no_hw_idx)
+ return (struct branch_entry *)entry;
+ return (struct branch_entry *)(++entry);
+}
+
struct branch_type_stat {
bool branch_to;
u64 counts[PERF_BR_MAX];
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 818aa4efd386..2775b752f2fa 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -1599,3 +1599,17 @@ void callchain_cursor_reset(struct callchain_cursor *cursor)
for (node = cursor->first; node != NULL; node = node->next)
map__zput(node->ms.map);
}
+
+void callchain_param_setup(u64 sample_type)
+{
+ if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) {
+ if ((sample_type & PERF_SAMPLE_REGS_USER) &&
+ (sample_type & PERF_SAMPLE_STACK_USER)) {
+ callchain_param.record_mode = CALLCHAIN_DWARF;
+ dwarf_callchain_users = true;
+ } else if (sample_type & PERF_SAMPLE_BRANCH_STACK)
+ callchain_param.record_mode = CALLCHAIN_LBR;
+ else
+ callchain_param.record_mode = CALLCHAIN_FP;
+ }
+}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 706bb7bbe1e1..fe36a9e5ccd1 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -143,6 +143,9 @@ struct callchain_cursor_node {
u64 ip;
struct map_symbol ms;
const char *srcline;
+ /* Indicate valid cursor node for LBR stitch */
+ bool valid;
+
bool branch;
struct branch_flags branch_flags;
u64 branch_from;
@@ -151,6 +154,11 @@ struct callchain_cursor_node {
struct callchain_cursor_node *next;
};
+struct stitch_list {
+ struct list_head node;
+ struct callchain_cursor_node cursor;
+};
+
struct callchain_cursor {
u64 nr;
struct callchain_cursor_node *first;
@@ -289,4 +297,5 @@ int callchain_branch_counts(struct callchain_root *root,
u64 *branch_count, u64 *predicted_count,
u64 *abort_count, u64 *cycles_count);
+void callchain_param_setup(u64 sample_type);
#endif /* __PERF_CALLCHAIN_H */
diff --git a/tools/perf/util/cap.h b/tools/perf/util/cap.h
index 051dc590ceee..ae52878c0b2e 100644
--- a/tools/perf/util/cap.h
+++ b/tools/perf/util/cap.h
@@ -29,4 +29,8 @@ static inline bool perf_cap__capable(int cap __maybe_unused)
#define CAP_SYSLOG 34
#endif
+#ifndef CAP_PERFMON
+#define CAP_PERFMON 38
+#endif
+
#endif /* __PERF_CAP_H */
diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 4881d4af3381..050dea9f1e88 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -3,75 +3,16 @@
#include "evsel.h"
#include "cgroup.h"
#include "evlist.h"
-#include <linux/stringify.h>
#include <linux/zalloc.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
+#include <api/fs/fs.h>
int nr_cgroups;
-static int
-cgroupfs_find_mountpoint(char *buf, size_t maxlen)
-{
- FILE *fp;
- char mountpoint[PATH_MAX + 1], tokens[PATH_MAX + 1], type[PATH_MAX + 1];
- char path_v1[PATH_MAX + 1], path_v2[PATH_MAX + 2], *path;
- char *token, *saved_ptr = NULL;
-
- fp = fopen("/proc/mounts", "r");
- if (!fp)
- return -1;
-
- /*
- * in order to handle split hierarchy, we need to scan /proc/mounts
- * and inspect every cgroupfs mount point to find one that has
- * perf_event subsystem
- */
- path_v1[0] = '\0';
- path_v2[0] = '\0';
-
- while (fscanf(fp, "%*s %"__stringify(PATH_MAX)"s %"__stringify(PATH_MAX)"s %"
- __stringify(PATH_MAX)"s %*d %*d\n",
- mountpoint, type, tokens) == 3) {
-
- if (!path_v1[0] && !strcmp(type, "cgroup")) {
-
- token = strtok_r(tokens, ",", &saved_ptr);
-
- while (token != NULL) {
- if (!strcmp(token, "perf_event")) {
- strcpy(path_v1, mountpoint);
- break;
- }
- token = strtok_r(NULL, ",", &saved_ptr);
- }
- }
-
- if (!path_v2[0] && !strcmp(type, "cgroup2"))
- strcpy(path_v2, mountpoint);
-
- if (path_v1[0] && path_v2[0])
- break;
- }
- fclose(fp);
-
- if (path_v1[0])
- path = path_v1;
- else if (path_v2[0])
- path = path_v2;
- else
- return -1;
-
- if (strlen(path) < maxlen) {
- strcpy(buf, path);
- return 0;
- }
- return -1;
-}
-
static int open_cgroup(const char *name)
{
char path[PATH_MAX + 1];
@@ -79,7 +20,7 @@ static int open_cgroup(const char *name)
int fd;
- if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1))
+ if (cgroupfs_find_mountpoint(mnt, PATH_MAX + 1, "perf_event"))
return -1;
scnprintf(path, PATH_MAX, "%s/%s", mnt, name);
@@ -166,7 +107,8 @@ found:
static void cgroup__delete(struct cgroup *cgroup)
{
- close(cgroup->fd);
+ if (cgroup->fd >= 0)
+ close(cgroup->fd);
zfree(&cgroup->name);
free(cgroup);
}
@@ -250,3 +192,83 @@ int parse_cgroups(const struct option *opt, const char *str,
}
return 0;
}
+
+static struct cgroup *__cgroup__findnew(struct rb_root *root, uint64_t id,
+ bool create, const char *path)
+{
+ struct rb_node **p = &root->rb_node;
+ struct rb_node *parent = NULL;
+ struct cgroup *cgrp;
+
+ while (*p != NULL) {
+ parent = *p;
+ cgrp = rb_entry(parent, struct cgroup, node);
+
+ if (cgrp->id == id)
+ return cgrp;
+
+ if (cgrp->id < id)
+ p = &(*p)->rb_left;
+ else
+ p = &(*p)->rb_right;
+ }
+
+ if (!create)
+ return NULL;
+
+ cgrp = malloc(sizeof(*cgrp));
+ if (cgrp == NULL)
+ return NULL;
+
+ cgrp->name = strdup(path);
+ if (cgrp->name == NULL) {
+ free(cgrp);
+ return NULL;
+ }
+
+ cgrp->fd = -1;
+ cgrp->id = id;
+ refcount_set(&cgrp->refcnt, 1);
+
+ rb_link_node(&cgrp->node, parent, p);
+ rb_insert_color(&cgrp->node, root);
+
+ return cgrp;
+}
+
+struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id,
+ const char *path)
+{
+ struct cgroup *cgrp;
+
+ down_write(&env->cgroups.lock);
+ cgrp = __cgroup__findnew(&env->cgroups.tree, id, true, path);
+ up_write(&env->cgroups.lock);
+ return cgrp;
+}
+
+struct cgroup *cgroup__find(struct perf_env *env, uint64_t id)
+{
+ struct cgroup *cgrp;
+
+ down_read(&env->cgroups.lock);
+ cgrp = __cgroup__findnew(&env->cgroups.tree, id, false, NULL);
+ up_read(&env->cgroups.lock);
+ return cgrp;
+}
+
+void perf_env__purge_cgroups(struct perf_env *env)
+{
+ struct rb_node *node;
+ struct cgroup *cgrp;
+
+ down_write(&env->cgroups.lock);
+ while (!RB_EMPTY_ROOT(&env->cgroups.tree)) {
+ node = rb_first(&env->cgroups.tree);
+ cgrp = rb_entry(node, struct cgroup, node);
+
+ rb_erase(node, &env->cgroups.tree);
+ cgroup__put(cgrp);
+ }
+ up_write(&env->cgroups.lock);
+}
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index 2ec11f01090d..e98d5975fe55 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -3,16 +3,19 @@
#define __CGROUP_H__
#include <linux/refcount.h>
+#include <linux/rbtree.h>
+#include "util/env.h"
struct option;
struct cgroup {
- char *name;
- int fd;
- refcount_t refcnt;
+ struct rb_node node;
+ u64 id;
+ char *name;
+ int fd;
+ refcount_t refcnt;
};
-
extern int nr_cgroups; /* number of explicit cgroups defined */
struct cgroup *cgroup__get(struct cgroup *cgroup);
@@ -26,4 +29,10 @@ void evlist__set_default_cgroup(struct evlist *evlist, struct cgroup *cgroup);
int parse_cgroups(const struct option *opt, const char *str, int unset);
+struct cgroup *cgroup__findnew(struct perf_env *env, uint64_t id,
+ const char *path);
+struct cgroup *cgroup__find(struct perf_env *env, uint64_t id);
+
+void perf_env__purge_cgroups(struct perf_env *env);
+
#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c
index a12872f2856a..fa8248aadb59 100644
--- a/tools/perf/util/cloexec.c
+++ b/tools/perf/util/cloexec.c
@@ -28,7 +28,7 @@ int __weak sched_getcpu(void)
static int perf_flag_probe(void)
{
- /* use 'safest' configuration as used in perf_evsel__fallback() */
+ /* use 'safest' configuration as used in evsel__fallback() */
struct perf_event_attr attr = {
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
@@ -65,7 +65,7 @@ static int perf_flag_probe(void)
return 1;
}
- WARN_ONCE(err != EINVAL && err != EBUSY,
+ WARN_ONCE(err != EINVAL && err != EBUSY && err != EACCES,
"perf_event_open(..., PERF_FLAG_FD_CLOEXEC) failed with unexpected error %d (%s)\n",
err, str_error_r(err, sbuf, sizeof(sbuf)));
@@ -83,7 +83,7 @@ static int perf_flag_probe(void)
if (fd >= 0)
close(fd);
- if (WARN_ONCE(fd < 0 && err != EBUSY,
+ if (WARN_ONCE(fd < 0 && err != EBUSY && err != EACCES,
"perf_event_open(..., 0) failed unexpectedly with error %d (%s)\n",
err, str_error_r(err, sbuf, sizeof(sbuf))))
return -1;
diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index ef38eba56ed0..20be0504fb95 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -17,10 +17,10 @@
#include "util/event.h" /* proc_map_timeout */
#include "util/hist.h" /* perf_hist_config */
#include "util/llvm-utils.h" /* perf_llvm_config */
+#include "util/stat.h" /* perf_stat__set_big_num */
#include "build-id.h"
#include "debug.h"
#include "config.h"
-#include "debug.h"
#include <sys/types.h>
#include <sys/stat.h>
#include <stdlib.h>
@@ -452,6 +452,15 @@ static int perf_ui_config(const char *var, const char *value)
return 0;
}
+static int perf_stat_config(const char *var, const char *value)
+{
+ if (!strcmp(var, "stat.big-num"))
+ perf_stat__set_big_num(perf_config_bool(var, value));
+
+ /* Add other config variables here. */
+ return 0;
+}
+
int perf_default_config(const char *var, const char *value,
void *dummy __maybe_unused)
{
@@ -473,6 +482,9 @@ int perf_default_config(const char *var, const char *value,
if (strstarts(var, "buildid."))
return perf_buildid_config(var, value);
+ if (strstarts(var, "stat."))
+ return perf_stat_config(var, value);
+
/* Add other config variables here. */
return 0;
}
diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
index f94e1a23dad6..582f3aeaf5e4 100644
--- a/tools/perf/util/counts.c
+++ b/tools/perf/util/counts.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <stdlib.h>
+#include <string.h>
#include "evsel.h"
#include "counts.h"
#include <linux/zalloc.h>
@@ -42,24 +43,25 @@ void perf_counts__delete(struct perf_counts *counts)
}
}
-static void perf_counts__reset(struct perf_counts *counts)
+void perf_counts__reset(struct perf_counts *counts)
{
xyarray__reset(counts->loaded);
xyarray__reset(counts->values);
+ memset(&counts->aggr, 0, sizeof(struct perf_counts_values));
}
-void perf_evsel__reset_counts(struct evsel *evsel)
+void evsel__reset_counts(struct evsel *evsel)
{
perf_counts__reset(evsel->counts);
}
-int perf_evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads)
+int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads)
{
evsel->counts = perf_counts__new(ncpus, nthreads);
return evsel->counts != NULL ? 0 : -ENOMEM;
}
-void perf_evsel__free_counts(struct evsel *evsel)
+void evsel__free_counts(struct evsel *evsel)
{
perf_counts__delete(evsel->counts);
evsel->counts = NULL;
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 92196df4945f..7ff36bf6d644 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -37,9 +37,10 @@ perf_counts__set_loaded(struct perf_counts *counts, int cpu, int thread, bool lo
struct perf_counts *perf_counts__new(int ncpus, int nthreads);
void perf_counts__delete(struct perf_counts *counts);
+void perf_counts__reset(struct perf_counts *counts);
-void perf_evsel__reset_counts(struct evsel *evsel);
-int perf_evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads);
-void perf_evsel__free_counts(struct evsel *evsel);
+void evsel__reset_counts(struct evsel *evsel);
+int evsel__alloc_counts(struct evsel *evsel, int ncpus, int nthreads);
+void evsel__free_counts(struct evsel *evsel);
#endif /* __PERF_COUNTS_H */
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 983b7388f22b..dc5c5e6fc502 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -317,7 +317,7 @@ static void set_max_cpu_num(void)
/* get the highest possible cpu number for a sparse allocation */
ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/possible", mnt);
- if (ret == PATH_MAX) {
+ if (ret >= PATH_MAX) {
pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
goto out;
}
@@ -328,7 +328,7 @@ static void set_max_cpu_num(void)
/* get the highest present cpu number for a sparse allocation */
ret = snprintf(path, PATH_MAX, "%s/devices/system/cpu/present", mnt);
- if (ret == PATH_MAX) {
+ if (ret >= PATH_MAX) {
pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
goto out;
}
@@ -356,7 +356,7 @@ static void set_max_node_num(void)
/* get the highest possible cpu number for a sparse allocation */
ret = snprintf(path, PATH_MAX, "%s/devices/system/node/possible", mnt);
- if (ret == PATH_MAX) {
+ if (ret >= PATH_MAX) {
pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
goto out;
}
@@ -441,7 +441,7 @@ int cpu__setup_cpunode_map(void)
return 0;
n = snprintf(path, PATH_MAX, "%s/devices/system/node", mnt);
- if (n == PATH_MAX) {
+ if (n >= PATH_MAX) {
pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
return -1;
}
@@ -456,7 +456,7 @@ int cpu__setup_cpunode_map(void)
continue;
n = snprintf(buf, PATH_MAX, "%s/%s", path, dent1->d_name);
- if (n == PATH_MAX) {
+ if (n >= PATH_MAX) {
pr_err("sysfs path crossed PATH_MAX(%d) size\n", PATH_MAX);
continue;
}
diff --git a/tools/perf/util/cputopo.h b/tools/perf/util/cputopo.h
index 7bf6b811f715..6201c3790d86 100644
--- a/tools/perf/util/cputopo.h
+++ b/tools/perf/util/cputopo.h
@@ -22,7 +22,7 @@ struct numa_topology_node {
struct numa_topology {
u32 nr;
- struct numa_topology_node nodes[0];
+ struct numa_topology_node nodes[];
};
struct cpu_topology *cpu_topology__new(void);
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index cd92a99eb89d..cd007cc9c283 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -564,6 +564,8 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
resp = cs_etm_decoder__set_tid(etmq, packet_queue,
elem, trace_chan_id);
break;
+ /* Unused packet types */
+ case OCSD_GEN_TRC_ELEM_I_RANGE_NOPATH:
case OCSD_GEN_TRC_ELEM_ADDR_NACC:
case OCSD_GEN_TRC_ELEM_CYCLE_COUNT:
case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN:
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 5471045ebf5c..c283223fb31f 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -94,6 +94,9 @@ struct cs_etm_queue {
struct cs_etm_traceid_queue **traceid_queues;
};
+/* RB tree for quick conversion between traceID and metadata pointers */
+static struct intlist *traceid_list;
+
static int cs_etm__update_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
@@ -363,6 +366,23 @@ struct cs_etm_packet_queue
return NULL;
}
+static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
+ struct cs_etm_traceid_queue *tidq)
+{
+ struct cs_etm_packet *tmp;
+
+ if (etm->sample_branches || etm->synth_opts.last_branch ||
+ etm->sample_instructions) {
+ /*
+ * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
+ * the next incoming packet.
+ */
+ tmp = tidq->packet;
+ tidq->packet = tidq->prev_packet;
+ tidq->prev_packet = tmp;
+ }
+}
+
static void cs_etm__packet_dump(const char *pkt_string)
{
const char *color = PERF_COLOR_BLUE;
@@ -614,6 +634,16 @@ static void cs_etm__free(struct perf_session *session)
zfree(&aux);
}
+static bool cs_etm__evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
+ struct cs_etm_auxtrace,
+ auxtrace);
+
+ return evsel->core.attr.type == aux->pmu_type;
+}
+
static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address)
{
struct machine *machine;
@@ -945,7 +975,7 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
if (packet->isa == CS_ETM_ISA_T32) {
u64 addr = packet->start_addr;
- while (offset > 0) {
+ while (offset) {
addr += cs_etm__t32_instr_size(etmq,
trace_chan_id, addr);
offset--;
@@ -1134,10 +1164,8 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
- if (etm->synth_opts.last_branch) {
- cs_etm__copy_last_branch_rb(etmq, tidq);
+ if (etm->synth_opts.last_branch)
sample.branch_stack = tidq->last_branch;
- }
if (etm->synth_opts.inject) {
ret = cs_etm__inject_event(event, &sample,
@@ -1153,9 +1181,6 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
"CS ETM Trace: failed to deliver instruction event, error %d\n",
ret);
- if (etm->synth_opts.last_branch)
- cs_etm__reset_last_branch_rb(tidq);
-
return ret;
}
@@ -1172,6 +1197,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
union perf_event *event = tidq->event_buf;
struct dummy_branch_stack {
u64 nr;
+ u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
u64 ip;
@@ -1202,6 +1228,7 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
if (etm->synth_opts.last_branch) {
dummy_bs = (struct dummy_branch_stack){
.nr = 1,
+ .hw_idx = -1ULL,
.entries = {
.from = sample.ip,
.to = sample.addr,
@@ -1340,12 +1367,14 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
struct cs_etm_traceid_queue *tidq)
{
struct cs_etm_auxtrace *etm = etmq->etm;
- struct cs_etm_packet *tmp;
int ret;
u8 trace_chan_id = tidq->trace_chan_id;
- u64 instrs_executed = tidq->packet->instr_count;
+ u64 instrs_prev;
+
+ /* Get instructions remainder from previous packet */
+ instrs_prev = tidq->period_instructions;
- tidq->period_instructions += instrs_executed;
+ tidq->period_instructions += tidq->packet->instr_count;
/*
* Record a branch when the last instruction in
@@ -1363,26 +1392,80 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
* TODO: allow period to be defined in cycles and clock time
*/
- /* Get number of instructions executed after the sample point */
- u64 instrs_over = tidq->period_instructions -
- etm->instructions_sample_period;
+ /*
+ * Below diagram demonstrates the instruction samples
+ * generation flows:
+ *
+ * Instrs Instrs Instrs Instrs
+ * Sample(n) Sample(n+1) Sample(n+2) Sample(n+3)
+ * | | | |
+ * V V V V
+ * --------------------------------------------------
+ * ^ ^
+ * | |
+ * Period Period
+ * instructions(Pi) instructions(Pi')
+ *
+ * | |
+ * \---------------- -----------------/
+ * V
+ * tidq->packet->instr_count
+ *
+ * Instrs Sample(n...) are the synthesised samples occurring
+ * every etm->instructions_sample_period instructions - as
+ * defined on the perf command line. Sample(n) is being the
+ * last sample before the current etm packet, n+1 to n+3
+ * samples are generated from the current etm packet.
+ *
+ * tidq->packet->instr_count represents the number of
+ * instructions in the current etm packet.
+ *
+ * Period instructions (Pi) contains the the number of
+ * instructions executed after the sample point(n) from the
+ * previous etm packet. This will always be less than
+ * etm->instructions_sample_period.
+ *
+ * When generate new samples, it combines with two parts
+ * instructions, one is the tail of the old packet and another
+ * is the head of the new coming packet, to generate
+ * sample(n+1); sample(n+2) and sample(n+3) consume the
+ * instructions with sample period. After sample(n+3), the rest
+ * instructions will be used by later packet and it is assigned
+ * to tidq->period_instructions for next round calculation.
+ */
/*
- * Calculate the address of the sampled instruction (-1 as
- * sample is reported as though instruction has just been
- * executed, but PC has not advanced to next instruction)
+ * Get the initial offset into the current packet instructions;
+ * entry conditions ensure that instrs_prev is less than
+ * etm->instructions_sample_period.
*/
- u64 offset = (instrs_executed - instrs_over - 1);
- u64 addr = cs_etm__instr_addr(etmq, trace_chan_id,
- tidq->packet, offset);
+ u64 offset = etm->instructions_sample_period - instrs_prev;
+ u64 addr;
- ret = cs_etm__synth_instruction_sample(
- etmq, tidq, addr, etm->instructions_sample_period);
- if (ret)
- return ret;
+ /* Prepare last branches for instruction sample */
+ if (etm->synth_opts.last_branch)
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+
+ while (tidq->period_instructions >=
+ etm->instructions_sample_period) {
+ /*
+ * Calculate the address of the sampled instruction (-1
+ * as sample is reported as though instruction has just
+ * been executed, but PC has not advanced to next
+ * instruction)
+ */
+ addr = cs_etm__instr_addr(etmq, trace_chan_id,
+ tidq->packet, offset - 1);
+ ret = cs_etm__synth_instruction_sample(
+ etmq, tidq, addr,
+ etm->instructions_sample_period);
+ if (ret)
+ return ret;
- /* Carry remaining instructions into next sample period */
- tidq->period_instructions = instrs_over;
+ offset += etm->instructions_sample_period;
+ tidq->period_instructions -=
+ etm->instructions_sample_period;
+ }
}
if (etm->sample_branches) {
@@ -1404,15 +1487,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
}
}
- if (etm->sample_branches || etm->synth_opts.last_branch) {
- /*
- * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
- * the next incoming packet.
- */
- tmp = tidq->packet;
- tidq->packet = tidq->prev_packet;
- tidq->prev_packet = tmp;
- }
+ cs_etm__packet_swap(etm, tidq);
return 0;
}
@@ -1441,7 +1516,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
{
int err = 0;
struct cs_etm_auxtrace *etm = etmq->etm;
- struct cs_etm_packet *tmp;
/* Handle start tracing packet */
if (tidq->prev_packet->sample_type == CS_ETM_EMPTY)
@@ -1449,6 +1523,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
if (etmq->etm->synth_opts.last_branch &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+ u64 addr;
+
+ /* Prepare last branches for instruction sample */
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+
/*
* Generate a last branch event for the branches left in the
* circular buffer at the end of the trace.
@@ -1456,7 +1535,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
* Use the address of the end of the last reported execution
* range
*/
- u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
+ addr = cs_etm__last_executed_instr(tidq->prev_packet);
err = cs_etm__synth_instruction_sample(
etmq, tidq, addr,
@@ -1476,15 +1555,11 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
}
swap_packet:
- if (etm->sample_branches || etm->synth_opts.last_branch) {
- /*
- * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for
- * the next incoming packet.
- */
- tmp = tidq->packet;
- tidq->packet = tidq->prev_packet;
- tidq->prev_packet = tmp;
- }
+ cs_etm__packet_swap(etm, tidq);
+
+ /* Reset last branches after flush the trace */
+ if (etm->synth_opts.last_branch)
+ cs_etm__reset_last_branch_rb(tidq);
return err;
}
@@ -1505,11 +1580,16 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq,
*/
if (etmq->etm->synth_opts.last_branch &&
tidq->prev_packet->sample_type == CS_ETM_RANGE) {
+ u64 addr;
+
+ /* Prepare last branches for instruction sample */
+ cs_etm__copy_last_branch_rb(etmq, tidq);
+
/*
* Use the address of the end of the last reported execution
* range.
*/
- u64 addr = cs_etm__last_executed_instr(tidq->prev_packet);
+ addr = cs_etm__last_executed_instr(tidq->prev_packet);
err = cs_etm__synth_instruction_sample(
etmq, tidq, addr,
@@ -2551,6 +2631,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
etm->auxtrace.flush_events = cs_etm__flush_events;
etm->auxtrace.free_events = cs_etm__free_events;
etm->auxtrace.free = cs_etm__free;
+ etm->auxtrace.evsel_is_auxtrace = cs_etm__evsel_is_auxtrace;
session->auxtrace = &etm->auxtrace;
etm->unknown_thread = thread__new(999999999, 999999999);
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 650ecc2a6349..4ad925d6d799 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -114,9 +114,6 @@ enum cs_etm_isa {
CS_ETM_ISA_T32,
};
-/* RB tree for quick conversion between traceID and metadata pointers */
-struct intlist *traceid_list;
-
struct cs_etm_queue;
struct cs_etm_packet {
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index dbc772bfb04e..5f36fc6a5578 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -835,7 +835,7 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
ret = add_bpf_output_values(event_class, event, sample);
if (ret)
return -1;
@@ -1155,7 +1155,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel)
{
struct bt_ctf_event_class *event_class;
struct evsel_priv *priv;
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
int ret;
pr("Adding event '%s' (type %d)\n", name, evsel->core.attr.type);
@@ -1174,7 +1174,7 @@ static int add_event(struct ctf_writer *cw, struct evsel *evsel)
goto err;
}
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
ret = add_bpf_output_types(cw, event_class);
if (ret)
goto err;
diff --git a/tools/perf/util/demangle-java.c b/tools/perf/util/demangle-java.c
index 6fb7f34c0814..39c05200ed65 100644
--- a/tools/perf/util/demangle-java.c
+++ b/tools/perf/util/demangle-java.c
@@ -15,7 +15,7 @@ enum {
MODE_CLASS = 1,
MODE_FUNC = 2,
MODE_TYPE = 3,
- MODE_CTYPE = 3, /* class arg */
+ MODE_CTYPE = 4, /* class arg */
};
#define BASE_ENT(c, n) [c - 'A']=n
@@ -27,7 +27,7 @@ static const char *base_types['Z' - 'A' + 1] = {
BASE_ENT('I', "int" ),
BASE_ENT('J', "long" ),
BASE_ENT('S', "short" ),
- BASE_ENT('Z', "bool" ),
+ BASE_ENT('Z', "boolean" ),
};
/*
@@ -59,15 +59,16 @@ __demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int
switch (*q) {
case 'L':
- if (mode == MODE_PREFIX || mode == MODE_CTYPE) {
- if (mode == MODE_CTYPE) {
+ if (mode == MODE_PREFIX || mode == MODE_TYPE) {
+ if (mode == MODE_TYPE) {
if (narg)
rlen += scnprintf(buf + rlen, maxlen - rlen, ", ");
narg++;
}
- rlen += scnprintf(buf + rlen, maxlen - rlen, "class ");
if (mode == MODE_PREFIX)
mode = MODE_CLASS;
+ else
+ mode = MODE_CTYPE;
} else
buf[rlen++] = *q;
break;
@@ -120,7 +121,7 @@ __demangle_java_sym(const char *str, const char *end, char *buf, int maxlen, int
if (mode != MODE_CLASS && mode != MODE_CTYPE)
goto error;
/* safe because at least one other char to process */
- if (isalpha(*(q + 1)))
+ if (isalpha(*(q + 1)) && mode == MODE_CLASS)
rlen += scnprintf(buf + rlen, maxlen - rlen, ".");
if (mode == MODE_CLASS)
mode = MODE_FUNC;
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 91f21239608b..99f0a39c3c59 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -47,6 +47,7 @@ char dso__symtab_origin(const struct dso *dso)
[DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO] = 'D',
[DSO_BINARY_TYPE__FEDORA_DEBUGINFO] = 'f',
[DSO_BINARY_TYPE__UBUNTU_DEBUGINFO] = 'u',
+ [DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO] = 'x',
[DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO] = 'o',
[DSO_BINARY_TYPE__BUILDID_DEBUGINFO] = 'b',
[DSO_BINARY_TYPE__SYSTEM_PATH_DSO] = 'd',
@@ -129,6 +130,21 @@ int dso__read_binary_type_filename(const struct dso *dso,
snprintf(filename + len, size - len, "%s", dso->long_name);
break;
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
+ /*
+ * Ubuntu can mixup /usr/lib with /lib, putting debuginfo in
+ * /usr/lib/debug/lib when it is expected to be in
+ * /usr/lib/debug/usr/lib
+ */
+ if (strlen(dso->long_name) < 9 ||
+ strncmp(dso->long_name, "/usr/lib/", 9)) {
+ ret = -1;
+ break;
+ }
+ len = __symbol__join_symfs(filename, size, "/usr/lib/debug");
+ snprintf(filename + len, size - len, "%s", dso->long_name + 4);
+ break;
+
case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
{
const char *last_slash;
@@ -191,6 +207,7 @@ int dso__read_binary_type_filename(const struct dso *dso,
case DSO_BINARY_TYPE__GUEST_KALLSYMS:
case DSO_BINARY_TYPE__JAVA_JIT:
case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
case DSO_BINARY_TYPE__NOT_FOUND:
ret = -1;
break;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 2db64b79617a..d3d03274b0d1 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -30,6 +30,7 @@ enum dso_binary_type {
DSO_BINARY_TYPE__BUILD_ID_CACHE_DEBUGINFO,
DSO_BINARY_TYPE__FEDORA_DEBUGINFO,
DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
DSO_BINARY_TYPE__SYSTEM_PATH_DSO,
DSO_BINARY_TYPE__GUEST_KMODULE,
@@ -40,6 +41,7 @@ enum dso_binary_type {
DSO_BINARY_TYPE__GUEST_KCORE,
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
DSO_BINARY_TYPE__BPF_PROG_INFO,
+ DSO_BINARY_TYPE__BPF_IMAGE,
DSO_BINARY_TYPE__NOT_FOUND,
};
@@ -136,7 +138,7 @@ struct dso_cache {
struct rb_node rb_node;
u64 offset;
u64 size;
- char data[0];
+ char data[];
};
struct auxtrace_cache;
@@ -208,7 +210,7 @@ struct dso {
struct nsinfo *nsinfo;
struct dso_id id;
refcount_t refcnt;
- char name[0];
+ char name[];
};
/* dso__for_each_symbol - iterate over the symbols of given type
diff --git a/tools/perf/util/dsos.c b/tools/perf/util/dsos.c
index 591707c69c39..939471731ea6 100644
--- a/tools/perf/util/dsos.c
+++ b/tools/perf/util/dsos.c
@@ -26,13 +26,29 @@ static int __dso_id__cmp(struct dso_id *a, struct dso_id *b)
return 0;
}
+static bool dso_id__empty(struct dso_id *id)
+{
+ if (!id)
+ return true;
+
+ return !id->maj && !id->min && !id->ino && !id->ino_generation;
+}
+
+static void dso__inject_id(struct dso *dso, struct dso_id *id)
+{
+ dso->id.maj = id->maj;
+ dso->id.min = id->min;
+ dso->id.ino = id->ino;
+ dso->id.ino_generation = id->ino_generation;
+}
+
static int dso_id__cmp(struct dso_id *a, struct dso_id *b)
{
/*
* The second is always dso->id, so zeroes if not set, assume passing
* NULL for a means a zeroed id
*/
- if (a == NULL)
+ if (dso_id__empty(a) || dso_id__empty(b))
return 0;
return __dso_id__cmp(a, b);
@@ -249,6 +265,10 @@ struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
static struct dso *__dsos__findnew_id(struct dsos *dsos, const char *name, struct dso_id *id)
{
struct dso *dso = __dsos__find_id(dsos, name, id, false);
+
+ if (dso && dso_id__empty(&dso->id) && !dso_id__empty(id))
+ dso__inject_id(dso, id);
+
return dso ? dso : __dsos__addnew_id(dsos, name, id);
}
diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c
index 4154f944f474..fadc59708ece 100644
--- a/tools/perf/util/env.c
+++ b/tools/perf/util/env.c
@@ -6,6 +6,7 @@
#include <linux/ctype.h>
#include <linux/zalloc.h>
#include "bpf-event.h"
+#include "cgroup.h"
#include <errno.h>
#include <sys/utsname.h>
#include <bpf/libbpf.h>
@@ -168,6 +169,7 @@ void perf_env__exit(struct perf_env *env)
int i;
perf_env__purge_bpf(env);
+ perf_env__purge_cgroups(env);
zfree(&env->hostname);
zfree(&env->os_release);
zfree(&env->version);
diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h
index 11d05ae3606a..1ab2682d5d2b 100644
--- a/tools/perf/util/env.h
+++ b/tools/perf/util/env.h
@@ -48,6 +48,7 @@ struct perf_env {
char *cpuid;
unsigned long long total_mem;
unsigned int msr_pmu_type;
+ unsigned int max_branches;
int nr_cmdline;
int nr_sibling_cores;
@@ -57,12 +58,14 @@ struct perf_env {
int nr_memory_nodes;
int nr_pmu_mappings;
int nr_groups;
+ int nr_cpu_pmu_caps;
char *cmdline;
const char **cmdline_argv;
char *sibling_cores;
char *sibling_dies;
char *sibling_threads;
char *pmu_mappings;
+ char *cpu_pmu_caps;
struct cpu_topology_map *cpu;
struct cpu_cache_level *caches;
int caches_cnt;
@@ -88,6 +91,12 @@ struct perf_env {
u32 btfs_cnt;
} bpf_progs;
+ /* same reason as above (for perf-top) */
+ struct {
+ struct rw_semaphore lock;
+ struct rb_root tree;
+ } cgroups;
+
/* For fast cpu to numa node lookup via perf_env__numa_node */
int *numa_map;
int nr_numa_map;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index c5447ff516a2..f581550a3015 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -54,6 +54,7 @@ static const char *perf_event__names[] = {
[PERF_RECORD_NAMESPACES] = "NAMESPACES",
[PERF_RECORD_KSYMBOL] = "KSYMBOL",
[PERF_RECORD_BPF_EVENT] = "BPF_EVENT",
+ [PERF_RECORD_CGROUP] = "CGROUP",
[PERF_RECORD_HEADER_ATTR] = "ATTR",
[PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE",
[PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA",
@@ -180,6 +181,12 @@ size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp)
return ret;
}
+size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp)
+{
+ return fprintf(fp, " cgroup: %" PRI_lu64 " %s\n",
+ event->cgroup.id, event->cgroup.path);
+}
+
int perf_event__process_comm(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -196,6 +203,14 @@ int perf_event__process_namespaces(struct perf_tool *tool __maybe_unused,
return machine__process_namespaces_event(machine, event, sample);
}
+int perf_event__process_cgroup(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ return machine__process_cgroup_event(machine, event, sample);
+}
+
int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_sample *sample,
@@ -417,6 +432,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
case PERF_RECORD_NAMESPACES:
ret += perf_event__fprintf_namespaces(event, fp);
break;
+ case PERF_RECORD_CGROUP:
+ ret += perf_event__fprintf_cgroup(event, fp);
+ break;
case PERF_RECORD_MMAP2:
ret += perf_event__fprintf_mmap2(event, fp);
break;
@@ -599,10 +617,23 @@ int machine__resolve(struct machine *machine, struct addr_location *al,
al->sym = map__find_symbol(al->map, al->addr);
}
- if (symbol_conf.sym_list &&
- (!al->sym || !strlist__has_entry(symbol_conf.sym_list,
- al->sym->name))) {
- al->filtered |= (1 << HIST_FILTER__SYMBOL);
+ if (symbol_conf.sym_list) {
+ int ret = 0;
+ char al_addr_str[32];
+ size_t sz = sizeof(al_addr_str);
+
+ if (al->sym) {
+ ret = strlist__has_entry(symbol_conf.sym_list,
+ al->sym->name);
+ }
+ if (!ret && al->sym) {
+ snprintf(al_addr_str, sz, "0x%"PRIx64,
+ al->map->unmap_ip(al->map, al->sym->start));
+ ret = strlist__has_entry(symbol_conf.sym_list,
+ al_addr_str);
+ }
+ if (!ret)
+ al->filtered |= (1 << HIST_FILTER__SYMBOL);
}
return 0;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 85223159737c..6ae01c3c2ffa 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -79,7 +79,7 @@ struct sample_read {
struct ip_callchain {
u64 nr;
- u64 ips[0];
+ u64 ips[];
};
struct branch_stack;
@@ -135,10 +135,12 @@ struct perf_sample {
u32 raw_size;
u64 data_src;
u64 phys_addr;
+ u64 cgroup;
u32 flags;
u16 insn_len;
u8 cpumode;
u16 misc;
+ bool no_hw_idx; /* No hw_idx collected in branch_stack */
char insn[MAX_INSN];
void *raw_data;
struct ip_callchain *callchain;
@@ -321,6 +323,10 @@ int perf_event__process_namespaces(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine);
+int perf_event__process_cgroup(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine);
int perf_event__process_mmap(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -376,6 +382,7 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_thread_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_cpu_map(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_namespaces(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_cgroup(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_ksymbol(union perf_event *event, FILE *fp);
size_t perf_event__fprintf_bpf(union perf_event *event, FILE *fp);
size_t perf_event__fprintf(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 1548237b6558..173b4f0e0e6e 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -23,6 +23,7 @@
#include "asm/bug.h"
#include "bpf-event.h"
#include "util/string2.h"
+#include "util/perf_api_probe.h"
#include <signal.h>
#include <unistd.h>
#include <sched.h>
@@ -118,7 +119,7 @@ static void perf_evlist__update_id_pos(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel)
- perf_evsel__calc_id_pos(evsel);
+ evsel__calc_id_pos(evsel);
perf_evlist__set_id_pos(evlist);
}
@@ -232,7 +233,7 @@ void perf_evlist__set_leader(struct evlist *evlist)
int __perf_evlist__add_default(struct evlist *evlist, bool precise)
{
- struct evsel *evsel = perf_evsel__new_cycles(precise);
+ struct evsel *evsel = evsel__new_cycles(precise);
if (evsel == NULL)
return -ENOMEM;
@@ -248,7 +249,7 @@ int perf_evlist__add_dummy(struct evlist *evlist)
.config = PERF_COUNT_SW_DUMMY,
.size = sizeof(attr), /* to capture ABI version */
};
- struct evsel *evsel = perf_evsel__new_idx(&attr, evlist->core.nr_entries);
+ struct evsel *evsel = evsel__new_idx(&attr, evlist->core.nr_entries);
if (evsel == NULL)
return -ENOMEM;
@@ -265,7 +266,7 @@ static int evlist__add_attrs(struct evlist *evlist,
size_t i;
for (i = 0; i < nr_attrs; i++) {
- evsel = perf_evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
+ evsel = evsel__new_idx(attrs + i, evlist->core.nr_entries + i);
if (evsel == NULL)
goto out_delete_partial_list;
list_add_tail(&evsel->core.node, &head);
@@ -324,7 +325,7 @@ perf_evlist__find_tracepoint_by_name(struct evlist *evlist,
int perf_evlist__add_newtp(struct evlist *evlist,
const char *sys, const char *name, void *handler)
{
- struct evsel *evsel = perf_evsel__newtp(sys, name);
+ struct evsel *evsel = evsel__newtp(sys, name);
if (IS_ERR(evsel))
return -1;
@@ -379,25 +380,36 @@ void evlist__disable(struct evlist *evlist)
{
struct evsel *pos;
struct affinity affinity;
- int cpu, i;
+ int cpu, i, imm = 0;
+ bool has_imm = false;
if (affinity__setup(&affinity) < 0)
return;
- evlist__for_each_cpu(evlist, i, cpu) {
- affinity__set(&affinity, cpu);
-
- evlist__for_each_entry(evlist, pos) {
- if (evsel__cpu_iter_skip(pos, cpu))
- continue;
- if (pos->disabled || !perf_evsel__is_group_leader(pos) || !pos->core.fd)
- continue;
- evsel__disable_cpu(pos, pos->cpu_iter - 1);
+ /* Disable 'immediate' events last */
+ for (imm = 0; imm <= 1; imm++) {
+ evlist__for_each_cpu(evlist, i, cpu) {
+ affinity__set(&affinity, cpu);
+
+ evlist__for_each_entry(evlist, pos) {
+ if (evsel__cpu_iter_skip(pos, cpu))
+ continue;
+ if (pos->disabled || !evsel__is_group_leader(pos) || !pos->core.fd)
+ continue;
+ if (pos->immediate)
+ has_imm = true;
+ if (pos->immediate != imm)
+ continue;
+ evsel__disable_cpu(pos, pos->cpu_iter - 1);
+ }
}
+ if (!has_imm)
+ break;
}
+
affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
pos->disabled = true;
}
@@ -420,14 +432,14 @@ void evlist__enable(struct evlist *evlist)
evlist__for_each_entry(evlist, pos) {
if (evsel__cpu_iter_skip(pos, cpu))
continue;
- if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
evsel__enable_cpu(pos, pos->cpu_iter - 1);
}
}
affinity__cleanup(&affinity);
evlist__for_each_entry(evlist, pos) {
- if (!perf_evsel__is_group_leader(pos) || !pos->core.fd)
+ if (!evsel__is_group_leader(pos) || !pos->core.fd)
continue;
pos->disabled = false;
}
@@ -947,7 +959,7 @@ void __perf_evlist__set_sample_bit(struct evlist *evlist,
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel)
- __perf_evsel__set_sample_bit(evsel, bit);
+ __evsel__set_sample_bit(evsel, bit);
}
void __perf_evlist__reset_sample_bit(struct evlist *evlist,
@@ -956,7 +968,7 @@ void __perf_evlist__reset_sample_bit(struct evlist *evlist,
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel)
- __perf_evsel__reset_sample_bit(evsel, bit);
+ __evsel__reset_sample_bit(evsel, bit);
}
int perf_evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel)
@@ -994,7 +1006,7 @@ int perf_evlist__set_tp_filter(struct evlist *evlist, const char *filter)
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
continue;
- err = perf_evsel__set_filter(evsel, filter);
+ err = evsel__set_filter(evsel, filter);
if (err)
break;
}
@@ -1014,7 +1026,7 @@ int perf_evlist__append_tp_filter(struct evlist *evlist, const char *filter)
if (evsel->core.attr.type != PERF_TYPE_TRACEPOINT)
continue;
- err = perf_evsel__append_tp_filter(evsel, filter);
+ err = evsel__append_tp_filter(evsel, filter);
if (err)
break;
}
@@ -1131,8 +1143,10 @@ bool perf_evlist__valid_read_format(struct evlist *evlist)
u64 sample_type = first->core.attr.sample_type;
evlist__for_each_entry(evlist, pos) {
- if (read_format != pos->core.attr.read_format)
- return false;
+ if (read_format != pos->core.attr.read_format) {
+ pr_debug("Read format differs %#" PRIx64 " vs %#" PRIx64 "\n",
+ read_format, (u64)pos->core.attr.read_format);
+ }
}
/* PERF_SAMPLE_READ imples PERF_FORMAT_ID. */
@@ -1436,7 +1450,7 @@ int perf_evlist__parse_sample(struct evlist *evlist, union perf_event *event,
if (!evsel)
return -EFAULT;
- return perf_evsel__parse_sample(evsel, event, sample);
+ return evsel__parse_sample(evsel, event, sample);
}
int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
@@ -1447,7 +1461,7 @@ int perf_evlist__parse_sample_timestamp(struct evlist *evlist,
if (!evsel)
return -EFAULT;
- return perf_evsel__parse_sample_timestamp(evsel, event, timestamp);
+ return evsel__parse_sample_timestamp(evsel, event, timestamp);
}
int perf_evlist__strerror_open(struct evlist *evlist,
@@ -1701,133 +1715,3 @@ struct evsel *perf_evlist__reset_weak_group(struct evlist *evsel_list,
}
return leader;
}
-
-int perf_evlist__add_sb_event(struct evlist **evlist,
- struct perf_event_attr *attr,
- perf_evsel__sb_cb_t cb,
- void *data)
-{
- struct evsel *evsel;
- bool new_evlist = (*evlist) == NULL;
-
- if (*evlist == NULL)
- *evlist = evlist__new();
- if (*evlist == NULL)
- return -1;
-
- if (!attr->sample_id_all) {
- pr_warning("enabling sample_id_all for all side band events\n");
- attr->sample_id_all = 1;
- }
-
- evsel = perf_evsel__new_idx(attr, (*evlist)->core.nr_entries);
- if (!evsel)
- goto out_err;
-
- evsel->side_band.cb = cb;
- evsel->side_band.data = data;
- evlist__add(*evlist, evsel);
- return 0;
-
-out_err:
- if (new_evlist) {
- evlist__delete(*evlist);
- *evlist = NULL;
- }
- return -1;
-}
-
-static void *perf_evlist__poll_thread(void *arg)
-{
- struct evlist *evlist = arg;
- bool draining = false;
- int i, done = 0;
- /*
- * In order to read symbols from other namespaces perf to needs to call
- * setns(2). This isn't permitted if the struct_fs has multiple users.
- * unshare(2) the fs so that we may continue to setns into namespaces
- * that we're observing when, for instance, reading the build-ids at
- * the end of a 'perf record' session.
- */
- unshare(CLONE_FS);
-
- while (!done) {
- bool got_data = false;
-
- if (evlist->thread.done)
- draining = true;
-
- if (!draining)
- evlist__poll(evlist, 1000);
-
- for (i = 0; i < evlist->core.nr_mmaps; i++) {
- struct mmap *map = &evlist->mmap[i];
- union perf_event *event;
-
- if (perf_mmap__read_init(&map->core))
- continue;
- while ((event = perf_mmap__read_event(&map->core)) != NULL) {
- struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
-
- if (evsel && evsel->side_band.cb)
- evsel->side_band.cb(event, evsel->side_band.data);
- else
- pr_warning("cannot locate proper evsel for the side band event\n");
-
- perf_mmap__consume(&map->core);
- got_data = true;
- }
- perf_mmap__read_done(&map->core);
- }
-
- if (draining && !got_data)
- break;
- }
- return NULL;
-}
-
-int perf_evlist__start_sb_thread(struct evlist *evlist,
- struct target *target)
-{
- struct evsel *counter;
-
- if (!evlist)
- return 0;
-
- if (perf_evlist__create_maps(evlist, target))
- goto out_delete_evlist;
-
- evlist__for_each_entry(evlist, counter) {
- if (evsel__open(counter, evlist->core.cpus,
- evlist->core.threads) < 0)
- goto out_delete_evlist;
- }
-
- if (evlist__mmap(evlist, UINT_MAX))
- goto out_delete_evlist;
-
- evlist__for_each_entry(evlist, counter) {
- if (evsel__enable(counter))
- goto out_delete_evlist;
- }
-
- evlist->thread.done = 0;
- if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
- goto out_delete_evlist;
-
- return 0;
-
-out_delete_evlist:
- evlist__delete(evlist);
- evlist = NULL;
- return -1;
-}
-
-void perf_evlist__stop_sb_thread(struct evlist *evlist)
-{
- if (!evlist)
- return;
- evlist->thread.done = 1;
- pthread_join(evlist->thread.th, NULL);
- evlist__delete(evlist);
-}
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index f5bd5c386df1..b6f325dfb4d2 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -107,10 +107,11 @@ int __perf_evlist__add_default_attrs(struct evlist *evlist,
int perf_evlist__add_dummy(struct evlist *evlist);
-int perf_evlist__add_sb_event(struct evlist **evlist,
+int perf_evlist__add_sb_event(struct evlist *evlist,
struct perf_event_attr *attr,
- perf_evsel__sb_cb_t cb,
+ evsel__sb_cb_t cb,
void *data);
+void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data);
int perf_evlist__start_sb_thread(struct evlist *evlist,
struct target *target);
void perf_evlist__stop_sb_thread(struct evlist *evlist);
@@ -173,10 +174,6 @@ void evlist__close(struct evlist *evlist);
struct callchain_param;
void perf_evlist__set_id_pos(struct evlist *evlist);
-bool perf_can_sample_identifier(void);
-bool perf_can_record_switch_events(void);
-bool perf_can_record_cpu_wide(void);
-bool perf_can_aux_sample(void);
void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
struct callchain_param *callchain);
int record_opts__config(struct record_opts *opts);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c8dc4450884c..96e5171dce41 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -56,14 +56,14 @@ struct perf_missing_features perf_missing_features;
static clockid_t clockid;
-static int perf_evsel__no_extra_init(struct evsel *evsel __maybe_unused)
+static int evsel__no_extra_init(struct evsel *evsel __maybe_unused)
{
return 0;
}
void __weak test_attr__ready(void) { }
-static void perf_evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
+static void evsel__no_extra_fini(struct evsel *evsel __maybe_unused)
{
}
@@ -73,13 +73,12 @@ static struct {
void (*fini)(struct evsel *evsel);
} perf_evsel__object = {
.size = sizeof(struct evsel),
- .init = perf_evsel__no_extra_init,
- .fini = perf_evsel__no_extra_fini,
+ .init = evsel__no_extra_init,
+ .fini = evsel__no_extra_fini,
};
-int perf_evsel__object_config(size_t object_size,
- int (*init)(struct evsel *evsel),
- void (*fini)(struct evsel *evsel))
+int evsel__object_config(size_t object_size, int (*init)(struct evsel *evsel),
+ void (*fini)(struct evsel *evsel))
{
if (object_size == 0)
@@ -102,7 +101,7 @@ set_methods:
#define FD(e, x, y) (*(int *)xyarray__entry(e->core.fd, x, y))
-int __perf_evsel__sample_size(u64 sample_type)
+int __evsel__sample_size(u64 sample_type)
{
u64 mask = sample_type & PERF_SAMPLE_MASK;
int size = 0;
@@ -178,53 +177,53 @@ static int __perf_evsel__calc_is_pos(u64 sample_type)
return idx;
}
-void perf_evsel__calc_id_pos(struct evsel *evsel)
+void evsel__calc_id_pos(struct evsel *evsel)
{
evsel->id_pos = __perf_evsel__calc_id_pos(evsel->core.attr.sample_type);
evsel->is_pos = __perf_evsel__calc_is_pos(evsel->core.attr.sample_type);
}
-void __perf_evsel__set_sample_bit(struct evsel *evsel,
+void __evsel__set_sample_bit(struct evsel *evsel,
enum perf_event_sample_format bit)
{
if (!(evsel->core.attr.sample_type & bit)) {
evsel->core.attr.sample_type |= bit;
evsel->sample_size += sizeof(u64);
- perf_evsel__calc_id_pos(evsel);
+ evsel__calc_id_pos(evsel);
}
}
-void __perf_evsel__reset_sample_bit(struct evsel *evsel,
+void __evsel__reset_sample_bit(struct evsel *evsel,
enum perf_event_sample_format bit)
{
if (evsel->core.attr.sample_type & bit) {
evsel->core.attr.sample_type &= ~bit;
evsel->sample_size -= sizeof(u64);
- perf_evsel__calc_id_pos(evsel);
+ evsel__calc_id_pos(evsel);
}
}
-void perf_evsel__set_sample_id(struct evsel *evsel,
+void evsel__set_sample_id(struct evsel *evsel,
bool can_sample_identifier)
{
if (can_sample_identifier) {
- perf_evsel__reset_sample_bit(evsel, ID);
- perf_evsel__set_sample_bit(evsel, IDENTIFIER);
+ evsel__reset_sample_bit(evsel, ID);
+ evsel__set_sample_bit(evsel, IDENTIFIER);
} else {
- perf_evsel__set_sample_bit(evsel, ID);
+ evsel__set_sample_bit(evsel, ID);
}
evsel->core.attr.read_format |= PERF_FORMAT_ID;
}
/**
- * perf_evsel__is_function_event - Return whether given evsel is a function
+ * evsel__is_function_event - Return whether given evsel is a function
* trace event
*
* @evsel - evsel selector to be tested
*
* Return %true if event is function trace event
*/
-bool perf_evsel__is_function_event(struct evsel *evsel)
+bool evsel__is_function_event(struct evsel *evsel)
{
#define FUNCTION_EVENT "ftrace:function"
@@ -249,17 +248,18 @@ void evsel__init(struct evsel *evsel,
evsel->bpf_fd = -1;
INIT_LIST_HEAD(&evsel->config_terms);
perf_evsel__object.init(evsel);
- evsel->sample_size = __perf_evsel__sample_size(attr->sample_type);
- perf_evsel__calc_id_pos(evsel);
+ evsel->sample_size = __evsel__sample_size(attr->sample_type);
+ evsel__calc_id_pos(evsel);
evsel->cmdline_group_boundary = false;
evsel->metric_expr = NULL;
evsel->metric_name = NULL;
evsel->metric_events = NULL;
+ evsel->per_pkg_mask = NULL;
evsel->collect_stat = false;
evsel->pmu_name = NULL;
}
-struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
+struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx)
{
struct evsel *evsel = zalloc(perf_evsel__object.size);
@@ -267,13 +267,13 @@ struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx)
return NULL;
evsel__init(evsel, attr, idx);
- if (perf_evsel__is_bpf_output(evsel)) {
+ if (evsel__is_bpf_output(evsel)) {
evsel->core.attr.sample_type |= (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME |
PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD),
evsel->core.attr.sample_period = 1;
}
- if (perf_evsel__is_clock(evsel)) {
+ if (evsel__is_clock(evsel)) {
/*
* The evsel->unit points to static alias->unit
* so it's ok to use static string in here.
@@ -292,7 +292,7 @@ static bool perf_event_can_profile_kernel(void)
return perf_event_paranoid_check(1);
}
-struct evsel *perf_evsel__new_cycles(bool precise)
+struct evsel *evsel__new_cycles(bool precise)
{
struct perf_event_attr attr = {
.type = PERF_TYPE_HARDWARE,
@@ -334,7 +334,7 @@ error_free:
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
-struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx)
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx)
{
struct evsel *evsel = zalloc(perf_evsel__object.size);
int err = -ENOMEM;
@@ -372,7 +372,7 @@ out_err:
return ERR_PTR(err);
}
-const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
+const char *evsel__hw_names[PERF_COUNT_HW_MAX] = {
"cycles",
"instructions",
"cache-references",
@@ -385,10 +385,10 @@ const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX] = {
"ref-cycles",
};
-static const char *__perf_evsel__hw_name(u64 config)
+static const char *__evsel__hw_name(u64 config)
{
- if (config < PERF_COUNT_HW_MAX && perf_evsel__hw_names[config])
- return perf_evsel__hw_names[config];
+ if (config < PERF_COUNT_HW_MAX && evsel__hw_names[config])
+ return evsel__hw_names[config];
return "unknown-hardware";
}
@@ -429,13 +429,13 @@ static int perf_evsel__add_modifiers(struct evsel *evsel, char *bf, size_t size)
return r;
}
-static int perf_evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__hw_name(struct evsel *evsel, char *bf, size_t size)
{
- int r = scnprintf(bf, size, "%s", __perf_evsel__hw_name(evsel->core.attr.config));
+ int r = scnprintf(bf, size, "%s", __evsel__hw_name(evsel->core.attr.config));
return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}
-const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
+const char *evsel__sw_names[PERF_COUNT_SW_MAX] = {
"cpu-clock",
"task-clock",
"page-faults",
@@ -448,20 +448,20 @@ const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX] = {
"dummy",
};
-static const char *__perf_evsel__sw_name(u64 config)
+static const char *__evsel__sw_name(u64 config)
{
- if (config < PERF_COUNT_SW_MAX && perf_evsel__sw_names[config])
- return perf_evsel__sw_names[config];
+ if (config < PERF_COUNT_SW_MAX && evsel__sw_names[config])
+ return evsel__sw_names[config];
return "unknown-software";
}
-static int perf_evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__sw_name(struct evsel *evsel, char *bf, size_t size)
{
- int r = scnprintf(bf, size, "%s", __perf_evsel__sw_name(evsel->core.attr.config));
+ int r = scnprintf(bf, size, "%s", __evsel__sw_name(evsel->core.attr.config));
return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}
-static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
+static int __evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
{
int r;
@@ -479,15 +479,14 @@ static int __perf_evsel__bp_name(char *bf, size_t size, u64 addr, u64 type)
return r;
}
-static int perf_evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__bp_name(struct evsel *evsel, char *bf, size_t size)
{
struct perf_event_attr *attr = &evsel->core.attr;
- int r = __perf_evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
+ int r = __evsel__bp_name(bf, size, attr->bp_addr, attr->bp_type);
return r + perf_evsel__add_modifiers(evsel, bf + r, size - r);
}
-const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
- [PERF_EVSEL__MAX_ALIASES] = {
+const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES] = {
{ "L1-dcache", "l1-d", "l1d", "L1-data", },
{ "L1-icache", "l1-i", "l1i", "L1-instruction", },
{ "LLC", "L2", },
@@ -497,15 +496,13 @@ const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
{ "node", },
};
-const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_EVSEL__MAX_ALIASES] = {
+const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES] = {
{ "load", "loads", "read", },
{ "store", "stores", "write", },
{ "prefetch", "prefetches", "speculative-read", "speculative-load", },
};
-const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
- [PERF_EVSEL__MAX_ALIASES] = {
+const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES] = {
{ "refs", "Reference", "ops", "access", },
{ "misses", "miss", },
};
@@ -521,7 +518,7 @@ const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
* L1I : Read and prefetch only
* ITLB and BPU : Read-only
*/
-static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
+static unsigned long evsel__hw_cache_stat[C(MAX)] = {
[C(L1D)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
[C(L1I)] = (CACHE_READ | CACHE_PREFETCH),
[C(LL)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
@@ -531,28 +528,27 @@ static unsigned long perf_evsel__hw_cache_stat[C(MAX)] = {
[C(NODE)] = (CACHE_READ | CACHE_WRITE | CACHE_PREFETCH),
};
-bool perf_evsel__is_cache_op_valid(u8 type, u8 op)
+bool evsel__is_cache_op_valid(u8 type, u8 op)
{
- if (perf_evsel__hw_cache_stat[type] & COP(op))
+ if (evsel__hw_cache_stat[type] & COP(op))
return true; /* valid */
else
return false; /* invalid */
}
-int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
- char *bf, size_t size)
+int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size)
{
if (result) {
- return scnprintf(bf, size, "%s-%s-%s", perf_evsel__hw_cache[type][0],
- perf_evsel__hw_cache_op[op][0],
- perf_evsel__hw_cache_result[result][0]);
+ return scnprintf(bf, size, "%s-%s-%s", evsel__hw_cache[type][0],
+ evsel__hw_cache_op[op][0],
+ evsel__hw_cache_result[result][0]);
}
- return scnprintf(bf, size, "%s-%s", perf_evsel__hw_cache[type][0],
- perf_evsel__hw_cache_op[op][1]);
+ return scnprintf(bf, size, "%s-%s", evsel__hw_cache[type][0],
+ evsel__hw_cache_op[op][1]);
}
-static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
+static int __evsel__hw_cache_name(u64 config, char *bf, size_t size)
{
u8 op, result, type = (config >> 0) & 0xff;
const char *err = "unknown-ext-hardware-cache-type";
@@ -571,33 +567,33 @@ static int __perf_evsel__hw_cache_name(u64 config, char *bf, size_t size)
goto out_err;
err = "invalid-cache";
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
goto out_err;
- return __perf_evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
+ return __evsel__hw_cache_type_op_res_name(type, op, result, bf, size);
out_err:
return scnprintf(bf, size, "%s", err);
}
-static int perf_evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__hw_cache_name(struct evsel *evsel, char *bf, size_t size)
{
- int ret = __perf_evsel__hw_cache_name(evsel->core.attr.config, bf, size);
+ int ret = __evsel__hw_cache_name(evsel->core.attr.config, bf, size);
return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}
-static int perf_evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
+static int evsel__raw_name(struct evsel *evsel, char *bf, size_t size)
{
int ret = scnprintf(bf, size, "raw 0x%" PRIx64, evsel->core.attr.config);
return ret + perf_evsel__add_modifiers(evsel, bf + ret, size - ret);
}
-static int perf_evsel__tool_name(char *bf, size_t size)
+static int evsel__tool_name(char *bf, size_t size)
{
int ret = scnprintf(bf, size, "duration_time");
return ret;
}
-const char *perf_evsel__name(struct evsel *evsel)
+const char *evsel__name(struct evsel *evsel)
{
char bf[128];
@@ -609,22 +605,22 @@ const char *perf_evsel__name(struct evsel *evsel)
switch (evsel->core.attr.type) {
case PERF_TYPE_RAW:
- perf_evsel__raw_name(evsel, bf, sizeof(bf));
+ evsel__raw_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_HARDWARE:
- perf_evsel__hw_name(evsel, bf, sizeof(bf));
+ evsel__hw_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_HW_CACHE:
- perf_evsel__hw_cache_name(evsel, bf, sizeof(bf));
+ evsel__hw_cache_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_SOFTWARE:
if (evsel->tool_event)
- perf_evsel__tool_name(bf, sizeof(bf));
+ evsel__tool_name(bf, sizeof(bf));
else
- perf_evsel__sw_name(evsel, bf, sizeof(bf));
+ evsel__sw_name(evsel, bf, sizeof(bf));
break;
case PERF_TYPE_TRACEPOINT:
@@ -632,7 +628,7 @@ const char *perf_evsel__name(struct evsel *evsel)
break;
case PERF_TYPE_BREAKPOINT:
- perf_evsel__bp_name(evsel, bf, sizeof(bf));
+ evsel__bp_name(evsel, bf, sizeof(bf));
break;
default:
@@ -649,7 +645,7 @@ out_unknown:
return "unknown";
}
-const char *perf_evsel__group_name(struct evsel *evsel)
+const char *evsel__group_name(struct evsel *evsel)
{
return evsel->group_name ?: "anon group";
}
@@ -664,21 +660,19 @@ const char *perf_evsel__group_name(struct evsel *evsel)
* For record -e 'cycles,instructions' and report --group
* 'cycles:u, instructions:u'
*/
-int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
+int evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
{
int ret = 0;
struct evsel *pos;
- const char *group_name = perf_evsel__group_name(evsel);
+ const char *group_name = evsel__group_name(evsel);
if (!evsel->forced_leader)
ret = scnprintf(buf, size, "%s { ", group_name);
- ret += scnprintf(buf + ret, size - ret, "%s",
- perf_evsel__name(evsel));
+ ret += scnprintf(buf + ret, size - ret, "%s", evsel__name(evsel));
for_each_group_member(pos, evsel)
- ret += scnprintf(buf + ret, size - ret, ", %s",
- perf_evsel__name(pos));
+ ret += scnprintf(buf + ret, size - ret, ", %s", evsel__name(pos));
if (!evsel->forced_leader)
ret += scnprintf(buf + ret, size - ret, " }");
@@ -686,14 +680,13 @@ int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size)
return ret;
}
-static void __perf_evsel__config_callchain(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *param)
+static void __evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *param)
{
- bool function = perf_evsel__is_function_event(evsel);
+ bool function = evsel__is_function_event(evsel);
struct perf_event_attr *attr = &evsel->core.attr;
- perf_evsel__set_sample_bit(evsel, CALLCHAIN);
+ evsel__set_sample_bit(evsel, CALLCHAIN);
attr->sample_max_stack = param->max_stack;
@@ -708,11 +701,12 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
"to get user callchain information. "
"Falling back to framepointers.\n");
} else {
- perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER |
PERF_SAMPLE_BRANCH_CALL_STACK |
PERF_SAMPLE_BRANCH_NO_CYCLES |
- PERF_SAMPLE_BRANCH_NO_FLAGS;
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_HW_INDEX;
}
} else
pr_warning("Cannot use LBR callstack with branch stack. "
@@ -721,8 +715,8 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
if (param->record_mode == CALLCHAIN_DWARF) {
if (!function) {
- perf_evsel__set_sample_bit(evsel, REGS_USER);
- perf_evsel__set_sample_bit(evsel, STACK_USER);
+ evsel__set_sample_bit(evsel, REGS_USER);
+ evsel__set_sample_bit(evsel, STACK_USER);
if (opts->sample_user_regs && DWARF_MINIMAL_REGS != PERF_REGS_MASK) {
attr->sample_regs_user |= DWARF_MINIMAL_REGS;
pr_warning("WARNING: The use of --call-graph=dwarf may require all the user registers, "
@@ -745,12 +739,11 @@ static void __perf_evsel__config_callchain(struct evsel *evsel,
}
}
-void perf_evsel__config_callchain(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *param)
+void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *param)
{
if (param->enabled)
- return __perf_evsel__config_callchain(evsel, opts, param);
+ return __evsel__config_callchain(evsel, opts, param);
}
static void
@@ -759,22 +752,23 @@ perf_evsel__reset_callgraph(struct evsel *evsel,
{
struct perf_event_attr *attr = &evsel->core.attr;
- perf_evsel__reset_sample_bit(evsel, CALLCHAIN);
+ evsel__reset_sample_bit(evsel, CALLCHAIN);
if (param->record_mode == CALLCHAIN_LBR) {
- perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type &= ~(PERF_SAMPLE_BRANCH_USER |
- PERF_SAMPLE_BRANCH_CALL_STACK);
+ PERF_SAMPLE_BRANCH_CALL_STACK |
+ PERF_SAMPLE_BRANCH_HW_INDEX);
}
if (param->record_mode == CALLCHAIN_DWARF) {
- perf_evsel__reset_sample_bit(evsel, REGS_USER);
- perf_evsel__reset_sample_bit(evsel, STACK_USER);
+ evsel__reset_sample_bit(evsel, REGS_USER);
+ evsel__reset_sample_bit(evsel, STACK_USER);
}
}
-static void apply_config_terms(struct evsel *evsel,
- struct record_opts *opts, bool track)
+static void evsel__apply_config_terms(struct evsel *evsel,
+ struct record_opts *opts, bool track)
{
- struct perf_evsel_config_term *term;
+ struct evsel_config_term *term;
struct list_head *config_terms = &evsel->config_terms;
struct perf_event_attr *attr = &evsel->core.attr;
/* callgraph default */
@@ -787,69 +781,69 @@ static void apply_config_terms(struct evsel *evsel,
list_for_each_entry(term, config_terms, list) {
switch (term->type) {
- case PERF_EVSEL__CONFIG_TERM_PERIOD:
+ case EVSEL__CONFIG_TERM_PERIOD:
if (!(term->weak && opts->user_interval != ULLONG_MAX)) {
attr->sample_period = term->val.period;
attr->freq = 0;
- perf_evsel__reset_sample_bit(evsel, PERIOD);
+ evsel__reset_sample_bit(evsel, PERIOD);
}
break;
- case PERF_EVSEL__CONFIG_TERM_FREQ:
+ case EVSEL__CONFIG_TERM_FREQ:
if (!(term->weak && opts->user_freq != UINT_MAX)) {
attr->sample_freq = term->val.freq;
attr->freq = 1;
- perf_evsel__set_sample_bit(evsel, PERIOD);
+ evsel__set_sample_bit(evsel, PERIOD);
}
break;
- case PERF_EVSEL__CONFIG_TERM_TIME:
+ case EVSEL__CONFIG_TERM_TIME:
if (term->val.time)
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, TIME);
else
- perf_evsel__reset_sample_bit(evsel, TIME);
+ evsel__reset_sample_bit(evsel, TIME);
break;
- case PERF_EVSEL__CONFIG_TERM_CALLGRAPH:
+ case EVSEL__CONFIG_TERM_CALLGRAPH:
callgraph_buf = term->val.str;
break;
- case PERF_EVSEL__CONFIG_TERM_BRANCH:
+ case EVSEL__CONFIG_TERM_BRANCH:
if (term->val.str && strcmp(term->val.str, "no")) {
- perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
parse_branch_str(term->val.str,
&attr->branch_sample_type);
} else
- perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
break;
- case PERF_EVSEL__CONFIG_TERM_STACK_USER:
+ case EVSEL__CONFIG_TERM_STACK_USER:
dump_size = term->val.stack_user;
break;
- case PERF_EVSEL__CONFIG_TERM_MAX_STACK:
+ case EVSEL__CONFIG_TERM_MAX_STACK:
max_stack = term->val.max_stack;
break;
- case PERF_EVSEL__CONFIG_TERM_MAX_EVENTS:
+ case EVSEL__CONFIG_TERM_MAX_EVENTS:
evsel->max_events = term->val.max_events;
break;
- case PERF_EVSEL__CONFIG_TERM_INHERIT:
+ case EVSEL__CONFIG_TERM_INHERIT:
/*
* attr->inherit should has already been set by
- * perf_evsel__config. If user explicitly set
+ * evsel__config. If user explicitly set
* inherit using config terms, override global
* opt->no_inherit setting.
*/
attr->inherit = term->val.inherit ? 1 : 0;
break;
- case PERF_EVSEL__CONFIG_TERM_OVERWRITE:
+ case EVSEL__CONFIG_TERM_OVERWRITE:
attr->write_backward = term->val.overwrite ? 1 : 0;
break;
- case PERF_EVSEL__CONFIG_TERM_DRV_CFG:
+ case EVSEL__CONFIG_TERM_DRV_CFG:
break;
- case PERF_EVSEL__CONFIG_TERM_PERCORE:
+ case EVSEL__CONFIG_TERM_PERCORE:
break;
- case PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT:
+ case EVSEL__CONFIG_TERM_AUX_OUTPUT:
attr->aux_output = term->val.aux_output ? 1 : 0;
break;
- case PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
+ case EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE:
/* Already applied by auxtrace */
break;
- case PERF_EVSEL__CONFIG_TERM_CFG_CHG:
+ case EVSEL__CONFIG_TERM_CFG_CHG:
break;
default:
break;
@@ -895,11 +889,11 @@ static void apply_config_terms(struct evsel *evsel,
/* set perf-event callgraph */
if (param.enabled) {
if (sample_address) {
- perf_evsel__set_sample_bit(evsel, ADDR);
- perf_evsel__set_sample_bit(evsel, DATA_SRC);
+ evsel__set_sample_bit(evsel, ADDR);
+ evsel__set_sample_bit(evsel, DATA_SRC);
evsel->core.attr.mmap_data = track;
}
- perf_evsel__config_callchain(evsel, opts, &param);
+ evsel__config_callchain(evsel, opts, &param);
}
}
}
@@ -910,10 +904,9 @@ static bool is_dummy_event(struct evsel *evsel)
(evsel->core.attr.config == PERF_COUNT_SW_DUMMY);
}
-struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel,
- enum evsel_term_type type)
+struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type)
{
- struct perf_evsel_config_term *term, *found_term = NULL;
+ struct evsel_config_term *term, *found_term = NULL;
list_for_each_entry(term, &evsel->config_terms, list) {
if (term->type == type)
@@ -951,8 +944,8 @@ struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel
* enable/disable events specifically, as there's no
* initial traced exec call.
*/
-void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
- struct callchain_param *callchain)
+void evsel__config(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain)
{
struct evsel *leader = evsel->leader;
struct perf_event_attr *attr = &evsel->core.attr;
@@ -963,17 +956,17 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
attr->inherit = !opts->no_inherit;
attr->write_backward = opts->overwrite ? 1 : 0;
- perf_evsel__set_sample_bit(evsel, IP);
- perf_evsel__set_sample_bit(evsel, TID);
+ evsel__set_sample_bit(evsel, IP);
+ evsel__set_sample_bit(evsel, TID);
if (evsel->sample_read) {
- perf_evsel__set_sample_bit(evsel, READ);
+ evsel__set_sample_bit(evsel, READ);
/*
* We need ID even in case of single event, because
* PERF_SAMPLE_READ process ID specific data.
*/
- perf_evsel__set_sample_id(evsel, false);
+ evsel__set_sample_id(evsel, false);
/*
* Apply group format only if we belong to group
@@ -992,7 +985,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
if (!attr->sample_period || (opts->user_freq != UINT_MAX ||
opts->user_interval != ULLONG_MAX)) {
if (opts->freq) {
- perf_evsel__set_sample_bit(evsel, PERIOD);
+ evsel__set_sample_bit(evsel, PERIOD);
attr->freq = 1;
attr->sample_freq = opts->freq;
} else {
@@ -1000,25 +993,6 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
}
}
- /*
- * Disable sampling for all group members other
- * than leader in case leader 'leads' the sampling.
- */
- if ((leader != evsel) && leader->sample_read) {
- attr->freq = 0;
- attr->sample_freq = 0;
- attr->sample_period = 0;
- attr->write_backward = 0;
-
- /*
- * We don't get sample for slave events, we make them
- * when delivering group leader sample. Set the slave
- * event to follow the master sample_type to ease up
- * report.
- */
- attr->sample_type = leader->core.attr.sample_type;
- }
-
if (opts->no_samples)
attr->sample_freq = 0;
@@ -1031,7 +1005,7 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
}
if (opts->sample_address) {
- perf_evsel__set_sample_bit(evsel, ADDR);
+ evsel__set_sample_bit(evsel, ADDR);
attr->mmap_data = track;
}
@@ -1040,24 +1014,24 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
* event, due to issues with page faults while tracing page
* fault handler and its overall trickiness nature.
*/
- if (perf_evsel__is_function_event(evsel))
+ if (evsel__is_function_event(evsel))
evsel->core.attr.exclude_callchain_user = 1;
if (callchain && callchain->enabled && !evsel->no_aux_samples)
- perf_evsel__config_callchain(evsel, opts, callchain);
+ evsel__config_callchain(evsel, opts, callchain);
if (opts->sample_intr_regs) {
attr->sample_regs_intr = opts->sample_intr_regs;
- perf_evsel__set_sample_bit(evsel, REGS_INTR);
+ evsel__set_sample_bit(evsel, REGS_INTR);
}
if (opts->sample_user_regs) {
attr->sample_regs_user |= opts->sample_user_regs;
- perf_evsel__set_sample_bit(evsel, REGS_USER);
+ evsel__set_sample_bit(evsel, REGS_USER);
}
if (target__has_cpu(&opts->target) || opts->sample_cpu)
- perf_evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, CPU);
/*
* When the user explicitly disabled time don't force it here.
@@ -1066,31 +1040,31 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
(!perf_missing_features.sample_id_all &&
(!opts->no_inherit || target__has_cpu(&opts->target) || per_cpu ||
opts->sample_time_set)))
- perf_evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, TIME);
if (opts->raw_samples && !evsel->no_aux_samples) {
- perf_evsel__set_sample_bit(evsel, TIME);
- perf_evsel__set_sample_bit(evsel, RAW);
- perf_evsel__set_sample_bit(evsel, CPU);
+ evsel__set_sample_bit(evsel, TIME);
+ evsel__set_sample_bit(evsel, RAW);
+ evsel__set_sample_bit(evsel, CPU);
}
if (opts->sample_address)
- perf_evsel__set_sample_bit(evsel, DATA_SRC);
+ evsel__set_sample_bit(evsel, DATA_SRC);
if (opts->sample_phys_addr)
- perf_evsel__set_sample_bit(evsel, PHYS_ADDR);
+ evsel__set_sample_bit(evsel, PHYS_ADDR);
if (opts->no_buffering) {
attr->watermark = 0;
attr->wakeup_events = 1;
}
if (opts->branch_stack && !evsel->no_aux_samples) {
- perf_evsel__set_sample_bit(evsel, BRANCH_STACK);
+ evsel__set_sample_bit(evsel, BRANCH_STACK);
attr->branch_sample_type = opts->branch_stack;
}
if (opts->sample_weight)
- perf_evsel__set_sample_bit(evsel, WEIGHT);
+ evsel__set_sample_bit(evsel, WEIGHT);
attr->task = track;
attr->mmap = track;
@@ -1102,11 +1076,16 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
if (opts->record_namespaces)
attr->namespaces = track;
+ if (opts->record_cgroup) {
+ attr->cgroup = track && !perf_missing_features.cgroup;
+ evsel__set_sample_bit(evsel, CGROUP);
+ }
+
if (opts->record_switch_events)
attr->context_switch = track;
if (opts->sample_transaction)
- perf_evsel__set_sample_bit(evsel, TRANSACTION);
+ evsel__set_sample_bit(evsel, TRANSACTION);
if (opts->running_time) {
evsel->core.attr.read_format |=
@@ -1120,15 +1099,15 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
* Disabling only independent events or group leaders,
* keeping group members enabled.
*/
- if (perf_evsel__is_group_leader(evsel))
+ if (evsel__is_group_leader(evsel))
attr->disabled = 1;
/*
* Setting enable_on_exec for independent events and
* group leaders for traced executed by perf.
*/
- if (target__none(&opts->target) && perf_evsel__is_group_leader(evsel) &&
- !opts->initial_delay)
+ if (target__none(&opts->target) && evsel__is_group_leader(evsel) &&
+ !opts->initial_delay)
attr->enable_on_exec = 1;
if (evsel->immediate) {
@@ -1162,28 +1141,31 @@ void perf_evsel__config(struct evsel *evsel, struct record_opts *opts,
* Apply event specific term settings,
* it overloads any global configuration.
*/
- apply_config_terms(evsel, opts, track);
+ evsel__apply_config_terms(evsel, opts, track);
evsel->ignore_missing_thread = opts->ignore_missing_thread;
/* The --period option takes the precedence. */
if (opts->period_set) {
if (opts->period)
- perf_evsel__set_sample_bit(evsel, PERIOD);
+ evsel__set_sample_bit(evsel, PERIOD);
else
- perf_evsel__reset_sample_bit(evsel, PERIOD);
+ evsel__reset_sample_bit(evsel, PERIOD);
}
/*
+ * A dummy event never triggers any actual counter and therefore
+ * cannot be used with branch_stack.
+ *
* For initial_delay, a dummy event is added implicitly.
* The software event will trigger -EOPNOTSUPP error out,
* if BRANCH_STACK bit is set.
*/
- if (opts->initial_delay && is_dummy_event(evsel))
- perf_evsel__reset_sample_bit(evsel, BRANCH_STACK);
+ if (is_dummy_event(evsel))
+ evsel__reset_sample_bit(evsel, BRANCH_STACK);
}
-int perf_evsel__set_filter(struct evsel *evsel, const char *filter)
+int evsel__set_filter(struct evsel *evsel, const char *filter)
{
char *new_filter = strdup(filter);
@@ -1196,13 +1178,12 @@ int perf_evsel__set_filter(struct evsel *evsel, const char *filter)
return -1;
}
-static int perf_evsel__append_filter(struct evsel *evsel,
- const char *fmt, const char *filter)
+static int evsel__append_filter(struct evsel *evsel, const char *fmt, const char *filter)
{
char *new_filter;
if (evsel->filter == NULL)
- return perf_evsel__set_filter(evsel, filter);
+ return evsel__set_filter(evsel, filter);
if (asprintf(&new_filter, fmt, evsel->filter, filter) > 0) {
free(evsel->filter);
@@ -1213,14 +1194,14 @@ static int perf_evsel__append_filter(struct evsel *evsel,
return -1;
}
-int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter)
+int evsel__append_tp_filter(struct evsel *evsel, const char *filter)
{
- return perf_evsel__append_filter(evsel, "(%s) && (%s)", filter);
+ return evsel__append_filter(evsel, "(%s) && (%s)", filter);
}
-int perf_evsel__append_addr_filter(struct evsel *evsel, const char *filter)
+int evsel__append_addr_filter(struct evsel *evsel, const char *filter)
{
- return perf_evsel__append_filter(evsel, "%s,%s", filter);
+ return evsel__append_filter(evsel, "%s,%s", filter);
}
/* Caller has to clear disabled after going through all CPUs. */
@@ -1259,9 +1240,9 @@ int evsel__disable(struct evsel *evsel)
return err;
}
-static void perf_evsel__free_config_terms(struct evsel *evsel)
+static void evsel__free_config_terms(struct evsel *evsel)
{
- struct perf_evsel_config_term *term, *h;
+ struct evsel_config_term *term, *h;
list_for_each_entry_safe(term, h, &evsel->config_terms, list) {
list_del_init(&term->list);
@@ -1271,31 +1252,34 @@ static void perf_evsel__free_config_terms(struct evsel *evsel)
}
}
-void perf_evsel__exit(struct evsel *evsel)
+void evsel__exit(struct evsel *evsel)
{
assert(list_empty(&evsel->core.node));
assert(evsel->evlist == NULL);
- perf_evsel__free_counts(evsel);
+ evsel__free_counts(evsel);
perf_evsel__free_fd(&evsel->core);
perf_evsel__free_id(&evsel->core);
- perf_evsel__free_config_terms(evsel);
+ evsel__free_config_terms(evsel);
cgroup__put(evsel->cgrp);
perf_cpu_map__put(evsel->core.cpus);
perf_cpu_map__put(evsel->core.own_cpus);
perf_thread_map__put(evsel->core.threads);
zfree(&evsel->group_name);
zfree(&evsel->name);
+ zfree(&evsel->pmu_name);
+ zfree(&evsel->per_pkg_mask);
+ zfree(&evsel->metric_events);
perf_evsel__object.fini(evsel);
}
void evsel__delete(struct evsel *evsel)
{
- perf_evsel__exit(evsel);
+ evsel__exit(evsel);
free(evsel);
}
-void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
- struct perf_counts_values *count)
+void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count)
{
struct perf_counts_values tmp;
@@ -1334,8 +1318,7 @@ void perf_counts_values__scale(struct perf_counts_values *count,
*pscaled = scaled;
}
-static int
-perf_evsel__read_one(struct evsel *evsel, int cpu, int thread)
+static int evsel__read_one(struct evsel *evsel, int cpu, int thread)
{
struct perf_counts_values *count = perf_counts(evsel->counts, cpu, thread);
@@ -1395,8 +1378,7 @@ perf_evsel__process_group_data(struct evsel *leader,
return 0;
}
-static int
-perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
+static int evsel__read_group(struct evsel *leader, int cpu, int thread)
{
struct perf_stat_evsel *ps = leader->stats;
u64 read_format = leader->core.attr.read_format;
@@ -1406,7 +1388,7 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
if (!(read_format & PERF_FORMAT_ID))
return -EINVAL;
- if (!perf_evsel__is_group_leader(leader))
+ if (!evsel__is_group_leader(leader))
return -EINVAL;
if (!data) {
@@ -1426,18 +1408,17 @@ perf_evsel__read_group(struct evsel *leader, int cpu, int thread)
return perf_evsel__process_group_data(leader, cpu, thread, data);
}
-int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread)
+int evsel__read_counter(struct evsel *evsel, int cpu, int thread)
{
u64 read_format = evsel->core.attr.read_format;
if (read_format & PERF_FORMAT_GROUP)
- return perf_evsel__read_group(evsel, cpu, thread);
- else
- return perf_evsel__read_one(evsel, cpu, thread);
+ return evsel__read_group(evsel, cpu, thread);
+
+ return evsel__read_one(evsel, cpu, thread);
}
-int __perf_evsel__read_on_cpu(struct evsel *evsel,
- int cpu, int thread, bool scale)
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale)
{
struct perf_counts_values count;
size_t nv = scale ? 3 : 1;
@@ -1445,13 +1426,13 @@ int __perf_evsel__read_on_cpu(struct evsel *evsel,
if (FD(evsel, cpu, thread) < 0)
return -EINVAL;
- if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
+ if (evsel->counts == NULL && evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0)
return -ENOMEM;
if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) <= 0)
return -errno;
- perf_evsel__compute_deltas(evsel, cpu, thread, &count);
+ evsel__compute_deltas(evsel, cpu, thread, &count);
perf_counts_values__scale(&count, scale, NULL);
*perf_counts(evsel->counts, cpu, thread) = count;
return 0;
@@ -1462,7 +1443,7 @@ static int get_group_fd(struct evsel *evsel, int cpu, int thread)
struct evsel *leader = evsel->leader;
int fd;
- if (perf_evsel__is_group_leader(evsel))
+ if (evsel__is_group_leader(evsel))
return -1;
/*
@@ -1673,6 +1654,8 @@ fallback_missing_features:
evsel->core.attr.ksymbol = 0;
if (perf_missing_features.bpf)
evsel->core.attr.bpf_event = 0;
+ if (perf_missing_features.branch_hw_idx)
+ evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_HW_INDEX;
retry_sample_id:
if (perf_missing_features.sample_id_all)
evsel->core.attr.sample_id_all = 0;
@@ -1739,8 +1722,7 @@ retry_open:
/*
* If we succeeded but had to kill clockid, fail and
- * have perf_evsel__open_strerror() print us a nice
- * error.
+ * have evsel__open_strerror() print us a nice error.
*/
if (perf_missing_features.clockid ||
perf_missing_features.clockid_wrong) {
@@ -1784,7 +1766,16 @@ try_fallback:
* Must probe features in the order they were added to the
* perf_event_attr interface.
*/
- if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
+ if (!perf_missing_features.cgroup && evsel->core.attr.cgroup) {
+ perf_missing_features.cgroup = true;
+ pr_debug2_peo("Kernel has no cgroup sampling support, bailing out\n");
+ goto out_close;
+ } else if (!perf_missing_features.branch_hw_idx &&
+ (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)) {
+ perf_missing_features.branch_hw_idx = true;
+ pr_debug2("switching off branch HW index support\n");
+ goto fallback_missing_features;
+ } else if (!perf_missing_features.aux_output && evsel->core.attr.aux_output) {
perf_missing_features.aux_output = true;
pr_debug2_peo("Kernel has no attr.aux_output support, bailing out\n");
goto out_close;
@@ -1835,7 +1826,7 @@ try_fallback:
} else if (!perf_missing_features.group_read &&
evsel->core.attr.inherit &&
(evsel->core.attr.read_format & PERF_FORMAT_GROUP) &&
- perf_evsel__is_group_leader(evsel)) {
+ evsel__is_group_leader(evsel)) {
perf_missing_features.group_read = true;
pr_debug2_peo("switching off group read\n");
goto fallback_missing_features;
@@ -1869,9 +1860,7 @@ void evsel__close(struct evsel *evsel)
perf_evsel__free_id(&evsel->core);
}
-int perf_evsel__open_per_cpu(struct evsel *evsel,
- struct perf_cpu_map *cpus,
- int cpu)
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu)
{
if (cpu == -1)
return evsel__open_cpu(evsel, cpus, NULL, 0,
@@ -1880,8 +1869,7 @@ int perf_evsel__open_per_cpu(struct evsel *evsel,
return evsel__open_cpu(evsel, cpus, NULL, cpu, cpu + 1);
}
-int perf_evsel__open_per_thread(struct evsel *evsel,
- struct perf_thread_map *threads)
+int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads)
{
return evsel__open(evsel, NULL, threads);
}
@@ -1976,8 +1964,8 @@ perf_event__check_size(union perf_event *event, unsigned int sample_size)
return 0;
}
-int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
- struct perf_sample *data)
+int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
+ struct perf_sample *data)
{
u64 type = evsel->core.attr.sample_type;
bool swapped = evsel->needs_swap;
@@ -2117,7 +2105,7 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
}
}
- if (evsel__has_callchain(evsel)) {
+ if (type & PERF_SAMPLE_CALLCHAIN) {
const u64 max_callchain_nr = UINT64_MAX / sizeof(u64);
OVERFLOW_CHECK_u64(array);
@@ -2169,7 +2157,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
if (data->branch_stack->nr > max_branch_nr)
return -EFAULT;
+
sz = data->branch_stack->nr * sizeof(struct branch_entry);
+ if (evsel__has_branch_hw_idx(evsel))
+ sz += sizeof(u64);
+ else
+ data->no_hw_idx = true;
OVERFLOW_CHECK(array, sz, max_size);
array = (void *)array + sz;
}
@@ -2252,6 +2245,12 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
array++;
}
+ data->cgroup = 0;
+ if (type & PERF_SAMPLE_CGROUP) {
+ data->cgroup = *array;
+ array++;
+ }
+
if (type & PERF_SAMPLE_AUX) {
OVERFLOW_CHECK_u64(array);
sz = *array++;
@@ -2268,9 +2267,8 @@ int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
return 0;
}
-int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
- union perf_event *event,
- u64 *timestamp)
+int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
+ u64 *timestamp)
{
u64 type = evsel->core.attr.sample_type;
const __u64 *array;
@@ -2312,15 +2310,14 @@ int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
return 0;
}
-struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name)
+struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
{
return tep_find_field(evsel->tp_format, name);
}
-void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample,
- const char *name)
+void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
- struct tep_format_field *field = perf_evsel__field(evsel, name);
+ struct tep_format_field *field = evsel__field(evsel, name);
int offset;
if (!field)
@@ -2375,10 +2372,9 @@ u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sam
return 0;
}
-u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
- const char *name)
+u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
- struct tep_format_field *field = perf_evsel__field(evsel, name);
+ struct tep_format_field *field = evsel__field(evsel, name);
if (!field)
return 0;
@@ -2386,8 +2382,7 @@ u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
}
-bool perf_evsel__fallback(struct evsel *evsel, int err,
- char *msg, size_t msgsize)
+bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize)
{
int paranoid;
@@ -2412,13 +2407,17 @@ bool perf_evsel__fallback(struct evsel *evsel, int err,
return true;
} else if (err == EACCES && !evsel->core.attr.exclude_kernel &&
(paranoid = perf_event_paranoid()) > 1) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
char *new_name;
const char *sep = ":";
+ /* If event has exclude user then don't exclude kernel. */
+ if (evsel->core.attr.exclude_user)
+ return false;
+
/* Is there already the separator in the name. */
if (strchr(name, '/') ||
- strchr(name, ':'))
+ (strchr(name, ':') && !evsel->is_libpfm_event))
sep = "";
if (asprintf(&new_name, "%s%su", name, sep) < 0)
@@ -2475,39 +2474,46 @@ static bool find_process(const char *name)
return ret ? false : true;
}
-int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
- int err, char *msg, size_t size)
+int evsel__open_strerror(struct evsel *evsel, struct target *target,
+ int err, char *msg, size_t size)
{
char sbuf[STRERR_BUFSIZE];
- int printed = 0;
+ int printed = 0, enforced = 0;
switch (err) {
case EPERM:
case EACCES:
+ printed += scnprintf(msg + printed, size - printed,
+ "Access to performance monitoring and observability operations is limited.\n");
+
+ if (!sysfs__read_int("fs/selinux/enforce", &enforced)) {
+ if (enforced) {
+ printed += scnprintf(msg + printed, size - printed,
+ "Enforced MAC policy settings (SELinux) can limit access to performance\n"
+ "monitoring and observability operations. Inspect system audit records for\n"
+ "more perf_event access control information and adjusting the policy.\n");
+ }
+ }
+
if (err == EPERM)
- printed = scnprintf(msg, size,
- "No permission to enable %s event.\n\n",
- perf_evsel__name(evsel));
+ printed += scnprintf(msg, size,
+ "No permission to enable %s event.\n\n", evsel__name(evsel));
return scnprintf(msg + printed, size - printed,
- "You may not have permission to collect %sstats.\n\n"
- "Consider tweaking /proc/sys/kernel/perf_event_paranoid,\n"
- "which controls use of the performance events system by\n"
- "unprivileged users (without CAP_SYS_ADMIN).\n\n"
- "The current value is %d:\n\n"
+ "Consider adjusting /proc/sys/kernel/perf_event_paranoid setting to open\n"
+ "access to performance monitoring and observability operations for users\n"
+ "without CAP_PERFMON or CAP_SYS_ADMIN Linux capability.\n"
+ "perf_event_paranoid setting is %d:\n"
" -1: Allow use of (almost) all events by all users\n"
" Ignore mlock limit after perf_event_mlock_kb without CAP_IPC_LOCK\n"
- ">= 0: Disallow ftrace function tracepoint by users without CAP_SYS_ADMIN\n"
- " Disallow raw tracepoint access by users without CAP_SYS_ADMIN\n"
- ">= 1: Disallow CPU event access by users without CAP_SYS_ADMIN\n"
- ">= 2: Disallow kernel profiling by users without CAP_SYS_ADMIN\n\n"
- "To make this setting permanent, edit /etc/sysctl.conf too, e.g.:\n\n"
- " kernel.perf_event_paranoid = -1\n" ,
- target->system_wide ? "system-wide " : "",
- perf_event_paranoid());
+ ">= 0: Disallow raw and ftrace function tracepoint access\n"
+ ">= 1: Disallow CPU event access\n"
+ ">= 2: Disallow kernel profiling\n"
+ "To make the adjusted perf_event_paranoid setting permanent preserve it\n"
+ "in /etc/sysctl.conf (e.g. kernel.perf_event_paranoid = <setting>)",
+ perf_event_paranoid());
case ENOENT:
- return scnprintf(msg, size, "The %s event is not supported.",
- perf_evsel__name(evsel));
+ return scnprintf(msg, size, "The %s event is not supported.", evsel__name(evsel));
case EMFILE:
return scnprintf(msg, size, "%s",
"Too many events are opened.\n"
@@ -2531,7 +2537,7 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
if (evsel->core.attr.sample_period != 0)
return scnprintf(msg, size,
"%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'",
- perf_evsel__name(evsel));
+ evsel__name(evsel));
if (evsel->core.attr.precise_ip)
return scnprintf(msg, size, "%s",
"\'precise\' request may not be supported. Try removing 'p' modifier.");
@@ -2564,11 +2570,10 @@ int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
return scnprintf(msg, size,
"The sys_perf_event_open() syscall returned with %d (%s) for event (%s).\n"
"/bin/dmesg | grep -i perf may provide additional information.\n",
- err, str_error_r(err, sbuf, sizeof(sbuf)),
- perf_evsel__name(evsel));
+ err, str_error_r(err, sbuf, sizeof(sbuf)), evsel__name(evsel));
}
-struct perf_env *perf_evsel__env(struct evsel *evsel)
+struct perf_env *evsel__env(struct evsel *evsel)
{
if (evsel && evsel->evlist)
return evsel->evlist->env;
@@ -2593,7 +2598,7 @@ static int store_evsel_ids(struct evsel *evsel, struct evlist *evlist)
return 0;
}
-int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
+int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
{
struct perf_cpu_map *cpus = evsel->core.cpus;
struct perf_thread_map *threads = evsel->core.threads;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index dc14f4a823cd..0f963c2a88a5 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -18,7 +18,7 @@ struct perf_counts;
struct perf_stat_evsel;
union perf_event;
-typedef int (perf_evsel__sb_cb_t)(union perf_event *event, void *data);
+typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
enum perf_tool_event {
PERF_TOOL_NONE = 0,
@@ -76,6 +76,7 @@ struct evsel {
bool ignore_missing_thread;
bool forced_leader;
bool use_uncore_alias;
+ bool is_libpfm_event;
/* parse modifier helper */
int exclude_GH;
int sample_read;
@@ -101,9 +102,17 @@ struct evsel {
int cpu_iter;
const char *pmu_name;
struct {
- perf_evsel__sb_cb_t *cb;
- void *data;
+ evsel__sb_cb_t *cb;
+ void *data;
} side_band;
+ /*
+ * For reporting purposes, an evsel sample can have a callchain
+ * synthesized from AUX area data. Keep track of synthesized sample
+ * types here. Note, the recorded sample_type cannot be changed because
+ * it is needed to continue to parse events.
+ * See also evsel__has_callchain().
+ */
+ __u64 synth_sample_type;
};
struct perf_missing_features {
@@ -119,6 +128,8 @@ struct perf_missing_features {
bool ksymbol;
bool bpf;
bool aux_output;
+ bool branch_hw_idx;
+ bool cgroup;
};
extern struct perf_missing_features perf_missing_features;
@@ -133,7 +144,7 @@ static inline struct perf_cpu_map *evsel__cpus(struct evsel *evsel)
return perf_evsel__cpus(&evsel->core);
}
-static inline int perf_evsel__nr_cpus(struct evsel *evsel)
+static inline int evsel__nr_cpus(struct evsel *evsel)
{
return evsel__cpus(evsel)->nr;
}
@@ -141,233 +152,207 @@ static inline int perf_evsel__nr_cpus(struct evsel *evsel)
void perf_counts_values__scale(struct perf_counts_values *count,
bool scale, s8 *pscaled);
-void perf_evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
- struct perf_counts_values *count);
+void evsel__compute_deltas(struct evsel *evsel, int cpu, int thread,
+ struct perf_counts_values *count);
-int perf_evsel__object_config(size_t object_size,
- int (*init)(struct evsel *evsel),
- void (*fini)(struct evsel *evsel));
+int evsel__object_config(size_t object_size,
+ int (*init)(struct evsel *evsel),
+ void (*fini)(struct evsel *evsel));
-struct evsel *perf_evsel__new_idx(struct perf_event_attr *attr, int idx);
+struct perf_pmu *evsel__find_pmu(struct evsel *evsel);
+bool evsel__is_aux_event(struct evsel *evsel);
+
+struct evsel *evsel__new_idx(struct perf_event_attr *attr, int idx);
static inline struct evsel *evsel__new(struct perf_event_attr *attr)
{
- return perf_evsel__new_idx(attr, 0);
+ return evsel__new_idx(attr, 0);
}
-struct evsel *perf_evsel__newtp_idx(const char *sys, const char *name, int idx);
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);
/*
* Returns pointer with encoded error via <linux/err.h> interface.
*/
-static inline struct evsel *perf_evsel__newtp(const char *sys, const char *name)
+static inline struct evsel *evsel__newtp(const char *sys, const char *name)
{
- return perf_evsel__newtp_idx(sys, name, 0);
+ return evsel__newtp_idx(sys, name, 0);
}
-struct evsel *perf_evsel__new_cycles(bool precise);
+struct evsel *evsel__new_cycles(bool precise);
struct tep_event *event_format__new(const char *sys, const char *name);
void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx);
-void perf_evsel__exit(struct evsel *evsel);
+void evsel__exit(struct evsel *evsel);
void evsel__delete(struct evsel *evsel);
struct callchain_param;
-void perf_evsel__config(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *callchain);
-void perf_evsel__config_callchain(struct evsel *evsel,
- struct record_opts *opts,
- struct callchain_param *callchain);
-
-int __perf_evsel__sample_size(u64 sample_type);
-void perf_evsel__calc_id_pos(struct evsel *evsel);
-
-bool perf_evsel__is_cache_op_valid(u8 type, u8 op);
-
-#define PERF_EVSEL__MAX_ALIASES 8
-
-extern const char *perf_evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX]
- [PERF_EVSEL__MAX_ALIASES];
-extern const char *perf_evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_EVSEL__MAX_ALIASES];
-extern const char *perf_evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX]
- [PERF_EVSEL__MAX_ALIASES];
-extern const char *perf_evsel__hw_names[PERF_COUNT_HW_MAX];
-extern const char *perf_evsel__sw_names[PERF_COUNT_SW_MAX];
-int __perf_evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result,
- char *bf, size_t size);
-const char *perf_evsel__name(struct evsel *evsel);
-
-const char *perf_evsel__group_name(struct evsel *evsel);
-int perf_evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
-
-void __perf_evsel__set_sample_bit(struct evsel *evsel,
- enum perf_event_sample_format bit);
-void __perf_evsel__reset_sample_bit(struct evsel *evsel,
- enum perf_event_sample_format bit);
-
-#define perf_evsel__set_sample_bit(evsel, bit) \
- __perf_evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
-
-#define perf_evsel__reset_sample_bit(evsel, bit) \
- __perf_evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
-
-void perf_evsel__set_sample_id(struct evsel *evsel,
- bool use_sample_identifier);
-
-int perf_evsel__set_filter(struct evsel *evsel, const char *filter);
-int perf_evsel__append_tp_filter(struct evsel *evsel, const char *filter);
-int perf_evsel__append_addr_filter(struct evsel *evsel,
- const char *filter);
+void evsel__config(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain);
+void evsel__config_callchain(struct evsel *evsel, struct record_opts *opts,
+ struct callchain_param *callchain);
+
+int __evsel__sample_size(u64 sample_type);
+void evsel__calc_id_pos(struct evsel *evsel);
+
+bool evsel__is_cache_op_valid(u8 type, u8 op);
+
+#define EVSEL__MAX_ALIASES 8
+
+extern const char *evsel__hw_cache[PERF_COUNT_HW_CACHE_MAX][EVSEL__MAX_ALIASES];
+extern const char *evsel__hw_cache_op[PERF_COUNT_HW_CACHE_OP_MAX][EVSEL__MAX_ALIASES];
+extern const char *evsel__hw_cache_result[PERF_COUNT_HW_CACHE_RESULT_MAX][EVSEL__MAX_ALIASES];
+extern const char *evsel__hw_names[PERF_COUNT_HW_MAX];
+extern const char *evsel__sw_names[PERF_COUNT_SW_MAX];
+int __evsel__hw_cache_type_op_res_name(u8 type, u8 op, u8 result, char *bf, size_t size);
+const char *evsel__name(struct evsel *evsel);
+
+const char *evsel__group_name(struct evsel *evsel);
+int evsel__group_desc(struct evsel *evsel, char *buf, size_t size);
+
+void __evsel__set_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit);
+void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_format bit);
+
+#define evsel__set_sample_bit(evsel, bit) \
+ __evsel__set_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+#define evsel__reset_sample_bit(evsel, bit) \
+ __evsel__reset_sample_bit(evsel, PERF_SAMPLE_##bit)
+
+void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
+
+int evsel__set_filter(struct evsel *evsel, const char *filter);
+int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
+int evsel__append_addr_filter(struct evsel *evsel, const char *filter);
int evsel__enable_cpu(struct evsel *evsel, int cpu);
int evsel__enable(struct evsel *evsel);
int evsel__disable(struct evsel *evsel);
int evsel__disable_cpu(struct evsel *evsel, int cpu);
-int perf_evsel__open_per_cpu(struct evsel *evsel,
- struct perf_cpu_map *cpus,
- int cpu);
-int perf_evsel__open_per_thread(struct evsel *evsel,
- struct perf_thread_map *threads);
+int evsel__open_per_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, int cpu);
+int evsel__open_per_thread(struct evsel *evsel, struct perf_thread_map *threads);
int evsel__open(struct evsel *evsel, struct perf_cpu_map *cpus,
struct perf_thread_map *threads);
void evsel__close(struct evsel *evsel);
struct perf_sample;
-void *perf_evsel__rawptr(struct evsel *evsel, struct perf_sample *sample,
- const char *name);
-u64 perf_evsel__intval(struct evsel *evsel, struct perf_sample *sample,
- const char *name);
+void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name);
+u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name);
-static inline char *perf_evsel__strval(struct evsel *evsel,
- struct perf_sample *sample,
- const char *name)
+static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sample, const char *name)
{
- return perf_evsel__rawptr(evsel, sample, name);
+ return evsel__rawptr(evsel, sample, name);
}
struct tep_format_field;
u64 format_field__intval(struct tep_format_field *field, struct perf_sample *sample, bool needs_swap);
-struct tep_format_field *perf_evsel__field(struct evsel *evsel, const char *name);
+struct tep_format_field *evsel__field(struct evsel *evsel, const char *name);
-#define perf_evsel__match(evsel, t, c) \
+#define evsel__match(evsel, t, c) \
(evsel->core.attr.type == PERF_TYPE_##t && \
evsel->core.attr.config == PERF_COUNT_##c)
-static inline bool perf_evsel__match2(struct evsel *e1,
- struct evsel *e2)
+static inline bool evsel__match2(struct evsel *e1, struct evsel *e2)
{
return (e1->core.attr.type == e2->core.attr.type) &&
(e1->core.attr.config == e2->core.attr.config);
}
-#define perf_evsel__cmp(a, b) \
- ((a) && \
- (b) && \
- (a)->core.attr.type == (b)->core.attr.type && \
- (a)->core.attr.config == (b)->core.attr.config)
-
-int perf_evsel__read_counter(struct evsel *evsel, int cpu, int thread);
+int evsel__read_counter(struct evsel *evsel, int cpu, int thread);
-int __perf_evsel__read_on_cpu(struct evsel *evsel,
- int cpu, int thread, bool scale);
+int __evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread, bool scale);
/**
- * perf_evsel__read_on_cpu - Read out the results on a CPU and thread
+ * evsel__read_on_cpu - Read out the results on a CPU and thread
*
* @evsel - event selector to read value
* @cpu - CPU of interest
* @thread - thread of interest
*/
-static inline int perf_evsel__read_on_cpu(struct evsel *evsel,
- int cpu, int thread)
+static inline int evsel__read_on_cpu(struct evsel *evsel, int cpu, int thread)
{
- return __perf_evsel__read_on_cpu(evsel, cpu, thread, false);
+ return __evsel__read_on_cpu(evsel, cpu, thread, false);
}
/**
- * perf_evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
+ * evsel__read_on_cpu_scaled - Read out the results on a CPU and thread, scaled
*
* @evsel - event selector to read value
* @cpu - CPU of interest
* @thread - thread of interest
*/
-static inline int perf_evsel__read_on_cpu_scaled(struct evsel *evsel,
- int cpu, int thread)
+static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu, int thread)
{
- return __perf_evsel__read_on_cpu(evsel, cpu, thread, true);
+ return __evsel__read_on_cpu(evsel, cpu, thread, true);
}
-int perf_evsel__parse_sample(struct evsel *evsel, union perf_event *event,
- struct perf_sample *sample);
+int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
+ struct perf_sample *sample);
-int perf_evsel__parse_sample_timestamp(struct evsel *evsel,
- union perf_event *event,
- u64 *timestamp);
+int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event,
+ u64 *timestamp);
-static inline struct evsel *perf_evsel__next(struct evsel *evsel)
+static inline struct evsel *evsel__next(struct evsel *evsel)
{
return list_entry(evsel->core.node.next, struct evsel, core.node);
}
-static inline struct evsel *perf_evsel__prev(struct evsel *evsel)
+static inline struct evsel *evsel__prev(struct evsel *evsel)
{
return list_entry(evsel->core.node.prev, struct evsel, core.node);
}
/**
- * perf_evsel__is_group_leader - Return whether given evsel is a leader event
+ * evsel__is_group_leader - Return whether given evsel is a leader event
*
* @evsel - evsel selector to be tested
*
* Return %true if @evsel is a group leader or a stand-alone event
*/
-static inline bool perf_evsel__is_group_leader(const struct evsel *evsel)
+static inline bool evsel__is_group_leader(const struct evsel *evsel)
{
return evsel->leader == evsel;
}
/**
- * perf_evsel__is_group_event - Return whether given evsel is a group event
+ * evsel__is_group_event - Return whether given evsel is a group event
*
* @evsel - evsel selector to be tested
*
* Return %true iff event group view is enabled and @evsel is a actual group
* leader which has other members in the group
*/
-static inline bool perf_evsel__is_group_event(struct evsel *evsel)
+static inline bool evsel__is_group_event(struct evsel *evsel)
{
if (!symbol_conf.event_group)
return false;
- return perf_evsel__is_group_leader(evsel) && evsel->core.nr_members > 1;
+ return evsel__is_group_leader(evsel) && evsel->core.nr_members > 1;
}
-bool perf_evsel__is_function_event(struct evsel *evsel);
+bool evsel__is_function_event(struct evsel *evsel);
-static inline bool perf_evsel__is_bpf_output(struct evsel *evsel)
+static inline bool evsel__is_bpf_output(struct evsel *evsel)
{
- return perf_evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
+ return evsel__match(evsel, SOFTWARE, SW_BPF_OUTPUT);
}
-static inline bool perf_evsel__is_clock(struct evsel *evsel)
+static inline bool evsel__is_clock(struct evsel *evsel)
{
- return perf_evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
- perf_evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
+ return evsel__match(evsel, SOFTWARE, SW_CPU_CLOCK) ||
+ evsel__match(evsel, SOFTWARE, SW_TASK_CLOCK);
}
-bool perf_evsel__fallback(struct evsel *evsel, int err,
- char *msg, size_t msgsize);
-int perf_evsel__open_strerror(struct evsel *evsel, struct target *target,
- int err, char *msg, size_t size);
+bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize);
+int evsel__open_strerror(struct evsel *evsel, struct target *target,
+ int err, char *msg, size_t size);
-static inline int perf_evsel__group_idx(struct evsel *evsel)
+static inline int evsel__group_idx(struct evsel *evsel)
{
return evsel->idx - evsel->leader->idx;
}
@@ -384,17 +369,37 @@ for ((_evsel) = _leader; \
(_evsel) && (_evsel)->leader == (_leader); \
(_evsel) = list_entry((_evsel)->core.node.next, struct evsel, core.node))
-static inline bool perf_evsel__has_branch_callstack(const struct evsel *evsel)
+static inline bool evsel__has_branch_callstack(const struct evsel *evsel)
{
return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK;
}
+static inline bool evsel__has_branch_hw_idx(const struct evsel *evsel)
+{
+ return evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX;
+}
+
static inline bool evsel__has_callchain(const struct evsel *evsel)
{
- return (evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN) != 0;
+ /*
+ * For reporting purposes, an evsel sample can have a recorded callchain
+ * or a callchain synthesized from AUX area data.
+ */
+ return evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN ||
+ evsel->synth_sample_type & PERF_SAMPLE_CALLCHAIN;
+}
+
+static inline bool evsel__has_br_stack(const struct evsel *evsel)
+{
+ /*
+ * For reporting purposes, an evsel sample can have a recorded branch
+ * stack or a branch stack synthesized from AUX area data.
+ */
+ return evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK ||
+ evsel->synth_sample_type & PERF_SAMPLE_BRANCH_STACK;
}
-struct perf_env *perf_evsel__env(struct evsel *evsel);
+struct perf_env *evsel__env(struct evsel *evsel);
-int perf_evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
+int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
#endif /* __PERF_EVSEL_H */
diff --git a/tools/perf/util/evsel_config.h b/tools/perf/util/evsel_config.h
index e026ab67b008..aee6f808b512 100644
--- a/tools/perf/util/evsel_config.h
+++ b/tools/perf/util/evsel_config.h
@@ -6,30 +6,30 @@
#include <stdbool.h>
/*
- * The 'struct perf_evsel_config_term' is used to pass event
- * specific configuration data to perf_evsel__config routine.
+ * The 'struct evsel_config_term' is used to pass event
+ * specific configuration data to evsel__config routine.
* It is allocated within event parsing and attached to
- * perf_evsel::config_terms list head.
+ * evsel::config_terms list head.
*/
enum evsel_term_type {
- PERF_EVSEL__CONFIG_TERM_PERIOD,
- PERF_EVSEL__CONFIG_TERM_FREQ,
- PERF_EVSEL__CONFIG_TERM_TIME,
- PERF_EVSEL__CONFIG_TERM_CALLGRAPH,
- PERF_EVSEL__CONFIG_TERM_STACK_USER,
- PERF_EVSEL__CONFIG_TERM_INHERIT,
- PERF_EVSEL__CONFIG_TERM_MAX_STACK,
- PERF_EVSEL__CONFIG_TERM_MAX_EVENTS,
- PERF_EVSEL__CONFIG_TERM_OVERWRITE,
- PERF_EVSEL__CONFIG_TERM_DRV_CFG,
- PERF_EVSEL__CONFIG_TERM_BRANCH,
- PERF_EVSEL__CONFIG_TERM_PERCORE,
- PERF_EVSEL__CONFIG_TERM_AUX_OUTPUT,
- PERF_EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
- PERF_EVSEL__CONFIG_TERM_CFG_CHG,
+ EVSEL__CONFIG_TERM_PERIOD,
+ EVSEL__CONFIG_TERM_FREQ,
+ EVSEL__CONFIG_TERM_TIME,
+ EVSEL__CONFIG_TERM_CALLGRAPH,
+ EVSEL__CONFIG_TERM_STACK_USER,
+ EVSEL__CONFIG_TERM_INHERIT,
+ EVSEL__CONFIG_TERM_MAX_STACK,
+ EVSEL__CONFIG_TERM_MAX_EVENTS,
+ EVSEL__CONFIG_TERM_OVERWRITE,
+ EVSEL__CONFIG_TERM_DRV_CFG,
+ EVSEL__CONFIG_TERM_BRANCH,
+ EVSEL__CONFIG_TERM_PERCORE,
+ EVSEL__CONFIG_TERM_AUX_OUTPUT,
+ EVSEL__CONFIG_TERM_AUX_SAMPLE_SIZE,
+ EVSEL__CONFIG_TERM_CFG_CHG,
};
-struct perf_evsel_config_term {
+struct evsel_config_term {
struct list_head list;
enum evsel_term_type type;
bool free_str;
@@ -53,10 +53,9 @@ struct perf_evsel_config_term {
struct evsel;
-struct perf_evsel_config_term *__perf_evsel__get_config_term(struct evsel *evsel,
- enum evsel_term_type type);
+struct evsel_config_term *__evsel__get_config_term(struct evsel *evsel, enum evsel_term_type type);
-#define perf_evsel__get_config_term(evsel, type) \
- __perf_evsel__get_config_term(evsel, PERF_EVSEL__CONFIG_TERM_ ## type)
+#define evsel__get_config_term(evsel, type) \
+ __evsel__get_config_term(evsel, EVSEL__CONFIG_TERM_ ## type)
#endif // __PERF_EVSEL_CONFIG_H
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 3b4842840db0..fb498a723a00 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -35,8 +35,7 @@ static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, vo
return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val);
}
-int perf_evsel__fprintf(struct evsel *evsel,
- struct perf_attr_details *details, FILE *fp)
+int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp)
{
bool first = true;
int printed = 0;
@@ -44,22 +43,22 @@ int perf_evsel__fprintf(struct evsel *evsel,
if (details->event_group) {
struct evsel *pos;
- if (!perf_evsel__is_group_leader(evsel))
+ if (!evsel__is_group_leader(evsel))
return 0;
if (evsel->core.nr_members > 1)
printed += fprintf(fp, "%s{", evsel->group_name ?: "");
- printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+ printed += fprintf(fp, "%s", evsel__name(evsel));
for_each_group_member(pos, evsel)
- printed += fprintf(fp, ",%s", perf_evsel__name(pos));
+ printed += fprintf(fp, ",%s", evsel__name(pos));
if (evsel->core.nr_members > 1)
printed += fprintf(fp, "}");
goto out;
}
- printed += fprintf(fp, "%s", perf_evsel__name(evsel));
+ printed += fprintf(fp, "%s", evsel__name(evsel));
if (details->verbose) {
printed += perf_event_attr__fprintf(fp, &evsel->core.attr,
diff --git a/tools/perf/util/evsel_fprintf.h b/tools/perf/util/evsel_fprintf.h
index 47e6c8456bb1..3093d096c29f 100644
--- a/tools/perf/util/evsel_fprintf.h
+++ b/tools/perf/util/evsel_fprintf.h
@@ -15,8 +15,7 @@ struct perf_attr_details {
bool trace_fields;
};
-int perf_evsel__fprintf(struct evsel *evsel,
- struct perf_attr_details *details, FILE *fp);
+int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE *fp);
#define EVSEL__PRINT_IP (1<<0)
#define EVSEL__PRINT_SYM (1<<1)
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
new file mode 100644
index 000000000000..f64ab91c432b
--- /dev/null
+++ b/tools/perf/util/expr.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdbool.h>
+#include <assert.h>
+#include "expr.h"
+#include "expr-bison.h"
+#include "expr-flex.h"
+#include <linux/kernel.h>
+
+#ifdef PARSER_DEBUG
+extern int expr_debug;
+#endif
+
+static size_t key_hash(const void *key, void *ctx __maybe_unused)
+{
+ const char *str = (const char *)key;
+ size_t hash = 0;
+
+ while (*str != '\0') {
+ hash *= 31;
+ hash += *str;
+ str++;
+ }
+ return hash;
+}
+
+static bool key_equal(const void *key1, const void *key2,
+ void *ctx __maybe_unused)
+{
+ return !strcmp((const char *)key1, (const char *)key2);
+}
+
+/* Caller must make sure id is allocated */
+int expr__add_id(struct expr_parse_ctx *ctx, const char *name, double val)
+{
+ double *val_ptr = NULL, *old_val = NULL;
+ char *old_key = NULL;
+ int ret;
+
+ if (val != 0.0) {
+ val_ptr = malloc(sizeof(double));
+ if (!val_ptr)
+ return -ENOMEM;
+ *val_ptr = val;
+ }
+ ret = hashmap__set(&ctx->ids, name, val_ptr,
+ (const void **)&old_key, (void **)&old_val);
+ free(old_key);
+ free(old_val);
+ return ret;
+}
+
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr)
+{
+ double *data;
+
+ if (!hashmap__find(&ctx->ids, id, (void **)&data))
+ return -1;
+ *val_ptr = (data == NULL) ? 0.0 : *data;
+ return 0;
+}
+
+void expr__ctx_init(struct expr_parse_ctx *ctx)
+{
+ hashmap__init(&ctx->ids, key_hash, key_equal, NULL);
+}
+
+void expr__ctx_clear(struct expr_parse_ctx *ctx)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+
+ hashmap__for_each_entry((&ctx->ids), cur, bkt) {
+ free((char *)cur->key);
+ free(cur->value);
+ }
+ hashmap__clear(&ctx->ids);
+}
+
+static int
+__expr__parse(double *val, struct expr_parse_ctx *ctx, const char *expr,
+ int start, int runtime)
+{
+ struct expr_scanner_ctx scanner_ctx = {
+ .start_token = start,
+ .runtime = runtime,
+ };
+ YY_BUFFER_STATE buffer;
+ void *scanner;
+ int ret;
+
+ ret = expr_lex_init_extra(&scanner_ctx, &scanner);
+ if (ret)
+ return ret;
+
+ buffer = expr__scan_string(expr, scanner);
+
+#ifdef PARSER_DEBUG
+ expr_debug = 1;
+ expr_set_debug(1, scanner);
+#endif
+
+ ret = expr_parse(val, ctx, scanner);
+
+ expr__flush_buffer(buffer, scanner);
+ expr__delete_buffer(buffer, scanner);
+ expr_lex_destroy(scanner);
+ return ret;
+}
+
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+ const char *expr, int runtime)
+{
+ return __expr__parse(final_val, ctx, expr, EXPR_PARSE, runtime) ? -1 : 0;
+}
+
+int expr__find_other(const char *expr, const char *one,
+ struct expr_parse_ctx *ctx, int runtime)
+{
+ double *old_val = NULL;
+ char *old_key = NULL;
+ int ret = __expr__parse(NULL, ctx, expr, EXPR_OTHER, runtime);
+
+ if (one) {
+ hashmap__delete(&ctx->ids, one,
+ (const void **)&old_key, (void **)&old_val);
+ free(old_key);
+ free(old_val);
+ }
+
+ return ret;
+}
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index 046160831f90..8a2c1074f90f 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,25 +2,31 @@
#ifndef PARSE_CTX_H
#define PARSE_CTX_H 1
-#define EXPR_MAX_OTHER 15
-#define MAX_PARSE_ID EXPR_MAX_OTHER
+// There are fixes that need to land upstream before we can use libbpf's headers,
+// for now use our copy uncoditionally, since the data structures at this point
+// are exactly the same, no problem.
+//#ifdef HAVE_LIBBPF_SUPPORT
+//#include <bpf/hashmap.h>
+//#else
+#include "util/hashmap.h"
+//#endif
-struct parse_id {
- const char *name;
- double val;
+struct expr_parse_ctx {
+ struct hashmap ids;
};
-struct parse_ctx {
- int num_ids;
- struct parse_id ids[MAX_PARSE_ID];
+struct expr_scanner_ctx {
+ int start_token;
+ int runtime;
};
-void expr__ctx_init(struct parse_ctx *ctx);
-void expr__add_id(struct parse_ctx *ctx, const char *id, double val);
-#ifndef IN_EXPR_Y
-int expr__parse(double *final_val, struct parse_ctx *ctx, const char **pp);
-#endif
-int expr__find_other(const char *p, const char *one, const char ***other,
- int *num_other);
+void expr__ctx_init(struct expr_parse_ctx *ctx);
+void expr__ctx_clear(struct expr_parse_ctx *ctx);
+int expr__add_id(struct expr_parse_ctx *ctx, const char *id, double val);
+int expr__get_id(struct expr_parse_ctx *ctx, const char *id, double *val_ptr);
+int expr__parse(double *final_val, struct expr_parse_ctx *ctx,
+ const char *expr, int runtime);
+int expr__find_other(const char *expr, const char *one,
+ struct expr_parse_ctx *ids, int runtime);
#endif
diff --git a/tools/perf/util/expr.l b/tools/perf/util/expr.l
new file mode 100644
index 000000000000..f397bf8b1a48
--- /dev/null
+++ b/tools/perf/util/expr.l
@@ -0,0 +1,127 @@
+%option prefix="expr_"
+%option reentrant
+%option bison-bridge
+
+%{
+#include <linux/compiler.h>
+#include "expr.h"
+#include "expr-bison.h"
+
+char *expr_get_text(yyscan_t yyscanner);
+YYSTYPE *expr_get_lval(yyscan_t yyscanner);
+
+static double __value(YYSTYPE *yylval, char *str, int token)
+{
+ double num;
+
+ errno = 0;
+ num = strtod(str, NULL);
+ if (errno)
+ return EXPR_ERROR;
+
+ yylval->num = num;
+ return token;
+}
+
+static int value(yyscan_t scanner)
+{
+ YYSTYPE *yylval = expr_get_lval(scanner);
+ char *text = expr_get_text(scanner);
+
+ return __value(yylval, text, NUMBER);
+}
+
+/*
+ * Allow @ instead of / to be able to specify pmu/event/ without
+ * conflicts with normal division.
+ */
+static char *normalize(char *str, int runtime)
+{
+ char *ret = str;
+ char *dst = str;
+
+ while (*str) {
+ if (*str == '@')
+ *dst++ = '/';
+ else if (*str == '\\')
+ *dst++ = *++str;
+ else if (*str == '?') {
+ char *paramval;
+ int i = 0;
+ int size = asprintf(&paramval, "%d", runtime);
+
+ if (size < 0)
+ *dst++ = '0';
+ else {
+ while (i < size)
+ *dst++ = paramval[i++];
+ free(paramval);
+ }
+ }
+ else
+ *dst++ = *str;
+ str++;
+ }
+
+ *dst = 0x0;
+ return ret;
+}
+
+static int str(yyscan_t scanner, int token, int runtime)
+{
+ YYSTYPE *yylval = expr_get_lval(scanner);
+ char *text = expr_get_text(scanner);
+
+ yylval->str = normalize(strdup(text), runtime);
+ if (!yylval->str)
+ return EXPR_ERROR;
+
+ yylval->str = normalize(yylval->str, runtime);
+ return token;
+}
+%}
+
+number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)
+
+sch [-,=]
+spec \\{sch}
+sym [0-9a-zA-Z_\.:@?]+
+symbol ({spec}|{sym})+
+
+%%
+ struct expr_scanner_ctx *sctx = expr_get_extra(yyscanner);
+
+ {
+ int start_token = sctx->start_token;
+
+ if (sctx->start_token) {
+ sctx->start_token = 0;
+ return start_token;
+ }
+ }
+
+max { return MAX; }
+min { return MIN; }
+if { return IF; }
+else { return ELSE; }
+#smt_on { return SMT_ON; }
+{number} { return value(yyscanner); }
+{symbol} { return str(yyscanner, ID, sctx->runtime); }
+"|" { return '|'; }
+"^" { return '^'; }
+"&" { return '&'; }
+"-" { return '-'; }
+"+" { return '+'; }
+"*" { return '*'; }
+"/" { return '/'; }
+"%" { return '%'; }
+"(" { return '('; }
+")" { return ')'; }
+"," { return ','; }
+. { }
+%%
+
+int expr_wrap(void *scanner __maybe_unused)
+{
+ return 1;
+}
diff --git a/tools/perf/util/expr.y b/tools/perf/util/expr.y
index 7d226241f1d7..bf3e898e3055 100644
--- a/tools/perf/util/expr.y
+++ b/tools/perf/util/expr.y
@@ -1,31 +1,33 @@
/* Simple expression parser */
%{
+#define YYDEBUG 1
+#include <stdio.h>
#include "util.h"
#include "util/debug.h"
#include <stdlib.h> // strtod()
#define IN_EXPR_Y 1
#include "expr.h"
#include "smt.h"
-#include <assert.h>
#include <string.h>
-#define MAXIDLEN 256
%}
%define api.pure full
%parse-param { double *final_val }
-%parse-param { struct parse_ctx *ctx }
-%parse-param { const char **pp }
-%lex-param { const char **pp }
+%parse-param { struct expr_parse_ctx *ctx }
+%parse-param {void *scanner}
+%lex-param {void* scanner}
%union {
- double num;
- char id[MAXIDLEN+1];
+ double num;
+ char *str;
}
+%token EXPR_PARSE EXPR_OTHER EXPR_ERROR
%token <num> NUMBER
-%token <id> ID
+%token <str> ID
+%destructor { free ($$); } <str>
%token MIN MAX IF ELSE SMT_ON
%left MIN MAX IF
%left '|'
@@ -37,31 +39,32 @@
%type <num> expr if_expr
%{
-static int expr__lex(YYSTYPE *res, const char **pp);
-
-static void expr__error(double *final_val __maybe_unused,
- struct parse_ctx *ctx __maybe_unused,
- const char **pp __maybe_unused,
+static void expr_error(double *final_val __maybe_unused,
+ struct expr_parse_ctx *ctx __maybe_unused,
+ void *scanner,
const char *s)
{
pr_debug("%s\n", s);
}
-static int lookup_id(struct parse_ctx *ctx, char *id, double *val)
-{
- int i;
+%}
+%%
+
+start:
+EXPR_PARSE all_expr
+|
+EXPR_OTHER all_other
- for (i = 0; i < ctx->num_ids; i++) {
- if (!strcasecmp(ctx->ids[i].name, id)) {
- *val = ctx->ids[i].val;
- return 0;
- }
- }
- return -1;
+all_other: all_other other
+|
+
+other: ID
+{
+ expr__add_id(ctx, $1, 0.0);
}
+|
+MIN | MAX | IF | ELSE | SMT_ON | NUMBER | '|' | '^' | '&' | '-' | '+' | '*' | '/' | '%' | '(' | ')' | ','
-%}
-%%
all_expr: if_expr { *final_val = $1; }
;
@@ -72,10 +75,12 @@ if_expr:
;
expr: NUMBER
- | ID { if (lookup_id(ctx, $1, &$$) < 0) {
+ | ID { if (expr__get_id(ctx, $1, &$$)) {
pr_debug("%s not found\n", $1);
+ free($1);
YYABORT;
}
+ free($1);
}
| expr '|' expr { $$ = (long)$1 | (long)$3; }
| expr '&' expr { $$ = (long)$1 & (long)$3; }
@@ -83,8 +88,18 @@ expr: NUMBER
| expr '+' expr { $$ = $1 + $3; }
| expr '-' expr { $$ = $1 - $3; }
| expr '*' expr { $$ = $1 * $3; }
- | expr '/' expr { if ($3 == 0) YYABORT; $$ = $1 / $3; }
- | expr '%' expr { if ((long)$3 == 0) YYABORT; $$ = (long)$1 % (long)$3; }
+ | expr '/' expr { if ($3 == 0) {
+ pr_debug("division by zero\n");
+ YYABORT;
+ }
+ $$ = $1 / $3;
+ }
+ | expr '%' expr { if ((long)$3 == 0) {
+ pr_debug("division by zero\n");
+ YYABORT;
+ }
+ $$ = (long)$1 % (long)$3;
+ }
| '-' expr %prec NEG { $$ = -$2; }
| '(' if_expr ')' { $$ = $2; }
| MIN '(' expr ',' expr ')' { $$ = $3 < $5 ? $3 : $5; }
@@ -93,146 +108,3 @@ expr: NUMBER
;
%%
-
-static int expr__symbol(YYSTYPE *res, const char *p, const char **pp)
-{
- char *dst = res->id;
- const char *s = p;
-
- if (*p == '#')
- *dst++ = *p++;
-
- while (isalnum(*p) || *p == '_' || *p == '.' || *p == ':' || *p == '@' || *p == '\\') {
- if (p - s >= MAXIDLEN)
- return -1;
- /*
- * Allow @ instead of / to be able to specify pmu/event/ without
- * conflicts with normal division.
- */
- if (*p == '@')
- *dst++ = '/';
- else if (*p == '\\')
- *dst++ = *++p;
- else
- *dst++ = *p;
- p++;
- }
- *dst = 0;
- *pp = p;
- dst = res->id;
- switch (dst[0]) {
- case 'm':
- if (!strcmp(dst, "min"))
- return MIN;
- if (!strcmp(dst, "max"))
- return MAX;
- break;
- case 'i':
- if (!strcmp(dst, "if"))
- return IF;
- break;
- case 'e':
- if (!strcmp(dst, "else"))
- return ELSE;
- break;
- case '#':
- if (!strcasecmp(dst, "#smt_on"))
- return SMT_ON;
- break;
- }
- return ID;
-}
-
-static int expr__lex(YYSTYPE *res, const char **pp)
-{
- int tok;
- const char *s;
- const char *p = *pp;
-
- while (isspace(*p))
- p++;
- s = p;
- switch (*p++) {
- case '#':
- case 'a' ... 'z':
- case 'A' ... 'Z':
- return expr__symbol(res, p - 1, pp);
- case '0' ... '9': case '.':
- res->num = strtod(s, (char **)&p);
- tok = NUMBER;
- break;
- default:
- tok = *s;
- break;
- }
- *pp = p;
- return tok;
-}
-
-/* Caller must make sure id is allocated */
-void expr__add_id(struct parse_ctx *ctx, const char *name, double val)
-{
- int idx;
- assert(ctx->num_ids < MAX_PARSE_ID);
- idx = ctx->num_ids++;
- ctx->ids[idx].name = name;
- ctx->ids[idx].val = val;
-}
-
-void expr__ctx_init(struct parse_ctx *ctx)
-{
- ctx->num_ids = 0;
-}
-
-static bool already_seen(const char *val, const char *one, const char **other,
- int num_other)
-{
- int i;
-
- if (one && !strcasecmp(one, val))
- return true;
- for (i = 0; i < num_other; i++)
- if (!strcasecmp(other[i], val))
- return true;
- return false;
-}
-
-int expr__find_other(const char *p, const char *one, const char ***other,
- int *num_otherp)
-{
- const char *orig = p;
- int err = -1;
- int num_other;
-
- *other = malloc((EXPR_MAX_OTHER + 1) * sizeof(char *));
- if (!*other)
- return -1;
-
- num_other = 0;
- for (;;) {
- YYSTYPE val;
- int tok = expr__lex(&val, &p);
- if (tok == 0) {
- err = 0;
- break;
- }
- if (tok == ID && !already_seen(val.id, one, *other, num_other)) {
- if (num_other >= EXPR_MAX_OTHER - 1) {
- pr_debug("Too many extra events in %s\n", orig);
- break;
- }
- (*other)[num_other] = strdup(val.id);
- if (!(*other)[num_other])
- return -1;
- num_other++;
- }
- }
- (*other)[num_other] = NULL;
- *num_otherp = num_other;
- if (err) {
- *num_otherp = 0;
- free(*other);
- *other = NULL;
- }
- return err;
-}
diff --git a/tools/perf/util/genelf_debug.c b/tools/perf/util/genelf_debug.c
index 30e9f618f6cd..dd40683bd4c0 100644
--- a/tools/perf/util/genelf_debug.c
+++ b/tools/perf/util/genelf_debug.c
@@ -342,7 +342,7 @@ static void emit_lineno_info(struct buffer_ext *be,
*/
/* start state of the state machine we take care of */
- unsigned long last_vma = code_addr;
+ unsigned long last_vma = 0;
char const *cur_filename = NULL;
unsigned long cur_file_idx = 0;
int last_line = 1;
@@ -473,7 +473,7 @@ jit_process_debug_info(uint64_t code_addr,
ent = debug_entry_next(ent);
}
add_compilation_unit(di, buffer_ext_size(dl));
- add_debug_line(dl, debug, nr_debug_entries, 0);
+ add_debug_line(dl, debug, nr_debug_entries, GEN_ELF_TEXT_OFFSET);
add_debug_abbrev(da);
if (0) buffer_ext_dump(da, "abbrev");
diff --git a/tools/perf/util/hashmap.c b/tools/perf/util/hashmap.c
new file mode 100644
index 000000000000..a405dad068f5
--- /dev/null
+++ b/tools/perf/util/hashmap.c
@@ -0,0 +1,238 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/* start with 4 buckets */
+#define HASHMAP_MIN_CAP_BITS 2
+
+static void hashmap_add_entry(struct hashmap_entry **pprev,
+ struct hashmap_entry *entry)
+{
+ entry->next = *pprev;
+ *pprev = entry;
+}
+
+static void hashmap_del_entry(struct hashmap_entry **pprev,
+ struct hashmap_entry *entry)
+{
+ *pprev = entry->next;
+ entry->next = NULL;
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn, void *ctx)
+{
+ map->hash_fn = hash_fn;
+ map->equal_fn = equal_fn;
+ map->ctx = ctx;
+
+ map->buckets = NULL;
+ map->cap = 0;
+ map->cap_bits = 0;
+ map->sz = 0;
+}
+
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn,
+ void *ctx)
+{
+ struct hashmap *map = malloc(sizeof(struct hashmap));
+
+ if (!map)
+ return ERR_PTR(-ENOMEM);
+ hashmap__init(map, hash_fn, equal_fn, ctx);
+ return map;
+}
+
+void hashmap__clear(struct hashmap *map)
+{
+ struct hashmap_entry *cur, *tmp;
+ size_t bkt;
+
+ hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+ free(cur);
+ }
+ free(map->buckets);
+ map->buckets = NULL;
+ map->cap = map->cap_bits = map->sz = 0;
+}
+
+void hashmap__free(struct hashmap *map)
+{
+ if (!map)
+ return;
+
+ hashmap__clear(map);
+ free(map);
+}
+
+size_t hashmap__size(const struct hashmap *map)
+{
+ return map->sz;
+}
+
+size_t hashmap__capacity(const struct hashmap *map)
+{
+ return map->cap;
+}
+
+static bool hashmap_needs_to_grow(struct hashmap *map)
+{
+ /* grow if empty or more than 75% filled */
+ return (map->cap == 0) || ((map->sz + 1) * 4 / 3 > map->cap);
+}
+
+static int hashmap_grow(struct hashmap *map)
+{
+ struct hashmap_entry **new_buckets;
+ struct hashmap_entry *cur, *tmp;
+ size_t new_cap_bits, new_cap;
+ size_t h, bkt;
+
+ new_cap_bits = map->cap_bits + 1;
+ if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
+ new_cap_bits = HASHMAP_MIN_CAP_BITS;
+
+ new_cap = 1UL << new_cap_bits;
+ new_buckets = calloc(new_cap, sizeof(new_buckets[0]));
+ if (!new_buckets)
+ return -ENOMEM;
+
+ hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+ h = hash_bits(map->hash_fn(cur->key, map->ctx), new_cap_bits);
+ hashmap_add_entry(&new_buckets[h], cur);
+ }
+
+ map->cap = new_cap;
+ map->cap_bits = new_cap_bits;
+ free(map->buckets);
+ map->buckets = new_buckets;
+
+ return 0;
+}
+
+static bool hashmap_find_entry(const struct hashmap *map,
+ const void *key, size_t hash,
+ struct hashmap_entry ***pprev,
+ struct hashmap_entry **entry)
+{
+ struct hashmap_entry *cur, **prev_ptr;
+
+ if (!map->buckets)
+ return false;
+
+ for (prev_ptr = &map->buckets[hash], cur = *prev_ptr;
+ cur;
+ prev_ptr = &cur->next, cur = cur->next) {
+ if (map->equal_fn(cur->key, key, map->ctx)) {
+ if (pprev)
+ *pprev = prev_ptr;
+ *entry = cur;
+ return true;
+ }
+ }
+
+ return false;
+}
+
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+ enum hashmap_insert_strategy strategy,
+ const void **old_key, void **old_value)
+{
+ struct hashmap_entry *entry;
+ size_t h;
+ int err;
+
+ if (old_key)
+ *old_key = NULL;
+ if (old_value)
+ *old_value = NULL;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (strategy != HASHMAP_APPEND &&
+ hashmap_find_entry(map, key, h, NULL, &entry)) {
+ if (old_key)
+ *old_key = entry->key;
+ if (old_value)
+ *old_value = entry->value;
+
+ if (strategy == HASHMAP_SET || strategy == HASHMAP_UPDATE) {
+ entry->key = key;
+ entry->value = value;
+ return 0;
+ } else if (strategy == HASHMAP_ADD) {
+ return -EEXIST;
+ }
+ }
+
+ if (strategy == HASHMAP_UPDATE)
+ return -ENOENT;
+
+ if (hashmap_needs_to_grow(map)) {
+ err = hashmap_grow(map);
+ if (err)
+ return err;
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ }
+
+ entry = malloc(sizeof(struct hashmap_entry));
+ if (!entry)
+ return -ENOMEM;
+
+ entry->key = key;
+ entry->value = value;
+ hashmap_add_entry(&map->buckets[h], entry);
+ map->sz++;
+
+ return 0;
+}
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value)
+{
+ struct hashmap_entry *entry;
+ size_t h;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (!hashmap_find_entry(map, key, h, NULL, &entry))
+ return false;
+
+ if (value)
+ *value = entry->value;
+ return true;
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+ const void **old_key, void **old_value)
+{
+ struct hashmap_entry **pprev, *entry;
+ size_t h;
+
+ h = hash_bits(map->hash_fn(key, map->ctx), map->cap_bits);
+ if (!hashmap_find_entry(map, key, h, &pprev, &entry))
+ return false;
+
+ if (old_key)
+ *old_key = entry->key;
+ if (old_value)
+ *old_value = entry->value;
+
+ hashmap_del_entry(pprev, entry);
+ free(entry);
+ map->sz--;
+
+ return true;
+}
+
diff --git a/tools/perf/util/hashmap.h b/tools/perf/util/hashmap.h
new file mode 100644
index 000000000000..df59fd4fc95b
--- /dev/null
+++ b/tools/perf/util/hashmap.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * Generic non-thread safe hash map implementation.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#ifndef __LIBBPF_HASHMAP_H
+#define __LIBBPF_HASHMAP_H
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <limits.h>
+#ifndef __WORDSIZE
+#define __WORDSIZE (__SIZEOF_LONG__ * 8)
+#endif
+
+static inline size_t hash_bits(size_t h, int bits)
+{
+ /* shuffle bits and return requested number of upper bits */
+ return (h * 11400714819323198485llu) >> (__WORDSIZE - bits);
+}
+
+typedef size_t (*hashmap_hash_fn)(const void *key, void *ctx);
+typedef bool (*hashmap_equal_fn)(const void *key1, const void *key2, void *ctx);
+
+struct hashmap_entry {
+ const void *key;
+ void *value;
+ struct hashmap_entry *next;
+};
+
+struct hashmap {
+ hashmap_hash_fn hash_fn;
+ hashmap_equal_fn equal_fn;
+ void *ctx;
+
+ struct hashmap_entry **buckets;
+ size_t cap;
+ size_t cap_bits;
+ size_t sz;
+};
+
+#define HASHMAP_INIT(hash_fn, equal_fn, ctx) { \
+ .hash_fn = (hash_fn), \
+ .equal_fn = (equal_fn), \
+ .ctx = (ctx), \
+ .buckets = NULL, \
+ .cap = 0, \
+ .cap_bits = 0, \
+ .sz = 0, \
+}
+
+void hashmap__init(struct hashmap *map, hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn, void *ctx);
+struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
+ hashmap_equal_fn equal_fn,
+ void *ctx);
+void hashmap__clear(struct hashmap *map);
+void hashmap__free(struct hashmap *map);
+
+size_t hashmap__size(const struct hashmap *map);
+size_t hashmap__capacity(const struct hashmap *map);
+
+/*
+ * Hashmap insertion strategy:
+ * - HASHMAP_ADD - only add key/value if key doesn't exist yet;
+ * - HASHMAP_SET - add key/value pair if key doesn't exist yet; otherwise,
+ * update value;
+ * - HASHMAP_UPDATE - update value, if key already exists; otherwise, do
+ * nothing and return -ENOENT;
+ * - HASHMAP_APPEND - always add key/value pair, even if key already exists.
+ * This turns hashmap into a multimap by allowing multiple values to be
+ * associated with the same key. Most useful read API for such hashmap is
+ * hashmap__for_each_key_entry() iteration. If hashmap__find() is still
+ * used, it will return last inserted key/value entry (first in a bucket
+ * chain).
+ */
+enum hashmap_insert_strategy {
+ HASHMAP_ADD,
+ HASHMAP_SET,
+ HASHMAP_UPDATE,
+ HASHMAP_APPEND,
+};
+
+/*
+ * hashmap__insert() adds key/value entry w/ various semantics, depending on
+ * provided strategy value. If a given key/value pair replaced already
+ * existing key/value pair, both old key and old value will be returned
+ * through old_key and old_value to allow calling code do proper memory
+ * management.
+ */
+int hashmap__insert(struct hashmap *map, const void *key, void *value,
+ enum hashmap_insert_strategy strategy,
+ const void **old_key, void **old_value);
+
+static inline int hashmap__add(struct hashmap *map,
+ const void *key, void *value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_ADD, NULL, NULL);
+}
+
+static inline int hashmap__set(struct hashmap *map,
+ const void *key, void *value,
+ const void **old_key, void **old_value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_SET,
+ old_key, old_value);
+}
+
+static inline int hashmap__update(struct hashmap *map,
+ const void *key, void *value,
+ const void **old_key, void **old_value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_UPDATE,
+ old_key, old_value);
+}
+
+static inline int hashmap__append(struct hashmap *map,
+ const void *key, void *value)
+{
+ return hashmap__insert(map, key, value, HASHMAP_APPEND, NULL, NULL);
+}
+
+bool hashmap__delete(struct hashmap *map, const void *key,
+ const void **old_key, void **old_value);
+
+bool hashmap__find(const struct hashmap *map, const void *key, void **value);
+
+/*
+ * hashmap__for_each_entry - iterate over all entries in hashmap
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry(map, cur, bkt) \
+ for (bkt = 0; bkt < map->cap; bkt++) \
+ for (cur = map->buckets[bkt]; cur; cur = cur->next)
+
+/*
+ * hashmap__for_each_entry_safe - iterate over all entries in hashmap, safe
+ * against removals
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @tmp: struct hashmap_entry * used as a temporary next cursor storage
+ * @bkt: integer used as a bucket loop cursor
+ */
+#define hashmap__for_each_entry_safe(map, cur, tmp, bkt) \
+ for (bkt = 0; bkt < map->cap; bkt++) \
+ for (cur = map->buckets[bkt]; \
+ cur && ({tmp = cur->next; true; }); \
+ cur = tmp)
+
+/*
+ * hashmap__for_each_key_entry - iterate over entries associated with given key
+ * @map: hashmap to iterate
+ * @cur: struct hashmap_entry * used as a loop cursor
+ * @key: key to iterate entries for
+ */
+#define hashmap__for_each_key_entry(map, cur, _key) \
+ for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+ map->cap_bits); \
+ map->buckets ? map->buckets[bkt] : NULL; }); \
+ cur; \
+ cur = cur->next) \
+ if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#define hashmap__for_each_key_entry_safe(map, cur, tmp, _key) \
+ for (cur = ({ size_t bkt = hash_bits(map->hash_fn((_key), map->ctx),\
+ map->cap_bits); \
+ cur = map->buckets ? map->buckets[bkt] : NULL; }); \
+ cur && ({ tmp = cur->next; true; }); \
+ cur = tmp) \
+ if (map->equal_fn(cur->key, (_key), map->ctx))
+
+#endif /* __LIBBPF_HASHMAP_H */
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 4246e7447e54..7a67d017d72c 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -525,7 +525,7 @@ static int write_event_desc(struct feat_fd *ff,
/*
* write event string as passed on cmdline
*/
- ret = do_write_string(ff, perf_evsel__name(evsel));
+ ret = do_write_string(ff, evsel__name(evsel));
if (ret < 0)
return ret;
/*
@@ -783,8 +783,7 @@ static int write_group_desc(struct feat_fd *ff,
return ret;
evlist__for_each_entry(evlist, evsel) {
- if (perf_evsel__is_group_leader(evsel) &&
- evsel->core.nr_members > 1) {
+ if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) {
const char *name = evsel->group_name ?: "{anon_group}";
u32 leader_idx = evsel->idx;
u32 nr_members = evsel->core.nr_members;
@@ -1395,6 +1394,38 @@ static int write_compressed(struct feat_fd *ff __maybe_unused,
return do_write(ff, &(ff->ph->env.comp_mmap_len), sizeof(ff->ph->env.comp_mmap_len));
}
+static int write_cpu_pmu_caps(struct feat_fd *ff,
+ struct evlist *evlist __maybe_unused)
+{
+ struct perf_pmu *cpu_pmu = perf_pmu__find("cpu");
+ struct perf_pmu_caps *caps = NULL;
+ int nr_caps;
+ int ret;
+
+ if (!cpu_pmu)
+ return -ENOENT;
+
+ nr_caps = perf_pmu__caps_parse(cpu_pmu);
+ if (nr_caps < 0)
+ return nr_caps;
+
+ ret = do_write(ff, &nr_caps, sizeof(nr_caps));
+ if (ret < 0)
+ return ret;
+
+ list_for_each_entry(caps, &cpu_pmu->caps, list) {
+ ret = do_write_string(ff, caps->name);
+ if (ret < 0)
+ return ret;
+
+ ret = do_write_string(ff, caps->value);
+ if (ret < 0)
+ return ret;
+ }
+
+ return ret;
+}
+
static void print_hostname(struct feat_fd *ff, FILE *fp)
{
fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname);
@@ -1590,6 +1621,40 @@ static void free_event_desc(struct evsel *events)
free(events);
}
+static bool perf_attr_check(struct perf_event_attr *attr)
+{
+ if (attr->__reserved_1 || attr->__reserved_2 || attr->__reserved_3) {
+ pr_warning("Reserved bits are set unexpectedly. "
+ "Please update perf tool.\n");
+ return false;
+ }
+
+ if (attr->sample_type & ~(PERF_SAMPLE_MAX-1)) {
+ pr_warning("Unknown sample type (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->sample_type);
+ return false;
+ }
+
+ if (attr->read_format & ~(PERF_FORMAT_MAX-1)) {
+ pr_warning("Unknown read format (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->read_format);
+ return false;
+ }
+
+ if ((attr->sample_type & PERF_SAMPLE_BRANCH_STACK) &&
+ (attr->branch_sample_type & ~(PERF_SAMPLE_BRANCH_MAX-1))) {
+ pr_warning("Unknown branch sample type (0x%llx) is detected. "
+ "Please update perf tool.\n",
+ attr->branch_sample_type);
+
+ return false;
+ }
+
+ return true;
+}
+
static struct evsel *read_event_desc(struct feat_fd *ff)
{
struct evsel *evsel, *events = NULL;
@@ -1634,6 +1699,9 @@ static struct evsel *read_event_desc(struct feat_fd *ff)
memcpy(&evsel->core.attr, buf, msz);
+ if (!perf_attr_check(&evsel->core.attr))
+ goto error;
+
if (do_read_u32(ff, &nr))
goto error;
@@ -1772,6 +1840,27 @@ static void print_compressed(struct feat_fd *ff, FILE *fp)
ff->ph->env.comp_level, ff->ph->env.comp_ratio);
}
+static void print_cpu_pmu_caps(struct feat_fd *ff, FILE *fp)
+{
+ const char *delimiter = "# cpu pmu capabilities: ";
+ u32 nr_caps = ff->ph->env.nr_cpu_pmu_caps;
+ char *str;
+
+ if (!nr_caps) {
+ fprintf(fp, "# cpu pmu capabilities: not available\n");
+ return;
+ }
+
+ str = ff->ph->env.cpu_pmu_caps;
+ while (nr_caps--) {
+ fprintf(fp, "%s%s", delimiter, str);
+ delimiter = ", ";
+ str += strlen(str) + 1;
+ }
+
+ fprintf(fp, "\n");
+}
+
static void print_pmu_mappings(struct feat_fd *ff, FILE *fp)
{
const char *delimiter = "# pmu mappings: ";
@@ -1817,14 +1906,12 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp)
session = container_of(ff->ph, struct perf_session, header);
evlist__for_each_entry(session->evlist, evsel) {
- if (perf_evsel__is_group_leader(evsel) &&
- evsel->core.nr_members > 1) {
- fprintf(fp, "# group: %s{%s", evsel->group_name ?: "",
- perf_evsel__name(evsel));
+ if (evsel__is_group_leader(evsel) && evsel->core.nr_members > 1) {
+ fprintf(fp, "# group: %s{%s", evsel->group_name ?: "", evsel__name(evsel));
nr = evsel->core.nr_members - 1;
} else if (nr) {
- fprintf(fp, ",%s", perf_evsel__name(evsel));
+ fprintf(fp, ",%s", evsel__name(evsel));
if (--nr == 0)
fprintf(fp, "}\n");
@@ -2809,6 +2896,60 @@ static int process_compressed(struct feat_fd *ff,
return 0;
}
+static int process_cpu_pmu_caps(struct feat_fd *ff,
+ void *data __maybe_unused)
+{
+ char *name, *value;
+ struct strbuf sb;
+ u32 nr_caps;
+
+ if (do_read_u32(ff, &nr_caps))
+ return -1;
+
+ if (!nr_caps) {
+ pr_debug("cpu pmu capabilities not available\n");
+ return 0;
+ }
+
+ ff->ph->env.nr_cpu_pmu_caps = nr_caps;
+
+ if (strbuf_init(&sb, 128) < 0)
+ return -1;
+
+ while (nr_caps--) {
+ name = do_read_string(ff);
+ if (!name)
+ goto error;
+
+ value = do_read_string(ff);
+ if (!value)
+ goto free_name;
+
+ if (strbuf_addf(&sb, "%s=%s", name, value) < 0)
+ goto free_value;
+
+ /* include a NULL character at the end */
+ if (strbuf_add(&sb, "", 1) < 0)
+ goto free_value;
+
+ if (!strcmp(name, "branches"))
+ ff->ph->env.max_branches = atoi(value);
+
+ free(value);
+ free(name);
+ }
+ ff->ph->env.cpu_pmu_caps = strbuf_detach(&sb, NULL);
+ return 0;
+
+free_value:
+ free(value);
+free_name:
+ free(name);
+error:
+ strbuf_release(&sb);
+ return -1;
+}
+
#define FEAT_OPR(n, func, __full_only) \
[HEADER_##n] = { \
.name = __stringify(n), \
@@ -2866,6 +3007,7 @@ const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPR(BPF_PROG_INFO, bpf_prog_info, false),
FEAT_OPR(BPF_BTF, bpf_btf, false),
FEAT_OPR(COMPRESSED, compressed, false),
+ FEAT_OPR(CPU_PMU_CAPS, cpu_pmu_caps, false),
};
struct header_print_data {
@@ -3432,7 +3574,7 @@ static int perf_header__read_pipe(struct perf_session *session)
return -EINVAL;
}
- return 0;
+ return f_header.size == sizeof(f_header) ? 0 : -1;
}
static int read_attr(int fd, struct perf_header *ph,
@@ -3534,7 +3676,7 @@ int perf_session__read_header(struct perf_session *session)
struct perf_file_header f_header;
struct perf_file_attr f_attr;
u64 f_id;
- int nr_attrs, nr_ids, i, j;
+ int nr_attrs, nr_ids, i, j, err;
int fd = perf_data__fd(data);
session->evlist = evlist__new();
@@ -3543,8 +3685,16 @@ int perf_session__read_header(struct perf_session *session)
session->evlist->env = &header->env;
session->machines.host.env = &header->env;
- if (perf_data__is_pipe(data))
- return perf_header__read_pipe(session);
+
+ /*
+ * We can read 'pipe' data event from regular file,
+ * check for the pipe header regardless of source.
+ */
+ err = perf_header__read_pipe(session);
+ if (!err || (err && perf_data__is_pipe(data))) {
+ data->is_pipe = true;
+ return err;
+ }
if (perf_file_header__read(&f_header, header, fd) < 0)
return -EINVAL;
@@ -3805,12 +3955,22 @@ int perf_event__process_tracing_data(struct perf_session *session,
{
ssize_t size_read, padding, size = event->tracing_data.size;
int fd = perf_data__fd(session->data);
- off_t offset = lseek(fd, 0, SEEK_CUR);
char buf[BUFSIZ];
- /* setup for reading amidst mmap */
- lseek(fd, offset + sizeof(struct perf_record_header_tracing_data),
- SEEK_SET);
+ /*
+ * The pipe fd is already in proper place and in any case
+ * we can't move it, and we'd screw the case where we read
+ * 'pipe' data from regular file. The trace_report reads
+ * data from 'fd' so we need to set it directly behind the
+ * event, where the tracing data starts.
+ */
+ if (!perf_data__is_pipe(session->data)) {
+ off_t offset = lseek(fd, 0, SEEK_CUR);
+
+ /* setup for reading amidst mmap */
+ lseek(fd, offset + sizeof(struct perf_record_header_tracing_data),
+ SEEK_SET);
+ }
size_read = trace_report(fd, &session->tevent,
session->repipe);
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 840f95cee349..650bd1c7a99b 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -43,6 +43,7 @@ enum {
HEADER_BPF_PROG_INFO,
HEADER_BPF_BTF,
HEADER_COMPRESSED,
+ HEADER_CPU_PMU_CAPS,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index ca5a8f4d007e..8a793e4c9400 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -10,6 +10,7 @@
#include "mem-events.h"
#include "session.h"
#include "namespaces.h"
+#include "cgroup.h"
#include "sort.h"
#include "units.h"
#include "evlist.h"
@@ -194,6 +195,7 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
hists__set_unres_dso_col_len(hists, HISTC_MEM_DADDR_DSO);
}
+ hists__new_col_len(hists, HISTC_CGROUP, 6);
hists__new_col_len(hists, HISTC_CGROUP_ID, 20);
hists__new_col_len(hists, HISTC_CPU, 3);
hists__new_col_len(hists, HISTC_SOCKET, 6);
@@ -222,6 +224,16 @@ void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
if (h->trace_output)
hists__new_col_len(hists, HISTC_TRACE, strlen(h->trace_output));
+
+ if (h->cgroup) {
+ const char *cgrp_name = "unknown";
+ struct cgroup *cgrp = cgroup__find(h->ms.maps->machine->env,
+ h->cgroup);
+ if (cgrp != NULL)
+ cgrp_name = cgrp->name;
+
+ hists__new_col_len(hists, HISTC_CGROUP, strlen(cgrp_name));
+ }
}
void hists__output_recalc_col_len(struct hists *hists, int max_rows)
@@ -691,6 +703,7 @@ __hists__add_entry(struct hists *hists,
.dev = ns ? ns->link_info[CGROUP_NS_INDEX].dev : 0,
.ino = ns ? ns->link_info[CGROUP_NS_INDEX].ino : 0,
},
+ .cgroup = sample->cgroup,
.ms = {
.maps = al->maps,
.map = al->map,
@@ -1057,6 +1070,20 @@ iter_next_cumulative_entry(struct hist_entry_iter *iter,
return fill_callchain_info(al, node, iter->hide_unresolved);
}
+static bool
+hist_entry__fast__sym_diff(struct hist_entry *left,
+ struct hist_entry *right)
+{
+ struct symbol *sym_l = left->ms.sym;
+ struct symbol *sym_r = right->ms.sym;
+
+ if (!sym_l && !sym_r)
+ return left->ip != right->ip;
+
+ return !!_sort__sym_cmp(sym_l, sym_r);
+}
+
+
static int
iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
struct addr_location *al)
@@ -1083,6 +1110,7 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
};
int i;
struct callchain_cursor cursor;
+ bool fast = hists__has(he_tmp.hists, sym);
callchain_cursor_snapshot(&cursor, &callchain_cursor);
@@ -1093,6 +1121,14 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
* It's possible that it has cycles or recursive calls.
*/
for (i = 0; i < iter->curr; i++) {
+ /*
+ * For most cases, there are no duplicate entries in callchain.
+ * The symbols are usually different. Do a quick check for
+ * symbols first.
+ */
+ if (fast && hist_entry__fast__sym_diff(he_cache[i], &he_tmp))
+ continue;
+
if (hist_entry__cmp(he_cache[i], &he_tmp) == 0) {
/* to avoid calling callback function */
iter->he = NULL;
@@ -1894,8 +1930,8 @@ static void output_resort(struct hists *hists, struct ui_progress *prog,
}
}
-void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
- hists__resort_cb_t cb, void *cb_arg)
+void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
+ hists__resort_cb_t cb, void *cb_arg)
{
bool use_callchain;
@@ -1909,9 +1945,9 @@ void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
output_resort(evsel__hists(evsel), prog, use_callchain, cb, cb_arg);
}
-void perf_evsel__output_resort(struct evsel *evsel, struct ui_progress *prog)
+void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog)
{
- return perf_evsel__output_resort_cb(evsel, prog, NULL, NULL);
+ return evsel__output_resort_cb(evsel, prog, NULL, NULL);
}
void hists__output_resort(struct hists *hists, struct ui_progress *prog)
@@ -2584,9 +2620,10 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
u64 *total_cycles)
{
struct branch_info *bi;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
/* If we have branch cycles always annotate them. */
- if (bs && bs->nr && bs->entries[0].flags.cycles) {
+ if (bs && bs->nr && entries[0].flags.cycles) {
int i;
bi = sample__resolve_bstack(sample, al);
@@ -2623,7 +2660,7 @@ size_t perf_evlist__fprintf_nr_events(struct evlist *evlist, FILE *fp)
size_t ret = 0;
evlist__for_each_entry(evlist, pos) {
- ret += fprintf(fp, "%s stats:\n", perf_evsel__name(pos));
+ ret += fprintf(fp, "%s stats:\n", evsel__name(pos));
ret += events_stats__fprintf(&evsel__hists(pos)->stats, fp);
}
@@ -2647,7 +2684,7 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
unsigned long nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
u64 nr_events = hists->stats.total_period;
struct evsel *evsel = hists_to_evsel(hists);
- const char *ev_name = perf_evsel__name(evsel);
+ const char *ev_name = evsel__name(evsel);
char buf[512], sample_freq_str[64] = "";
size_t buflen = sizeof(buf);
char ref[30] = " show reference callgraph, ";
@@ -2658,10 +2695,10 @@ int __hists__scnprintf_title(struct hists *hists, char *bf, size_t size, bool sh
nr_events = hists->stats.total_non_filtered_period;
}
- if (perf_evsel__is_group_event(evsel)) {
+ if (evsel__is_group_event(evsel)) {
struct evsel *pos;
- perf_evsel__group_desc(evsel, buf, buflen);
+ evsel__group_desc(evsel, buf, buflen);
ev_name = buf;
for_each_group_member(pos, evsel) {
@@ -2808,9 +2845,8 @@ static int hists_evsel__init(struct evsel *evsel)
int hists__init(void)
{
- int err = perf_evsel__object_config(sizeof(struct hists_evsel),
- hists_evsel__init,
- hists_evsel__exit);
+ int err = evsel__object_config(sizeof(struct hists_evsel),
+ hists_evsel__init, hists_evsel__exit);
if (err)
fputs("FATAL ERROR: Couldn't setup hists class\n", stderr);
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 0aa63aeb58ec..96b1c13bbccc 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -38,6 +38,7 @@ enum hist_column {
HISTC_THREAD,
HISTC_COMM,
HISTC_CGROUP_ID,
+ HISTC_CGROUP,
HISTC_PARENT,
HISTC_CPU,
HISTC_SOCKET,
@@ -172,9 +173,9 @@ void hist_entry__delete(struct hist_entry *he);
typedef int (*hists__resort_cb_t)(struct hist_entry *he, void *arg);
-void perf_evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
- hists__resort_cb_t cb, void *cb_arg);
-void perf_evsel__output_resort(struct evsel *evsel, struct ui_progress *prog);
+void evsel__output_resort_cb(struct evsel *evsel, struct ui_progress *prog,
+ hists__resort_cb_t cb, void *cb_arg);
+void evsel__output_resort(struct evsel *evsel, struct ui_progress *prog);
void hists__output_resort(struct hists *hists, struct ui_progress *prog);
void hists__output_resort_cb(struct hists *hists, struct ui_progress *prog,
hists__resort_cb_t cb);
@@ -536,6 +537,7 @@ static inline int block_hists_tui_browse(struct block_hist *bh __maybe_unused,
#define K_LEFT -1000
#define K_RIGHT -2000
#define K_SWITCH_INPUT_DATA -3000
+#define K_RELOAD -4000
#endif
unsigned int hists__sort_list_width(struct hists *hists);
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 34cb380d19a3..af1e78d76228 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -432,7 +432,7 @@ static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
le64_to_cpu(branch->from),
le64_to_cpu(branch->to),
btsq->intel_pt_insn.length,
- buffer->buffer_nr + 1);
+ buffer->buffer_nr + 1, true, 0, 0);
if (filter && !(filter & btsq->sample_flags))
continue;
err = intel_bts_synth_branch_sample(btsq, branch);
@@ -728,6 +728,15 @@ static void intel_bts_free(struct perf_session *session)
free(bts);
}
+static bool intel_bts_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
+ auxtrace);
+
+ return evsel->core.attr.type == bts->pmu_type;
+}
+
struct intel_bts_synth {
struct perf_tool dummy_tool;
struct perf_session *session;
@@ -816,10 +825,10 @@ static int intel_bts_synth_events(struct intel_bts *bts,
bts->branches_id = id;
/*
* We only use sample types from PERF_SAMPLE_MASK so we can use
- * __perf_evsel__sample_size() here.
+ * __evsel__sample_size() here.
*/
bts->branches_event_size = sizeof(struct perf_record_sample) +
- __perf_evsel__sample_size(attr.sample_type);
+ __evsel__sample_size(attr.sample_type);
}
return 0;
@@ -883,6 +892,7 @@ int intel_bts_process_auxtrace_info(union perf_event *event,
bts->auxtrace.flush_events = intel_bts_flush;
bts->auxtrace.free_events = intel_bts_free_events;
bts->auxtrace.free = intel_bts_free;
+ bts->auxtrace.evsel_is_auxtrace = intel_bts_evsel_is_auxtrace;
session->auxtrace = &bts->auxtrace;
intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
index 0ccf10a0bf44..4ce109993e74 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c
@@ -552,7 +552,7 @@ static int intel_pt_do_get_packet(const unsigned char *buf, size_t len,
break;
default:
break;
- };
+ }
if (!(byte & BIT(0))) {
if (byte == 0)
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 33cf8928cf05..e4dd8bf610ce 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -33,6 +33,7 @@
#include "tsc.h"
#include "intel-pt.h"
#include "config.h"
+#include "util/perf_api_probe.h"
#include "util/synthetic-events.h"
#include "time-utils.h"
@@ -68,6 +69,10 @@ struct intel_pt {
bool est_tsc;
bool sync_switch;
bool mispred_all;
+ bool use_thread_stack;
+ bool callstack;
+ unsigned int br_stack_sz;
+ unsigned int br_stack_sz_plus;
int have_sched_switch;
u32 pmu_type;
u64 kernel_start;
@@ -124,6 +129,9 @@ struct intel_pt {
struct range *time_ranges;
unsigned int range_cnt;
+
+ struct ip_callchain *chain;
+ struct branch_stack *br_stack;
};
enum switch_state {
@@ -143,8 +151,6 @@ struct intel_pt_queue {
const struct intel_pt_state *state;
struct ip_callchain *chain;
struct branch_stack *last_branch;
- struct branch_stack *last_branch_rb;
- size_t last_branch_pos;
union perf_event *event_buf;
bool on_heap;
bool stop;
@@ -868,6 +874,86 @@ static u64 intel_pt_ns_to_ticks(const struct intel_pt *pt, u64 ns)
pt->tc.time_mult;
}
+static struct ip_callchain *intel_pt_alloc_chain(struct intel_pt *pt)
+{
+ size_t sz = sizeof(struct ip_callchain);
+
+ /* Add 1 to callchain_sz for callchain context */
+ sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
+ return zalloc(sz);
+}
+
+static int intel_pt_callchain_init(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_CALLCHAIN))
+ evsel->synth_sample_type |= PERF_SAMPLE_CALLCHAIN;
+ }
+
+ pt->chain = intel_pt_alloc_chain(pt);
+ if (!pt->chain)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void intel_pt_add_callchain(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(pt->machine,
+ sample->pid,
+ sample->tid);
+
+ thread_stack__sample_late(thread, sample->cpu, pt->chain,
+ pt->synth_opts.callchain_sz + 1, sample->ip,
+ pt->kernel_start);
+
+ sample->callchain = pt->chain;
+}
+
+static struct branch_stack *intel_pt_alloc_br_stack(unsigned int entry_cnt)
+{
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += entry_cnt * sizeof(struct branch_entry);
+ return zalloc(sz);
+}
+
+static int intel_pt_br_stack_init(struct intel_pt *pt)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(pt->session->evlist, evsel) {
+ if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
+ evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ }
+
+ pt->br_stack = intel_pt_alloc_br_stack(pt->br_stack_sz);
+ if (!pt->br_stack)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void intel_pt_add_br_stack(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct thread *thread = machine__findnew_thread(pt->machine,
+ sample->pid,
+ sample->tid);
+
+ thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
+ pt->br_stack_sz, sample->ip,
+ pt->kernel_start);
+
+ sample->branch_stack = pt->br_stack;
+}
+
+/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
+#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3U)
+
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
unsigned int queue_nr)
{
@@ -880,26 +966,17 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
return NULL;
if (pt->synth_opts.callchain) {
- size_t sz = sizeof(struct ip_callchain);
-
- /* Add 1 to callchain_sz for callchain context */
- sz += (pt->synth_opts.callchain_sz + 1) * sizeof(u64);
- ptq->chain = zalloc(sz);
+ ptq->chain = intel_pt_alloc_chain(pt);
if (!ptq->chain)
goto out_free;
}
- if (pt->synth_opts.last_branch) {
- size_t sz = sizeof(struct branch_stack);
+ if (pt->synth_opts.last_branch || pt->synth_opts.other_events) {
+ unsigned int entry_cnt = max(LBRS_MAX, pt->br_stack_sz);
- sz += pt->synth_opts.last_branch_sz *
- sizeof(struct branch_entry);
- ptq->last_branch = zalloc(sz);
+ ptq->last_branch = intel_pt_alloc_br_stack(entry_cnt);
if (!ptq->last_branch)
goto out_free;
- ptq->last_branch_rb = zalloc(sz);
- if (!ptq->last_branch_rb)
- goto out_free;
}
ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
@@ -968,7 +1045,6 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
out_free:
zfree(&ptq->event_buf);
zfree(&ptq->last_branch);
- zfree(&ptq->last_branch_rb);
zfree(&ptq->chain);
free(ptq);
return NULL;
@@ -984,7 +1060,6 @@ static void intel_pt_free_queue(void *priv)
intel_pt_decoder_free(ptq->decoder);
zfree(&ptq->event_buf);
zfree(&ptq->last_branch);
- zfree(&ptq->last_branch_rb);
zfree(&ptq->chain);
free(ptq);
}
@@ -1152,58 +1227,6 @@ static int intel_pt_setup_queues(struct intel_pt *pt)
return 0;
}
-static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
-{
- struct branch_stack *bs_src = ptq->last_branch_rb;
- struct branch_stack *bs_dst = ptq->last_branch;
- size_t nr = 0;
-
- bs_dst->nr = bs_src->nr;
-
- if (!bs_src->nr)
- return;
-
- nr = ptq->pt->synth_opts.last_branch_sz - ptq->last_branch_pos;
- memcpy(&bs_dst->entries[0],
- &bs_src->entries[ptq->last_branch_pos],
- sizeof(struct branch_entry) * nr);
-
- if (bs_src->nr >= ptq->pt->synth_opts.last_branch_sz) {
- memcpy(&bs_dst->entries[nr],
- &bs_src->entries[0],
- sizeof(struct branch_entry) * ptq->last_branch_pos);
- }
-}
-
-static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
-{
- ptq->last_branch_pos = 0;
- ptq->last_branch_rb->nr = 0;
-}
-
-static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
-{
- const struct intel_pt_state *state = ptq->state;
- struct branch_stack *bs = ptq->last_branch_rb;
- struct branch_entry *be;
-
- if (!ptq->last_branch_pos)
- ptq->last_branch_pos = ptq->pt->synth_opts.last_branch_sz;
-
- ptq->last_branch_pos -= 1;
-
- be = &bs->entries[ptq->last_branch_pos];
- be->from = state->from_ip;
- be->to = state->to_ip;
- be->flags.abort = !!(state->flags & INTEL_PT_ABORT_TX);
- be->flags.in_tx = !!(state->flags & INTEL_PT_IN_TX);
- /* No support for mispredict */
- be->flags.mispred = ptq->pt->mispred_all;
-
- if (bs->nr < ptq->pt->synth_opts.last_branch_sz)
- bs->nr += 1;
-}
-
static inline bool intel_pt_skip_event(struct intel_pt *pt)
{
return pt->synth_opts.initial_skip &&
@@ -1271,9 +1294,9 @@ static inline int intel_pt_opt_inject(struct intel_pt *pt,
return intel_pt_inject_event(event, sample, type);
}
-static int intel_pt_deliver_synth_b_event(struct intel_pt *pt,
- union perf_event *event,
- struct perf_sample *sample, u64 type)
+static int intel_pt_deliver_synth_event(struct intel_pt *pt,
+ union perf_event *event,
+ struct perf_sample *sample, u64 type)
{
int ret;
@@ -1295,6 +1318,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
struct perf_sample sample = { .ip = 0, };
struct dummy_branch_stack {
u64 nr;
+ u64 hw_idx;
struct branch_entry entries;
} dummy_bs;
@@ -1316,6 +1340,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
dummy_bs = (struct dummy_branch_stack){
.nr = 1,
+ .hw_idx = -1ULL,
.entries = {
.from = sample.ip,
.to = sample.addr,
@@ -1331,8 +1356,8 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
ptq->last_br_cyc_cnt = ptq->ipc_cyc_cnt;
}
- return intel_pt_deliver_synth_b_event(pt, event, &sample,
- pt->branches_sample_type);
+ return intel_pt_deliver_synth_event(pt, event, &sample,
+ pt->branches_sample_type);
}
static void intel_pt_prep_sample(struct intel_pt *pt,
@@ -1350,27 +1375,12 @@ static void intel_pt_prep_sample(struct intel_pt *pt,
}
if (pt->synth_opts.last_branch) {
- intel_pt_copy_last_branch_rb(ptq);
+ thread_stack__br_sample(ptq->thread, ptq->cpu, ptq->last_branch,
+ pt->br_stack_sz);
sample->branch_stack = ptq->last_branch;
}
}
-static inline int intel_pt_deliver_synth_event(struct intel_pt *pt,
- struct intel_pt_queue *ptq,
- union perf_event *event,
- struct perf_sample *sample,
- u64 type)
-{
- int ret;
-
- ret = intel_pt_deliver_synth_b_event(pt, event, sample, type);
-
- if (pt->synth_opts.last_branch)
- intel_pt_reset_last_branch_rb(ptq);
-
- return ret;
-}
-
static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
{
struct intel_pt *pt = ptq->pt;
@@ -1395,7 +1405,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->instructions_sample_type);
}
@@ -1413,7 +1423,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
sample.id = ptq->pt->transactions_id;
sample.stream_id = ptq->pt->transactions_id;
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->transactions_sample_type);
}
@@ -1454,7 +1464,7 @@ static int intel_pt_synth_ptwrite_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->ptwrites_sample_type);
}
@@ -1484,7 +1494,7 @@ static int intel_pt_synth_cbr_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1509,7 +1519,7 @@ static int intel_pt_synth_mwait_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1534,7 +1544,7 @@ static int intel_pt_synth_pwre_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1559,7 +1569,7 @@ static int intel_pt_synth_exstop_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1584,7 +1594,7 @@ static int intel_pt_synth_pwrx_sample(struct intel_pt_queue *ptq)
sample.raw_size = perf_synth__raw_size(raw);
sample.raw_data = perf_synth__raw_data(&raw);
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample,
+ return intel_pt_deliver_synth_event(pt, event, &sample,
pt->pwr_events_sample_type);
}
@@ -1678,15 +1688,14 @@ static u64 intel_pt_lbr_flags(u64 info)
union {
struct branch_flags flags;
u64 result;
- } u = {
- .flags = {
- .mispred = !!(info & LBR_INFO_MISPRED),
- .predicted = !(info & LBR_INFO_MISPRED),
- .in_tx = !!(info & LBR_INFO_IN_TX),
- .abort = !!(info & LBR_INFO_ABORT),
- .cycles = info & LBR_INFO_CYCLES,
- }
- };
+ } u;
+
+ u.result = 0;
+ u.flags.mispred = !!(info & LBR_INFO_MISPRED);
+ u.flags.predicted = !(info & LBR_INFO_MISPRED);
+ u.flags.in_tx = !!(info & LBR_INFO_IN_TX);
+ u.flags.abort = !!(info & LBR_INFO_ABORT);
+ u.flags.cycles = info & LBR_INFO_CYCLES;
return u.result;
}
@@ -1716,9 +1725,6 @@ static void intel_pt_add_lbrs(struct branch_stack *br_stack,
}
}
-/* INTEL_PT_LBR_0, INTEL_PT_LBR_1 and INTEL_PT_LBR_2 */
-#define LBRS_MAX (INTEL_PT_BLK_ITEM_ID_CNT * 3)
-
static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
{
const struct intel_pt_blk_items *items = &ptq->state->items;
@@ -1794,23 +1800,18 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
- struct {
- struct branch_stack br_stack;
- struct branch_entry entries[LBRS_MAX];
- } br;
-
if (items->mask[INTEL_PT_LBR_0_POS] ||
items->mask[INTEL_PT_LBR_1_POS] ||
items->mask[INTEL_PT_LBR_2_POS]) {
- intel_pt_add_lbrs(&br.br_stack, items);
- sample.branch_stack = &br.br_stack;
+ intel_pt_add_lbrs(ptq->last_branch, items);
} else if (pt->synth_opts.last_branch) {
- intel_pt_copy_last_branch_rb(ptq);
- sample.branch_stack = ptq->last_branch;
+ thread_stack__br_sample(ptq->thread, ptq->cpu,
+ ptq->last_branch,
+ pt->br_stack_sz);
} else {
- br.br_stack.nr = 0;
- sample.branch_stack = &br.br_stack;
+ ptq->last_branch->nr = 0;
}
+ sample.branch_stack = ptq->last_branch;
}
if (sample_type & PERF_SAMPLE_ADDR && items->has_mem_access_address)
@@ -1840,7 +1841,7 @@ static int intel_pt_synth_pebs_sample(struct intel_pt_queue *ptq)
sample.transaction = txn;
}
- return intel_pt_deliver_synth_event(pt, ptq, event, &sample, sample_type);
+ return intel_pt_deliver_synth_event(pt, event, &sample, sample_type);
}
static int intel_pt_synth_error(struct intel_pt *pt, int code, int cpu,
@@ -1990,12 +1991,15 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
if (!(state->type & INTEL_PT_BRANCH))
return 0;
- if (pt->synth_opts.callchain || pt->synth_opts.thread_stack)
- thread_stack__event(ptq->thread, ptq->cpu, ptq->flags, state->from_ip,
- state->to_ip, ptq->insn_len,
- state->trace_nr);
- else
+ if (pt->use_thread_stack) {
+ thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
+ state->from_ip, state->to_ip, ptq->insn_len,
+ state->trace_nr, pt->callstack,
+ pt->br_stack_sz_plus,
+ pt->mispred_all);
+ } else {
thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
+ }
if (pt->sample_branches) {
err = intel_pt_synth_branch_sample(ptq);
@@ -2003,9 +2007,6 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return err;
}
- if (pt->synth_opts.last_branch)
- intel_pt_update_last_branch_rb(ptq);
-
if (!ptq->sync_switch)
return 0;
@@ -2482,7 +2483,7 @@ static int intel_pt_process_switch(struct intel_pt *pt,
if (evsel != pt->switch_evsel)
return 0;
- tid = perf_evsel__intval(evsel, sample, "next_pid");
+ tid = evsel__intval(evsel, sample, "next_pid");
cpu = sample->cpu;
intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
@@ -2637,6 +2638,13 @@ static int intel_pt_process_event(struct perf_session *session,
if (err)
return err;
+ if (event->header.type == PERF_RECORD_SAMPLE) {
+ if (pt->synth_opts.add_callchain && !sample->callchain)
+ intel_pt_add_callchain(pt, sample);
+ if (pt->synth_opts.add_last_branch && !sample->branch_stack)
+ intel_pt_add_br_stack(pt, sample);
+ }
+
if (event->header.type == PERF_RECORD_AUX &&
(event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
pt->synth_opts.errors) {
@@ -2708,11 +2716,21 @@ static void intel_pt_free(struct perf_session *session)
session->auxtrace = NULL;
thread__put(pt->unknown_thread);
addr_filters__exit(&pt->filts);
+ zfree(&pt->chain);
zfree(&pt->filter);
zfree(&pt->time_ranges);
free(pt);
}
+static bool intel_pt_evsel_is_auxtrace(struct perf_session *session,
+ struct evsel *evsel)
+{
+ struct intel_pt *pt = container_of(session->auxtrace, struct intel_pt,
+ auxtrace);
+
+ return evsel->core.attr.type == pt->pmu_type;
+}
+
static int intel_pt_process_auxtrace_event(struct perf_session *session,
union perf_event *event,
struct perf_tool *tool __maybe_unused)
@@ -3014,7 +3032,7 @@ static struct evsel *intel_pt_find_sched_switch(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry_reverse(evlist, evsel) {
- const char *name = perf_evsel__name(evsel);
+ const char *name = evsel__name(evsel);
if (!strcmp(name, "sched:sched_switch"))
return evsel;
@@ -3308,6 +3326,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->auxtrace.flush_events = intel_pt_flush;
pt->auxtrace.free_events = intel_pt_free_events;
pt->auxtrace.free = intel_pt_free;
+ pt->auxtrace.evsel_is_auxtrace = intel_pt_evsel_is_auxtrace;
session->auxtrace = &pt->auxtrace;
if (dump_trace)
@@ -3336,6 +3355,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
!session->itrace_synth_opts->inject) {
pt->synth_opts.branches = false;
pt->synth_opts.callchain = true;
+ pt->synth_opts.add_callchain = true;
}
pt->synth_opts.thread_stack =
session->itrace_synth_opts->thread_stack;
@@ -3368,14 +3388,54 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pt->branches_filter |= PERF_IP_FLAG_RETURN |
PERF_IP_FLAG_TRACE_BEGIN;
- if (pt->synth_opts.callchain && !symbol_conf.use_callchain) {
+ if ((pt->synth_opts.callchain || pt->synth_opts.add_callchain) &&
+ !symbol_conf.use_callchain) {
symbol_conf.use_callchain = true;
if (callchain_register_param(&callchain_param) < 0) {
symbol_conf.use_callchain = false;
pt->synth_opts.callchain = false;
+ pt->synth_opts.add_callchain = false;
}
}
+ if (pt->synth_opts.add_callchain) {
+ err = intel_pt_callchain_init(pt);
+ if (err)
+ goto err_delete_thread;
+ }
+
+ if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
+ pt->br_stack_sz = pt->synth_opts.last_branch_sz;
+ pt->br_stack_sz_plus = pt->br_stack_sz;
+ }
+
+ if (pt->synth_opts.add_last_branch) {
+ err = intel_pt_br_stack_init(pt);
+ if (err)
+ goto err_delete_thread;
+ /*
+ * Additional branch stack size to cater for tracing from the
+ * actual sample ip to where the sample time is recorded.
+ * Measured at about 200 branches, but generously set to 1024.
+ * If kernel space is not being traced, then add just 1 for the
+ * branch to kernel space.
+ */
+ if (intel_pt_tracing_kernel(pt))
+ pt->br_stack_sz_plus += 1024;
+ else
+ pt->br_stack_sz_plus += 1;
+ }
+
+ pt->use_thread_stack = pt->synth_opts.callchain ||
+ pt->synth_opts.add_callchain ||
+ pt->synth_opts.thread_stack ||
+ pt->synth_opts.last_branch ||
+ pt->synth_opts.add_last_branch;
+
+ pt->callstack = pt->synth_opts.callchain ||
+ pt->synth_opts.add_callchain ||
+ pt->synth_opts.thread_stack;
+
err = intel_pt_synth_events(pt, session);
if (err)
goto err_delete_thread;
@@ -3398,6 +3458,7 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
return 0;
err_delete_thread:
+ zfree(&pt->chain);
thread__zput(pt->unknown_thread);
err_free_queues:
intel_pt_log_disable();
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index e3ccb0ce1938..32bb05e03fb2 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -57,7 +57,7 @@ struct debug_line_info {
unsigned long vma;
unsigned int lineno;
/* The filename format is unspecified, absolute path, relative etc. */
- char const filename[0];
+ char const filename[];
};
struct jit_tool {
diff --git a/tools/perf/util/jitdump.h b/tools/perf/util/jitdump.h
index f2c3823cc81a..ab2842def83d 100644
--- a/tools/perf/util/jitdump.h
+++ b/tools/perf/util/jitdump.h
@@ -93,7 +93,7 @@ struct debug_entry {
uint64_t addr;
int lineno; /* source line number starting at 1 */
int discrim; /* column discriminator, 0 is default */
- const char name[0]; /* null terminated filename, \xff\0 if same as previous entry */
+ const char name[]; /* null terminated filename, \xff\0 if same as previous entry */
};
struct jr_code_debug_info {
@@ -101,7 +101,7 @@ struct jr_code_debug_info {
uint64_t code_addr;
uint64_t nr_entry;
- struct debug_entry entries[0];
+ struct debug_entry entries[];
};
struct jr_code_unwinding_info {
@@ -110,7 +110,7 @@ struct jr_code_unwinding_info {
uint64_t unwinding_size;
uint64_t eh_frame_hdr_size;
uint64_t mapped_size;
- const char unwinding_data[0];
+ const char unwinding_data[];
};
union jr_entry {
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index b5af680fc667..dbdffb6673fe 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -265,6 +265,8 @@ static int detect_kbuild_dir(char **kbuild_dir)
return -ENOMEM;
return 0;
}
+ pr_debug("%s: Couldn't find \"%s\", missing kernel-devel package?.\n",
+ __func__, autoconf_path);
free(autoconf_path);
return -ENOENT;
}
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index fb5c2cd44d30..d5384807372b 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -33,6 +33,7 @@
#include "asm/bug.h"
#include "bpf-event.h"
#include <internal/lib.h> // page_size
+#include "cgroup.h"
#include <linux/ctype.h>
#include <symbol/kallsyms.h>
@@ -654,6 +655,22 @@ int machine__process_namespaces_event(struct machine *machine __maybe_unused,
return err;
}
+int machine__process_cgroup_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused)
+{
+ struct cgroup *cgrp;
+
+ if (dump_trace)
+ perf_event__fprintf_cgroup(event, stdout);
+
+ cgrp = cgroup__findnew(machine->env, event->cgroup.id, event->cgroup.path);
+ if (cgrp == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
int machine__process_lost_event(struct machine *machine __maybe_unused,
union perf_event *event, struct perf_sample *sample __maybe_unused)
{
@@ -719,6 +736,12 @@ int machine__process_switch_event(struct machine *machine __maybe_unused,
return 0;
}
+static int is_bpf_image(const char *name)
+{
+ return strncmp(name, "bpf_trampoline_", sizeof("bpf_trampoline_") - 1) == 0 ||
+ strncmp(name, "bpf_dispatcher_", sizeof("bpf_dispatcher_") - 1) == 0;
+}
+
static int machine__process_ksymbol_register(struct machine *machine,
union perf_event *event,
struct perf_sample *sample __maybe_unused)
@@ -742,6 +765,12 @@ static int machine__process_ksymbol_register(struct machine *machine,
map->start = event->ksymbol.addr;
map->end = map->start + event->ksymbol.len;
maps__insert(&machine->kmaps, map);
+ dso__set_loaded(dso);
+
+ if (is_bpf_image(event->ksymbol.name)) {
+ dso->binary_type = DSO_BINARY_TYPE__BPF_IMAGE;
+ dso__set_long_name(dso, "", false);
+ }
}
sym = symbol__new(map->map_ip(map, map->start),
@@ -1878,6 +1907,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
ret = machine__process_mmap_event(machine, event, sample); break;
case PERF_RECORD_NAMESPACES:
ret = machine__process_namespaces_event(machine, event, sample); break;
+ case PERF_RECORD_CGROUP:
+ ret = machine__process_cgroup_event(machine, event, sample); break;
case PERF_RECORD_MMAP2:
ret = machine__process_mmap2_event(machine, event, sample); break;
case PERF_RECORD_FORK:
@@ -2081,15 +2112,16 @@ struct branch_info *sample__resolve_bstack(struct perf_sample *sample,
{
unsigned int i;
const struct branch_stack *bs = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct branch_info *bi = calloc(bs->nr, sizeof(struct branch_info));
if (!bi)
return NULL;
for (i = 0; i < bs->nr; i++) {
- ip__resolve_ams(al->thread, &bi[i].to, bs->entries[i].to);
- ip__resolve_ams(al->thread, &bi[i].from, bs->entries[i].from);
- bi[i].flags = bs->entries[i].flags;
+ ip__resolve_ams(al->thread, &bi[i].to, entries[i].to);
+ ip__resolve_ams(al->thread, &bi[i].from, entries[i].from);
+ bi[i].flags = entries[i].flags;
}
return bi;
}
@@ -2158,6 +2190,303 @@ static int remove_loops(struct branch_entry *l, int nr,
return nr;
}
+static int lbr_callchain_add_kernel_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u64 branch_from,
+ bool callee, int end)
+{
+ struct ip_callchain *chain = sample->callchain;
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int err, i;
+
+ if (callee) {
+ for (i = 0; i < end + 1; i++) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, chain->ips[i],
+ false, NULL, NULL, branch_from);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ for (i = end; i >= 0; i--) {
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, chain->ips[i],
+ false, NULL, NULL, branch_from);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void save_lbr_cursor_node(struct thread *thread,
+ struct callchain_cursor *cursor,
+ int idx)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+
+ if (!lbr_stitch)
+ return;
+
+ if (cursor->pos == cursor->nr) {
+ lbr_stitch->prev_lbr_cursor[idx].valid = false;
+ return;
+ }
+
+ if (!cursor->curr)
+ cursor->curr = cursor->first;
+ else
+ cursor->curr = cursor->curr->next;
+ memcpy(&lbr_stitch->prev_lbr_cursor[idx], cursor->curr,
+ sizeof(struct callchain_cursor_node));
+
+ lbr_stitch->prev_lbr_cursor[idx].valid = true;
+ cursor->pos++;
+}
+
+static int lbr_callchain_add_lbr_ip(struct thread *thread,
+ struct callchain_cursor *cursor,
+ struct perf_sample *sample,
+ struct symbol **parent,
+ struct addr_location *root_al,
+ u64 *branch_from,
+ bool callee)
+{
+ struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
+ u8 cpumode = PERF_RECORD_MISC_USER;
+ int lbr_nr = lbr_stack->nr;
+ struct branch_flags *flags;
+ int err, i;
+ u64 ip;
+
+ /*
+ * The curr and pos are not used in writing session. They are cleared
+ * in callchain_cursor_commit() when the writing session is closed.
+ * Using curr and pos to track the current cursor node.
+ */
+ if (thread->lbr_stitch) {
+ cursor->curr = NULL;
+ cursor->pos = cursor->nr;
+ if (cursor->nr) {
+ cursor->curr = cursor->first;
+ for (i = 0; i < (int)(cursor->nr - 1); i++)
+ cursor->curr = cursor->curr->next;
+ }
+ }
+
+ if (callee) {
+ /* Add LBR ip from first entries.to */
+ ip = entries[0].to;
+ flags = &entries[0].flags;
+ *branch_from = entries[0].from;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+
+ /*
+ * The number of cursor node increases.
+ * Move the current cursor node.
+ * But does not need to save current cursor node for entry 0.
+ * It's impossible to stitch the whole LBRs of previous sample.
+ */
+ if (thread->lbr_stitch && (cursor->pos != cursor->nr)) {
+ if (!cursor->curr)
+ cursor->curr = cursor->first;
+ else
+ cursor->curr = cursor->curr->next;
+ cursor->pos++;
+ }
+
+ /* Add LBR ip from entries.from one by one. */
+ for (i = 0; i < lbr_nr; i++) {
+ ip = entries[i].from;
+ flags = &entries[i].flags;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+ save_lbr_cursor_node(thread, cursor, i);
+ }
+ return 0;
+ }
+
+ /* Add LBR ip from entries.from one by one. */
+ for (i = lbr_nr - 1; i >= 0; i--) {
+ ip = entries[i].from;
+ flags = &entries[i].flags;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+ save_lbr_cursor_node(thread, cursor, i);
+ }
+
+ /* Add LBR ip from first entries.to */
+ ip = entries[0].to;
+ flags = &entries[0].flags;
+ *branch_from = entries[0].from;
+ err = add_callchain_ip(thread, cursor, parent,
+ root_al, &cpumode, ip,
+ true, flags, NULL,
+ *branch_from);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int lbr_callchain_add_stitched_lbr_ip(struct thread *thread,
+ struct callchain_cursor *cursor)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct callchain_cursor_node *cnode;
+ struct stitch_list *stitch_node;
+ int err;
+
+ list_for_each_entry(stitch_node, &lbr_stitch->lists, node) {
+ cnode = &stitch_node->cursor;
+
+ err = callchain_cursor_append(cursor, cnode->ip,
+ &cnode->ms,
+ cnode->branch,
+ &cnode->branch_flags,
+ cnode->nr_loop_iter,
+ cnode->iter_cycles,
+ cnode->branch_from,
+ cnode->srcline);
+ if (err)
+ return err;
+ }
+ return 0;
+}
+
+static struct stitch_list *get_stitch_node(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct stitch_list *stitch_node;
+
+ if (!list_empty(&lbr_stitch->free_lists)) {
+ stitch_node = list_first_entry(&lbr_stitch->free_lists,
+ struct stitch_list, node);
+ list_del(&stitch_node->node);
+
+ return stitch_node;
+ }
+
+ return malloc(sizeof(struct stitch_list));
+}
+
+static bool has_stitched_lbr(struct thread *thread,
+ struct perf_sample *cur,
+ struct perf_sample *prev,
+ unsigned int max_lbr,
+ bool callee)
+{
+ struct branch_stack *cur_stack = cur->branch_stack;
+ struct branch_entry *cur_entries = perf_sample__branch_entries(cur);
+ struct branch_stack *prev_stack = prev->branch_stack;
+ struct branch_entry *prev_entries = perf_sample__branch_entries(prev);
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ int i, j, nr_identical_branches = 0;
+ struct stitch_list *stitch_node;
+ u64 cur_base, distance;
+
+ if (!cur_stack || !prev_stack)
+ return false;
+
+ /* Find the physical index of the base-of-stack for current sample. */
+ cur_base = max_lbr - cur_stack->nr + cur_stack->hw_idx + 1;
+
+ distance = (prev_stack->hw_idx > cur_base) ? (prev_stack->hw_idx - cur_base) :
+ (max_lbr + prev_stack->hw_idx - cur_base);
+ /* Previous sample has shorter stack. Nothing can be stitched. */
+ if (distance + 1 > prev_stack->nr)
+ return false;
+
+ /*
+ * Check if there are identical LBRs between two samples.
+ * Identicall LBRs must have same from, to and flags values. Also,
+ * they have to be saved in the same LBR registers (same physical
+ * index).
+ *
+ * Starts from the base-of-stack of current sample.
+ */
+ for (i = distance, j = cur_stack->nr - 1; (i >= 0) && (j >= 0); i--, j--) {
+ if ((prev_entries[i].from != cur_entries[j].from) ||
+ (prev_entries[i].to != cur_entries[j].to) ||
+ (prev_entries[i].flags.value != cur_entries[j].flags.value))
+ break;
+ nr_identical_branches++;
+ }
+
+ if (!nr_identical_branches)
+ return false;
+
+ /*
+ * Save the LBRs between the base-of-stack of previous sample
+ * and the base-of-stack of current sample into lbr_stitch->lists.
+ * These LBRs will be stitched later.
+ */
+ for (i = prev_stack->nr - 1; i > (int)distance; i--) {
+
+ if (!lbr_stitch->prev_lbr_cursor[i].valid)
+ continue;
+
+ stitch_node = get_stitch_node(thread);
+ if (!stitch_node)
+ return false;
+
+ memcpy(&stitch_node->cursor, &lbr_stitch->prev_lbr_cursor[i],
+ sizeof(struct callchain_cursor_node));
+
+ if (callee)
+ list_add(&stitch_node->node, &lbr_stitch->lists);
+ else
+ list_add_tail(&stitch_node->node, &lbr_stitch->lists);
+ }
+
+ return true;
+}
+
+static bool alloc_lbr_stitch(struct thread *thread, unsigned int max_lbr)
+{
+ if (thread->lbr_stitch)
+ return true;
+
+ thread->lbr_stitch = zalloc(sizeof(*thread->lbr_stitch));
+ if (!thread->lbr_stitch)
+ goto err;
+
+ thread->lbr_stitch->prev_lbr_cursor = calloc(max_lbr + 1, sizeof(struct callchain_cursor_node));
+ if (!thread->lbr_stitch->prev_lbr_cursor)
+ goto free_lbr_stitch;
+
+ INIT_LIST_HEAD(&thread->lbr_stitch->lists);
+ INIT_LIST_HEAD(&thread->lbr_stitch->free_lists);
+
+ return true;
+
+free_lbr_stitch:
+ zfree(&thread->lbr_stitch);
+err:
+ pr_warning("Failed to allocate space for stitched LBRs. Disable LBR stitch\n");
+ thread->lbr_stitch_enable = false;
+ return false;
+}
+
/*
* Recolve LBR callstack chain sample
* Return:
@@ -2170,12 +2499,16 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
struct perf_sample *sample,
struct symbol **parent,
struct addr_location *root_al,
- int max_stack)
+ int max_stack,
+ unsigned int max_lbr)
{
+ bool callee = (callchain_param.order == ORDER_CALLEE);
struct ip_callchain *chain = sample->callchain;
int chain_nr = min(max_stack, (int)chain->nr), i;
- u8 cpumode = PERF_RECORD_MISC_USER;
- u64 ip, branch_from = 0;
+ struct lbr_stitch *lbr_stitch;
+ bool stitched_lbr = false;
+ u64 branch_from = 0;
+ int err;
for (i = 0; i < chain_nr; i++) {
if (chain->ips[i] == PERF_CONTEXT_USER)
@@ -2183,72 +2516,65 @@ static int resolve_lbr_callchain_sample(struct thread *thread,
}
/* LBR only affects the user callchain */
- if (i != chain_nr) {
- struct branch_stack *lbr_stack = sample->branch_stack;
- int lbr_nr = lbr_stack->nr, j, k;
- bool branch;
- struct branch_flags *flags;
- /*
- * LBR callstack can only get user call chain.
- * The mix_chain_nr is kernel call chain
- * number plus LBR user call chain number.
- * i is kernel call chain number,
- * 1 is PERF_CONTEXT_USER,
- * lbr_nr + 1 is the user call chain number.
- * For details, please refer to the comments
- * in callchain__printf
- */
- int mix_chain_nr = i + 1 + lbr_nr + 1;
+ if (i == chain_nr)
+ return 0;
- for (j = 0; j < mix_chain_nr; j++) {
- int err;
- branch = false;
- flags = NULL;
+ if (thread->lbr_stitch_enable && !sample->no_hw_idx &&
+ (max_lbr > 0) && alloc_lbr_stitch(thread, max_lbr)) {
+ lbr_stitch = thread->lbr_stitch;
- if (callchain_param.order == ORDER_CALLEE) {
- if (j < i + 1)
- ip = chain->ips[j];
- else if (j > i + 1) {
- k = j - i - 2;
- ip = lbr_stack->entries[k].from;
- branch = true;
- flags = &lbr_stack->entries[k].flags;
- } else {
- ip = lbr_stack->entries[0].to;
- branch = true;
- flags = &lbr_stack->entries[0].flags;
- branch_from =
- lbr_stack->entries[0].from;
- }
- } else {
- if (j < lbr_nr) {
- k = lbr_nr - j - 1;
- ip = lbr_stack->entries[k].from;
- branch = true;
- flags = &lbr_stack->entries[k].flags;
- }
- else if (j > lbr_nr)
- ip = chain->ips[i + 1 - (j - lbr_nr)];
- else {
- ip = lbr_stack->entries[0].to;
- branch = true;
- flags = &lbr_stack->entries[0].flags;
- branch_from =
- lbr_stack->entries[0].from;
- }
- }
+ stitched_lbr = has_stitched_lbr(thread, sample,
+ &lbr_stitch->prev_sample,
+ max_lbr, callee);
- err = add_callchain_ip(thread, cursor, parent,
- root_al, &cpumode, ip,
- branch, flags, NULL,
- branch_from);
+ if (!stitched_lbr && !list_empty(&lbr_stitch->lists)) {
+ list_replace_init(&lbr_stitch->lists,
+ &lbr_stitch->free_lists);
+ }
+ memcpy(&lbr_stitch->prev_sample, sample, sizeof(*sample));
+ }
+
+ if (callee) {
+ /* Add kernel ip */
+ err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+ parent, root_al, branch_from,
+ true, i);
+ if (err)
+ goto error;
+
+ err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+ root_al, &branch_from, true);
+ if (err)
+ goto error;
+
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
if (err)
- return (err < 0) ? err : 0;
+ goto error;
}
- return 1;
+
+ } else {
+ if (stitched_lbr) {
+ err = lbr_callchain_add_stitched_lbr_ip(thread, cursor);
+ if (err)
+ goto error;
+ }
+ err = lbr_callchain_add_lbr_ip(thread, cursor, sample, parent,
+ root_al, &branch_from, false);
+ if (err)
+ goto error;
+
+ /* Add kernel ip */
+ err = lbr_callchain_add_kernel_ip(thread, cursor, sample,
+ parent, root_al, branch_from,
+ false, i);
+ if (err)
+ goto error;
}
+ return 1;
- return 0;
+error:
+ return (err < 0) ? err : 0;
}
static int find_prev_cpumode(struct ip_callchain *chain, struct thread *thread,
@@ -2281,6 +2607,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
int max_stack)
{
struct branch_stack *branch = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
struct ip_callchain *chain = sample->callchain;
int chain_nr = 0;
u8 cpumode = PERF_RECORD_MISC_USER;
@@ -2291,9 +2618,12 @@ static int thread__resolve_callchain_sample(struct thread *thread,
if (chain)
chain_nr = chain->nr;
- if (perf_evsel__has_branch_callstack(evsel)) {
+ if (evsel__has_branch_callstack(evsel)) {
+ struct perf_env *env = evsel__env(evsel);
+
err = resolve_lbr_callchain_sample(thread, cursor, sample, parent,
- root_al, max_stack);
+ root_al, max_stack,
+ !env ? 0 : env->max_branches);
if (err)
return (err < 0) ? err : 0;
}
@@ -2328,7 +2658,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
for (i = 0; i < nr; i++) {
if (callchain_param.order == ORDER_CALLEE) {
- be[i] = branch->entries[i];
+ be[i] = entries[i];
if (chain == NULL)
continue;
@@ -2347,7 +2677,7 @@ static int thread__resolve_callchain_sample(struct thread *thread,
be[i].from >= chain->ips[first_call] - 8)
first_call++;
} else
- be[i] = branch->entries[branch->nr - i - 1];
+ be[i] = entries[branch->nr - i - 1];
}
memset(iter, 0, sizeof(struct iterations) * nr);
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index be0a930eca89..fa1be9ea00fa 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -128,6 +128,9 @@ int machine__process_switch_event(struct machine *machine,
int machine__process_namespaces_event(struct machine *machine,
union perf_event *event,
struct perf_sample *sample);
+int machine__process_cgroup_event(struct machine *machine,
+ union perf_event *event,
+ struct perf_sample *sample);
int machine__process_mmap_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index b342f744b1fc..53d96611e6a6 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -44,8 +44,8 @@ static inline int is_no_dso_memory(const char *filename)
static inline int is_android_lib(const char *filename)
{
- return !strncmp(filename, "/data/app-lib", 13) ||
- !strncmp(filename, "/system/lib", 11);
+ return strstarts(filename, "/data/app-lib/") ||
+ strstarts(filename, "/system/lib/");
}
static inline bool replace_android_lib(const char *filename, char *newfilename)
@@ -65,7 +65,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
app_abi_length = strlen(app_abi);
- if (!strncmp(filename, "/data/app-lib", 13)) {
+ if (strstarts(filename, "/data/app-lib/")) {
char *apk_path;
if (!app_abi_length)
@@ -89,7 +89,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
return true;
}
- if (!strncmp(filename, "/system/lib/", 12)) {
+ if (strstarts(filename, "/system/lib/")) {
char *ndk, *app;
const char *arch;
size_t ndk_length;
diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c
index aa29589f6904..ea0af0bc4314 100644
--- a/tools/perf/util/mem-events.c
+++ b/tools/perf/util/mem-events.c
@@ -103,6 +103,21 @@ int perf_mem_events__init(void)
return found ? 0 : -ENOENT;
}
+void perf_mem_events__list(void)
+{
+ int j;
+
+ for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) {
+ struct perf_mem_event *e = &perf_mem_events[j];
+
+ fprintf(stderr, "%-13s%-*s%s\n",
+ e->tag,
+ verbose > 0 ? 25 : 0,
+ verbose > 0 ? perf_mem_events__name(j) : "",
+ e->supported ? ": available" : "");
+ }
+}
+
static const char * const tlb_access[] = {
"N/A",
"HIT",
diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h
index f1389bdae7bf..904dad34f7f7 100644
--- a/tools/perf/util/mem-events.h
+++ b/tools/perf/util/mem-events.h
@@ -39,6 +39,8 @@ int perf_mem_events__init(void);
char *perf_mem_events__name(int i);
+void perf_mem_events__list(void);
+
struct mem_info;
int perf_mem__tlb_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info);
diff --git a/tools/perf/util/mem2node.c b/tools/perf/util/mem2node.c
index 797d86a1ab09..c84f5841c7ab 100644
--- a/tools/perf/util/mem2node.c
+++ b/tools/perf/util/mem2node.c
@@ -1,5 +1,6 @@
#include <errno.h>
#include <inttypes.h>
+#include <asm/bug.h>
#include <linux/bitmap.h>
#include <linux/kernel.h>
#include <linux/zalloc.h>
@@ -95,7 +96,7 @@ int mem2node__init(struct mem2node *map, struct perf_env *env)
/* Cut unused entries, due to merging. */
tmp_entries = realloc(entries, sizeof(*entries) * j);
- if (tmp_entries)
+ if (tmp_entries || WARN_ON_ONCE(j == 0))
entries = tmp_entries;
for (i = 0; i < j; i++) {
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 02aee946b6c1..9e21aa767e41 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -22,6 +22,8 @@
#include <linux/string.h>
#include <linux/zalloc.h>
#include <subcmd/parse-options.h>
+#include <api/fs/fs.h>
+#include "util.h"
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
@@ -83,51 +85,103 @@ static void metricgroup__rblist_init(struct rblist *metric_events)
struct egroup {
struct list_head nd;
- int idnum;
- const char **ids;
+ struct expr_parse_ctx pctx;
const char *metric_name;
const char *metric_expr;
const char *metric_unit;
+ int runtime;
+ bool has_constraint;
};
+/**
+ * Find a group of events in perf_evlist that correpond to those from a parsed
+ * metric expression. Note, as find_evsel_group is called in the same order as
+ * perf_evlist was constructed, metric_no_merge doesn't need to test for
+ * underfilling a group.
+ * @perf_evlist: a list of events something like: {metric1 leader, metric1
+ * sibling, metric1 sibling}:W,duration_time,{metric2 leader, metric2 sibling,
+ * metric2 sibling}:W,duration_time
+ * @pctx: the parse context for the metric expression.
+ * @metric_no_merge: don't attempt to share events for the metric with other
+ * metrics.
+ * @has_constraint: is there a contraint on the group of events? In which case
+ * the events won't be grouped.
+ * @metric_events: out argument, null terminated array of evsel's associated
+ * with the metric.
+ * @evlist_used: in/out argument, bitmap tracking which evlist events are used.
+ * @return the first metric event or NULL on failure.
+ */
static struct evsel *find_evsel_group(struct evlist *perf_evlist,
- const char **ids,
- int idnum,
- struct evsel **metric_events)
+ struct expr_parse_ctx *pctx,
+ bool metric_no_merge,
+ bool has_constraint,
+ struct evsel **metric_events,
+ unsigned long *evlist_used)
{
- struct evsel *ev;
- int i = 0;
- bool leader_found;
+ struct evsel *ev, *current_leader = NULL;
+ double *val_ptr;
+ int i = 0, matched_events = 0, events_to_match;
+ const int idnum = (int)hashmap__size(&pctx->ids);
+
+ /* duration_time is grouped separately. */
+ if (!has_constraint &&
+ hashmap__find(&pctx->ids, "duration_time", (void **)&val_ptr))
+ events_to_match = idnum - 1;
+ else
+ events_to_match = idnum;
evlist__for_each_entry (perf_evlist, ev) {
- if (!strcmp(ev->name, ids[i])) {
- if (!metric_events[i])
- metric_events[i] = ev;
- i++;
- if (i == idnum)
- break;
- } else {
- if (i + 1 == idnum) {
- /* Discard the whole match and start again */
- i = 0;
- memset(metric_events, 0,
- sizeof(struct evsel *) * idnum);
- continue;
+ /*
+ * Events with a constraint aren't grouped and match the first
+ * events available.
+ */
+ if (has_constraint && ev->weak_group)
+ continue;
+ /* Ignore event if already used and merging is disabled. */
+ if (metric_no_merge && test_bit(ev->idx, evlist_used))
+ continue;
+ if (!has_constraint && ev->leader != current_leader) {
+ /*
+ * Start of a new group, discard the whole match and
+ * start again.
+ */
+ matched_events = 0;
+ memset(metric_events, 0,
+ sizeof(struct evsel *) * idnum);
+ current_leader = ev->leader;
+ }
+ if (hashmap__find(&pctx->ids, ev->name, (void **)&val_ptr)) {
+ if (has_constraint) {
+ /*
+ * Events aren't grouped, ensure the same event
+ * isn't matched from two groups.
+ */
+ for (i = 0; i < matched_events; i++) {
+ if (!strcmp(ev->name,
+ metric_events[i]->name)) {
+ break;
+ }
+ }
+ if (i != matched_events)
+ continue;
}
+ metric_events[matched_events++] = ev;
+ }
+ if (matched_events == events_to_match)
+ break;
+ }
- if (!strcmp(ev->name, ids[i]))
- metric_events[i] = ev;
- else {
- /* Discard the whole match and start again */
- i = 0;
- memset(metric_events, 0,
- sizeof(struct evsel *) * idnum);
- continue;
+ if (events_to_match != idnum) {
+ /* Add the first duration_time. */
+ evlist__for_each_entry(perf_evlist, ev) {
+ if (!strcmp(ev->name, "duration_time")) {
+ metric_events[matched_events++] = ev;
+ break;
}
}
}
- if (i != idnum) {
+ if (matched_events != idnum) {
/* Not whole match */
return NULL;
}
@@ -135,22 +189,16 @@ static struct evsel *find_evsel_group(struct evlist *perf_evlist,
metric_events[idnum] = NULL;
for (i = 0; i < idnum; i++) {
- leader_found = false;
- evlist__for_each_entry(perf_evlist, ev) {
- if (!leader_found && (ev == metric_events[i]))
- leader_found = true;
-
- if (leader_found &&
- !strcmp(ev->name, metric_events[i]->name)) {
- ev->metric_leader = metric_events[i];
- }
- }
+ ev = metric_events[i];
+ ev->metric_leader = ev;
+ set_bit(ev->idx, evlist_used);
}
return metric_events[0];
}
static int metricgroup__setup_events(struct list_head *groups,
+ bool metric_no_merge,
struct evlist *perf_evlist,
struct rblist *metric_events_list)
{
@@ -159,41 +207,62 @@ static int metricgroup__setup_events(struct list_head *groups,
int i = 0;
int ret = 0;
struct egroup *eg;
- struct evsel *evsel;
+ struct evsel *evsel, *tmp;
+ unsigned long *evlist_used;
+
+ evlist_used = bitmap_alloc(perf_evlist->core.nr_entries);
+ if (!evlist_used)
+ return -ENOMEM;
list_for_each_entry (eg, groups, nd) {
struct evsel **metric_events;
- metric_events = calloc(sizeof(void *), eg->idnum + 1);
+ metric_events = calloc(sizeof(void *),
+ hashmap__size(&eg->pctx.ids) + 1);
if (!metric_events) {
ret = -ENOMEM;
break;
}
- evsel = find_evsel_group(perf_evlist, eg->ids, eg->idnum,
- metric_events);
+ evsel = find_evsel_group(perf_evlist, &eg->pctx,
+ metric_no_merge,
+ eg->has_constraint, metric_events,
+ evlist_used);
if (!evsel) {
pr_debug("Cannot resolve %s: %s\n",
eg->metric_name, eg->metric_expr);
+ free(metric_events);
continue;
}
- for (i = 0; i < eg->idnum; i++)
+ for (i = 0; metric_events[i]; i++)
metric_events[i]->collect_stat = true;
me = metricgroup__lookup(metric_events_list, evsel, true);
if (!me) {
ret = -ENOMEM;
+ free(metric_events);
break;
}
expr = malloc(sizeof(struct metric_expr));
if (!expr) {
ret = -ENOMEM;
+ free(metric_events);
break;
}
expr->metric_expr = eg->metric_expr;
expr->metric_name = eg->metric_name;
expr->metric_unit = eg->metric_unit;
expr->metric_events = metric_events;
+ expr->runtime = eg->runtime;
list_add(&expr->nd, &me->head);
}
+
+ evlist__for_each_entry_safe(perf_evlist, tmp, evsel) {
+ if (!test_bit(evsel->idx, evlist_used)) {
+ evlist__remove(perf_evlist, evsel);
+ evsel__delete(evsel);
+ }
+ }
+ bitmap_free(evlist_used);
+
return ret;
}
@@ -399,13 +468,142 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
strlist__delete(metriclist);
}
-static int metricgroup__add_metric(const char *metric, struct strbuf *events,
+static void metricgroup__add_metric_weak_group(struct strbuf *events,
+ struct expr_parse_ctx *ctx)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+ bool no_group = true, has_duration = false;
+
+ hashmap__for_each_entry((&ctx->ids), cur, bkt) {
+ pr_debug("found event %s\n", (const char *)cur->key);
+ /*
+ * Duration time maps to a software event and can make
+ * groups not count. Always use it outside a
+ * group.
+ */
+ if (!strcmp(cur->key, "duration_time")) {
+ has_duration = true;
+ continue;
+ }
+ strbuf_addf(events, "%s%s",
+ no_group ? "{" : ",",
+ (const char *)cur->key);
+ no_group = false;
+ }
+ if (!no_group) {
+ strbuf_addf(events, "}:W");
+ if (has_duration)
+ strbuf_addf(events, ",duration_time");
+ } else if (has_duration)
+ strbuf_addf(events, "duration_time");
+}
+
+static void metricgroup__add_metric_non_group(struct strbuf *events,
+ struct expr_parse_ctx *ctx)
+{
+ struct hashmap_entry *cur;
+ size_t bkt;
+ bool first = true;
+
+ hashmap__for_each_entry((&ctx->ids), cur, bkt) {
+ if (!first)
+ strbuf_addf(events, ",");
+ strbuf_addf(events, "%s", (const char *)cur->key);
+ first = false;
+ }
+}
+
+static void metricgroup___watchdog_constraint_hint(const char *name, bool foot)
+{
+ static bool violate_nmi_constraint;
+
+ if (!foot) {
+ pr_warning("Splitting metric group %s into standalone metrics.\n", name);
+ violate_nmi_constraint = true;
+ return;
+ }
+
+ if (!violate_nmi_constraint)
+ return;
+
+ pr_warning("Try disabling the NMI watchdog to comply NO_NMI_WATCHDOG metric constraint:\n"
+ " echo 0 > /proc/sys/kernel/nmi_watchdog\n"
+ " perf stat ...\n"
+ " echo 1 > /proc/sys/kernel/nmi_watchdog\n");
+}
+
+static bool metricgroup__has_constraint(struct pmu_event *pe)
+{
+ if (!pe->metric_constraint)
+ return false;
+
+ if (!strcmp(pe->metric_constraint, "NO_NMI_WATCHDOG") &&
+ sysctl__nmi_watchdog_enabled()) {
+ metricgroup___watchdog_constraint_hint(pe->metric_name, false);
+ return true;
+ }
+
+ return false;
+}
+
+int __weak arch_get_runtimeparam(void)
+{
+ return 1;
+}
+
+static int __metricgroup__add_metric(struct list_head *group_list,
+ struct pmu_event *pe,
+ bool metric_no_group,
+ int runtime)
+{
+ struct egroup *eg;
+
+ eg = malloc(sizeof(*eg));
+ if (!eg)
+ return -ENOMEM;
+
+ expr__ctx_init(&eg->pctx);
+ eg->metric_name = pe->metric_name;
+ eg->metric_expr = pe->metric_expr;
+ eg->metric_unit = pe->unit;
+ eg->runtime = runtime;
+ eg->has_constraint = metric_no_group || metricgroup__has_constraint(pe);
+
+ if (expr__find_other(pe->metric_expr, NULL, &eg->pctx, runtime) < 0) {
+ expr__ctx_clear(&eg->pctx);
+ free(eg);
+ return -EINVAL;
+ }
+
+ if (list_empty(group_list))
+ list_add(&eg->nd, group_list);
+ else {
+ struct list_head *pos;
+
+ /* Place the largest groups at the front. */
+ list_for_each_prev(pos, group_list) {
+ struct egroup *old = list_entry(pos, struct egroup, nd);
+
+ if (hashmap__size(&eg->pctx.ids) <=
+ hashmap__size(&old->pctx.ids))
+ break;
+ }
+ list_add(&eg->nd, pos);
+ }
+
+ return 0;
+}
+
+static int metricgroup__add_metric(const char *metric, bool metric_no_group,
+ struct strbuf *events,
struct list_head *group_list)
{
struct pmu_events_map *map = perf_pmu__find_map(NULL);
struct pmu_event *pe;
- int ret = -EINVAL;
- int i, j;
+ struct egroup *eg;
+ int i, ret;
+ bool has_match = false;
if (!map)
return 0;
@@ -413,64 +611,63 @@ static int metricgroup__add_metric(const char *metric, struct strbuf *events,
for (i = 0; ; i++) {
pe = &map->table[i];
- if (!pe->name && !pe->metric_group && !pe->metric_name)
+ if (!pe->name && !pe->metric_group && !pe->metric_name) {
+ /* End of pmu events. */
+ if (!has_match)
+ return -EINVAL;
break;
+ }
if (!pe->metric_expr)
continue;
if (match_metric(pe->metric_group, metric) ||
match_metric(pe->metric_name, metric)) {
- const char **ids;
- int idnum;
- struct egroup *eg;
- bool no_group = false;
-
+ has_match = true;
pr_debug("metric expr %s for %s\n", pe->metric_expr, pe->metric_name);
- if (expr__find_other(pe->metric_expr,
- NULL, &ids, &idnum) < 0)
- continue;
- if (events->len > 0)
- strbuf_addf(events, ",");
- for (j = 0; j < idnum; j++) {
- pr_debug("found event %s\n", ids[j]);
- /*
- * Duration time maps to a software event and can make
- * groups not count. Always use it outside a
- * group.
+ if (!strstr(pe->metric_expr, "?")) {
+ ret = __metricgroup__add_metric(group_list,
+ pe,
+ metric_no_group,
+ 1);
+ if (ret)
+ return ret;
+ } else {
+ int j, count;
+
+ count = arch_get_runtimeparam();
+
+ /* This loop is added to create multiple
+ * events depend on count value and add
+ * those events to group_list.
*/
- if (!strcmp(ids[j], "duration_time")) {
- if (j > 0)
- strbuf_addf(events, "}:W,");
- strbuf_addf(events, "duration_time");
- no_group = true;
- continue;
+
+ for (j = 0; j < count; j++) {
+ ret = __metricgroup__add_metric(
+ group_list, pe,
+ metric_no_group, j);
+ if (ret)
+ return ret;
}
- strbuf_addf(events, "%s%s",
- j == 0 || no_group ? "{" : ",",
- ids[j]);
- no_group = false;
}
- if (!no_group)
- strbuf_addf(events, "}:W");
+ }
+ }
+ list_for_each_entry(eg, group_list, nd) {
+ if (events->len > 0)
+ strbuf_addf(events, ",");
- eg = malloc(sizeof(struct egroup));
- if (!eg) {
- ret = -ENOMEM;
- break;
- }
- eg->ids = ids;
- eg->idnum = idnum;
- eg->metric_name = pe->metric_name;
- eg->metric_expr = pe->metric_expr;
- eg->metric_unit = pe->unit;
- list_add_tail(&eg->nd, group_list);
- ret = 0;
+ if (eg->has_constraint) {
+ metricgroup__add_metric_non_group(events,
+ &eg->pctx);
+ } else {
+ metricgroup__add_metric_weak_group(events,
+ &eg->pctx);
}
}
- return ret;
+ return 0;
}
-static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
+static int metricgroup__add_metric_list(const char *list, bool metric_no_group,
+ struct strbuf *events,
struct list_head *group_list)
{
char *llist, *nlist, *p;
@@ -485,7 +682,8 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
strbuf_addf(events, "%s", "");
while ((p = strsep(&llist, ",")) != NULL) {
- ret = metricgroup__add_metric(p, events, group_list);
+ ret = metricgroup__add_metric(p, metric_no_group, events,
+ group_list);
if (ret == -EINVAL) {
fprintf(stderr, "Cannot find metric or group `%s'\n",
p);
@@ -493,26 +691,29 @@ static int metricgroup__add_metric_list(const char *list, struct strbuf *events,
}
}
free(nlist);
+
+ if (!ret)
+ metricgroup___watchdog_constraint_hint(NULL, true);
+
return ret;
}
static void metricgroup__free_egroups(struct list_head *group_list)
{
struct egroup *eg, *egtmp;
- int i;
list_for_each_entry_safe (eg, egtmp, group_list, nd) {
- for (i = 0; i < eg->idnum; i++)
- zfree(&eg->ids[i]);
- zfree(&eg->ids);
+ expr__ctx_clear(&eg->pctx);
list_del_init(&eg->nd);
free(eg);
}
}
int metricgroup__parse_groups(const struct option *opt,
- const char *str,
- struct rblist *metric_events)
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events)
{
struct parse_events_error parse_error;
struct evlist *perf_evlist = *(struct evlist **)opt->value;
@@ -522,7 +723,8 @@ int metricgroup__parse_groups(const struct option *opt,
if (metric_events->nr_entries == 0)
metricgroup__rblist_init(metric_events);
- ret = metricgroup__add_metric_list(str, &extra_events, &group_list);
+ ret = metricgroup__add_metric_list(str, metric_no_group,
+ &extra_events, &group_list);
if (ret)
return ret;
pr_debug("adding %s\n", extra_events.buf);
@@ -533,8 +735,8 @@ int metricgroup__parse_groups(const struct option *opt,
goto out;
}
strbuf_release(&extra_events);
- ret = metricgroup__setup_events(&group_list, perf_evlist,
- metric_events);
+ ret = metricgroup__setup_events(&group_list, metric_no_merge,
+ perf_evlist, metric_events);
out:
metricgroup__free_egroups(&group_list);
return ret;
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 475c7f912864..287850bcdeca 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -22,16 +22,20 @@ struct metric_expr {
const char *metric_name;
const char *metric_unit;
struct evsel **metric_events;
+ int runtime;
};
struct metric_event *metricgroup__lookup(struct rblist *metric_events,
struct evsel *evsel,
bool create);
int metricgroup__parse_groups(const struct option *opt,
- const char *str,
- struct rblist *metric_events);
+ const char *str,
+ bool metric_no_group,
+ bool metric_no_merge,
+ struct rblist *metric_events);
void metricgroup__print(bool metrics, bool groups, char *filter,
bool raw, bool details);
bool metricgroup__has_metric(const char *metric);
+int arch_get_runtimeparam(void);
#endif
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index 3b664fa673a6..ab7108d22428 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -98,20 +98,29 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, int cpu, int affinity)
{
void *data;
size_t mmap_len;
- unsigned long node_mask;
+ unsigned long *node_mask;
+ unsigned long node_index;
+ int err = 0;
if (affinity != PERF_AFFINITY_SYS && cpu__max_node() > 1) {
data = map->aio.data[idx];
mmap_len = mmap__mmap_len(map);
- node_mask = 1UL << cpu__get_node(cpu);
- if (mbind(data, mmap_len, MPOL_BIND, &node_mask, 1, 0)) {
- pr_err("Failed to bind [%p-%p] AIO buffer to node %d: error %m\n",
- data, data + mmap_len, cpu__get_node(cpu));
+ node_index = cpu__get_node(cpu);
+ node_mask = bitmap_alloc(node_index + 1);
+ if (!node_mask) {
+ pr_err("Failed to allocate node mask for mbind: error %m\n");
return -1;
}
+ set_bit(node_index, node_mask);
+ if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) {
+ pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n",
+ data, data + mmap_len, node_index);
+ err = -1;
+ }
+ bitmap_free(node_mask);
}
- return 0;
+ return err;
}
#else /* !HAVE_LIBNUMA_SUPPORT */
static int perf_mmap__aio_alloc(struct mmap *map, int idx)
diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c
index 359db2b1fcef..48c8f609441b 100644
--- a/tools/perf/util/ordered-events.c
+++ b/tools/perf/util/ordered-events.c
@@ -314,7 +314,7 @@ static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how,
case OE_FLUSH__NONE:
default:
break;
- };
+ }
pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush PRE %s, nr_events %u\n",
str[how], oe->nr_events);
diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h
index 0920fb0ec6cc..75345946c4b9 100644
--- a/tools/perf/util/ordered-events.h
+++ b/tools/perf/util/ordered-events.h
@@ -29,7 +29,7 @@ typedef int (*ordered_events__deliver_t)(struct ordered_events *oe,
struct ordered_events_buffer {
struct list_head list;
- struct ordered_event event[0];
+ struct ordered_event event[];
};
struct ordered_events {
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index a7dc0b096974..3decbb203846 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -26,7 +26,7 @@
#include <api/fs/tracing_path.h>
#include <perf/cpumap.h>
#include "parse-events-bison.h"
-#define YY_EXTRA_TYPE int
+#define YY_EXTRA_TYPE void*
#include "parse-events-flex.h"
#include "pmu.h"
#include "thread_map.h"
@@ -36,6 +36,7 @@
#include "metricgroup.h"
#include "util/evsel_config.h"
#include "util/event.h"
+#include "util/pfm.h"
#define MAX_NAME_LEN 100
@@ -204,7 +205,8 @@ void parse_events__handle_error(struct parse_events_error *err, int idx,
err->help = help;
break;
default:
- WARN_ONCE(1, "WARNING: multiple event parsing errors\n");
+ pr_debug("Multiple errors dropping message: %s (%s)\n",
+ err->str, err->help);
free(err->str);
err->str = str;
free(err->help);
@@ -344,6 +346,7 @@ static char *get_config_name(struct list_head *head_terms)
static struct evsel *
__add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr,
+ bool init_attr,
char *name, struct perf_pmu *pmu,
struct list_head *config_terms, bool auto_merge_stats,
const char *cpu_list)
@@ -352,9 +355,10 @@ __add_event(struct list_head *list, int *idx,
struct perf_cpu_map *cpus = pmu ? pmu->cpus :
cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
- event_attr_init(attr);
+ if (init_attr)
+ event_attr_init(attr);
- evsel = perf_evsel__new_idx(attr, *idx);
+ evsel = evsel__new_idx(attr, *idx);
if (!evsel)
return NULL;
@@ -370,15 +374,25 @@ __add_event(struct list_head *list, int *idx,
if (config_terms)
list_splice(config_terms, &evsel->config_terms);
- list_add_tail(&evsel->core.node, list);
+ if (list)
+ list_add_tail(&evsel->core.node, list);
+
return evsel;
}
+struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
+ char *name, struct perf_pmu *pmu)
+{
+ return __add_event(NULL, &idx, attr, false, name, pmu, NULL, false,
+ NULL);
+}
+
static int add_event(struct list_head *list, int *idx,
struct perf_event_attr *attr, char *name,
struct list_head *config_terms)
{
- return __add_event(list, idx, attr, name, NULL, config_terms, false, NULL) ? 0 : -ENOMEM;
+ return __add_event(list, idx, attr, true, name, NULL, config_terms,
+ false, NULL) ? 0 : -ENOMEM;
}
static int add_event_tool(struct list_head *list, int *idx,
@@ -390,7 +404,8 @@ static int add_event_tool(struct list_head *list, int *idx,
.config = PERF_COUNT_SW_DUMMY,
};
- evsel = __add_event(list, idx, &attr, NULL, NULL, NULL, false, "0");
+ evsel = __add_event(list, idx, &attr, true, NULL, NULL, NULL, false,
+ "0");
if (!evsel)
return -ENOMEM;
evsel->tool_event = tool_event;
@@ -399,13 +414,13 @@ static int add_event_tool(struct list_head *list, int *idx,
return 0;
}
-static int parse_aliases(char *str, const char *names[][PERF_EVSEL__MAX_ALIASES], int size)
+static int parse_aliases(char *str, const char *names[][EVSEL__MAX_ALIASES], int size)
{
int i, j;
int n, longest = -1;
for (i = 0; i < size; i++) {
- for (j = 0; j < PERF_EVSEL__MAX_ALIASES && names[i][j]; j++) {
+ for (j = 0; j < EVSEL__MAX_ALIASES && names[i][j]; j++) {
n = strlen(names[i][j]);
if (n > longest && !strncasecmp(str, names[i][j], n))
longest = n;
@@ -444,8 +459,7 @@ int parse_events_add_cache(struct list_head *list, int *idx,
* No fallback - if we cannot get a clear cache type
* then bail out:
*/
- cache_type = parse_aliases(type, perf_evsel__hw_cache,
- PERF_COUNT_HW_CACHE_MAX);
+ cache_type = parse_aliases(type, evsel__hw_cache, PERF_COUNT_HW_CACHE_MAX);
if (cache_type == -1)
return -EINVAL;
@@ -458,17 +472,17 @@ int parse_events_add_cache(struct list_head *list, int *idx,
n += snprintf(name + n, MAX_NAME_LEN - n, "-%s", str);
if (cache_op == -1) {
- cache_op = parse_aliases(str, perf_evsel__hw_cache_op,
+ cache_op = parse_aliases(str, evsel__hw_cache_op,
PERF_COUNT_HW_CACHE_OP_MAX);
if (cache_op >= 0) {
- if (!perf_evsel__is_cache_op_valid(cache_type, cache_op))
+ if (!evsel__is_cache_op_valid(cache_type, cache_op))
return -EINVAL;
continue;
}
}
if (cache_result == -1) {
- cache_result = parse_aliases(str, perf_evsel__hw_cache_result,
+ cache_result = parse_aliases(str, evsel__hw_cache_result,
PERF_COUNT_HW_CACHE_RESULT_MAX);
if (cache_result >= 0)
continue;
@@ -538,9 +552,8 @@ static int add_tracepoint(struct list_head *list, int *idx,
struct parse_events_error *err,
struct list_head *head_config)
{
- struct evsel *evsel;
+ struct evsel *evsel = evsel__newtp_idx(sys_name, evt_name, (*idx)++);
- evsel = perf_evsel__newtp_idx(sys_name, evt_name, (*idx)++);
if (IS_ERR(evsel)) {
tracepoint_error(err, PTR_ERR(evsel), sys_name, evt_name);
return PTR_ERR(evsel);
@@ -1214,14 +1227,14 @@ static int get_config_terms(struct list_head *head_config,
struct list_head *head_terms __maybe_unused)
{
#define ADD_CONFIG_TERM(__type, __weak) \
- struct perf_evsel_config_term *__t; \
+ struct evsel_config_term *__t; \
\
__t = zalloc(sizeof(*__t)); \
if (!__t) \
return -ENOMEM; \
\
INIT_LIST_HEAD(&__t->list); \
- __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \
+ __t->type = EVSEL__CONFIG_TERM_ ## __type; \
__t->weak = __weak; \
list_add_tail(&__t->list, head_terms)
@@ -1312,7 +1325,7 @@ do { \
}
/*
- * Add PERF_EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for
+ * Add EVSEL__CONFIG_TERM_CFG_CHG where cfg_chg will have a bit set for
* each bit of attr->config that the user has changed.
*/
static int get_config_chgs(struct perf_pmu *pmu, struct list_head *head_config,
@@ -1400,10 +1413,10 @@ int parse_events_add_tool(struct parse_events_state *parse_state,
static bool config_term_percore(struct list_head *config_terms)
{
- struct perf_evsel_config_term *term;
+ struct evsel_config_term *term;
list_for_each_entry(term, config_terms, list) {
- if (term->type == PERF_EVSEL__CONFIG_TERM_PERCORE)
+ if (term->type == EVSEL__CONFIG_TERM_PERCORE)
return term->val.percore;
}
@@ -1424,6 +1437,19 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
bool use_uncore_alias;
LIST_HEAD(config_terms);
+ if (verbose > 1) {
+ fprintf(stderr, "Attempting to add event pmu '%s' with '",
+ name);
+ if (head_config) {
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head_config, list) {
+ fprintf(stderr, "%s,", term->config);
+ }
+ }
+ fprintf(stderr, "' that may result in non-fatal errors\n");
+ }
+
pmu = perf_pmu__find(name);
if (!pmu) {
char *err_str;
@@ -1446,10 +1472,10 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
if (!head_config) {
attr.type = pmu->type;
- evsel = __add_event(list, &parse_state->idx, &attr, NULL, pmu, NULL,
- auto_merge_stats, NULL);
+ evsel = __add_event(list, &parse_state->idx, &attr, true, NULL,
+ pmu, NULL, auto_merge_stats, NULL);
if (evsel) {
- evsel->pmu_name = name;
+ evsel->pmu_name = name ? strdup(name) : NULL;
evsel->use_uncore_alias = use_uncore_alias;
return 0;
} else {
@@ -1460,6 +1486,19 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
if (perf_pmu__check_alias(pmu, head_config, &info))
return -EINVAL;
+ if (verbose > 1) {
+ fprintf(stderr, "After aliases, add event pmu '%s' with '",
+ name);
+ if (head_config) {
+ struct parse_events_term *term;
+
+ list_for_each_entry(term, head_config, list) {
+ fprintf(stderr, "%s,", term->config);
+ }
+ }
+ fprintf(stderr, "' that may result in non-fatal errors\n");
+ }
+
/*
* Configure hardcoded terms first, no need to check
* return value when called with fail == 0 ;)
@@ -1478,16 +1517,18 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
return -ENOMEM;
if (perf_pmu__config(pmu, &attr, head_config, parse_state->error)) {
- struct perf_evsel_config_term *pos, *tmp;
+ struct evsel_config_term *pos, *tmp;
list_for_each_entry_safe(pos, tmp, &config_terms, list) {
list_del_init(&pos->list);
+ if (pos->free_str)
+ zfree(&pos->val.str);
free(pos);
}
return -EINVAL;
}
- evsel = __add_event(list, &parse_state->idx, &attr,
+ evsel = __add_event(list, &parse_state->idx, &attr, true,
get_config_name(head_config), pmu,
&config_terms, auto_merge_stats, NULL);
if (evsel) {
@@ -1497,7 +1538,7 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
evsel->snapshot = info.snapshot;
evsel->metric_expr = info.metric_expr;
evsel->metric_name = info.metric_name;
- evsel->pmu_name = name;
+ evsel->pmu_name = name ? strdup(name) : NULL;
evsel->use_uncore_alias = use_uncore_alias;
evsel->percore = config_term_percore(&evsel->config_terms);
}
@@ -1547,7 +1588,7 @@ int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
if (!parse_events_add_pmu(parse_state, list,
pmu->name, head,
true, true)) {
- pr_debug("%s -> %s/%s/\n", config,
+ pr_debug("%s -> %s/%s/\n", str,
pmu->name, alias->str);
ok++;
}
@@ -1629,12 +1670,11 @@ parse_events__set_leader_for_uncore_aliase(char *name, struct list_head *list,
* event. That can be used to distinguish the leader from
* other members, even they have the same event name.
*/
- if ((leader != evsel) && (leader->pmu_name == evsel->pmu_name)) {
+ if ((leader != evsel) &&
+ !strcmp(leader->pmu_name, evsel->pmu_name)) {
is_leader = false;
continue;
}
- /* The name is always alias name */
- WARN_ON(strcmp(leader->name, evsel->name));
/* Store the leader event for each PMU */
leaders[nr_pmu++] = (uintptr_t) evsel;
@@ -1870,7 +1910,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add)
evsel->precise_max = mod.precise_max;
evsel->weak_group = mod.weak;
- if (perf_evsel__is_group_leader(evsel))
+ if (evsel__is_group_leader(evsel))
evsel->core.attr.pinned = mod.pinned;
}
@@ -2001,13 +2041,14 @@ perf_pmu__parse_check(const char *name)
return r ? r->type : PMU_EVENT_SYMBOL_ERR;
}
-static int parse_events__scanner(const char *str, void *parse_state, int start_token)
+static int parse_events__scanner(const char *str,
+ struct parse_events_state *parse_state)
{
YY_BUFFER_STATE buffer;
void *scanner;
int ret;
- ret = parse_events_lex_init_extra(start_token, &scanner);
+ ret = parse_events_lex_init_extra(parse_state, &scanner);
if (ret)
return ret;
@@ -2015,6 +2056,7 @@ static int parse_events__scanner(const char *str, void *parse_state, int start_t
#ifdef PARSER_DEBUG
parse_events_debug = 1;
+ parse_events_set_debug(1, scanner);
#endif
ret = parse_events_parse(parse_state, scanner);
@@ -2030,11 +2072,12 @@ static int parse_events__scanner(const char *str, void *parse_state, int start_t
int parse_events_terms(struct list_head *terms, const char *str)
{
struct parse_events_state parse_state = {
- .terms = NULL,
+ .terms = NULL,
+ .stoken = PE_START_TERMS,
};
int ret;
- ret = parse_events__scanner(str, &parse_state, PE_START_TERMS);
+ ret = parse_events__scanner(str, &parse_state);
if (!ret) {
list_splice(parse_state.terms, terms);
zfree(&parse_state.terms);
@@ -2053,10 +2096,11 @@ int parse_events(struct evlist *evlist, const char *str,
.idx = evlist->core.nr_entries,
.error = err,
.evlist = evlist,
+ .stoken = PE_START_EVENTS,
};
int ret;
- ret = parse_events__scanner(str, &parse_state, PE_START_EVENTS);
+ ret = parse_events__scanner(str, &parse_state);
perf_pmu__parse_cleanup();
if (!ret && list_empty(&parse_state.list)) {
@@ -2190,6 +2234,29 @@ int parse_events_option(const struct option *opt, const char *str,
return ret;
}
+int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset)
+{
+ struct evlist **evlistp = opt->value;
+ int ret;
+
+ if (*evlistp == NULL) {
+ *evlistp = evlist__new();
+
+ if (*evlistp == NULL) {
+ fprintf(stderr, "Not enough memory to create evlist\n");
+ return -1;
+ }
+ }
+
+ ret = parse_events_option(opt, str, unset);
+ if (ret) {
+ evlist__delete(*evlistp);
+ *evlistp = NULL;
+ }
+
+ return ret;
+}
+
static int
foreach_evsel_in_last_glob(struct evlist *evlist,
int (*func)(struct evsel *evsel,
@@ -2237,7 +2304,7 @@ static int set_filter(struct evsel *evsel, const void *arg)
}
if (evsel->core.attr.type == PERF_TYPE_TRACEPOINT) {
- if (perf_evsel__append_tp_filter(evsel, str) < 0) {
+ if (evsel__append_tp_filter(evsel, str) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
@@ -2262,7 +2329,7 @@ static int set_filter(struct evsel *evsel, const void *arg)
return -1;
}
- if (perf_evsel__append_addr_filter(evsel, str) < 0) {
+ if (evsel__append_addr_filter(evsel, str) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
@@ -2293,7 +2360,7 @@ static int add_exclude_perf_filter(struct evsel *evsel,
snprintf(new_filter, sizeof(new_filter), "common_pid != %d", getpid());
- if (perf_evsel__append_tp_filter(evsel, new_filter) < 0) {
+ if (evsel__append_tp_filter(evsel, new_filter) < 0) {
fprintf(stderr,
"not enough memory to hold filter string\n");
return -1;
@@ -2603,12 +2670,11 @@ restart:
for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
/* skip invalid cache type */
- if (!perf_evsel__is_cache_op_valid(type, op))
+ if (!evsel__is_cache_op_valid(type, op))
continue;
for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
- __perf_evsel__hw_cache_type_op_res_name(type, op, i,
- name, sizeof(name));
+ __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
if (event_glob != NULL && !strglobmatch(name, event_glob))
continue;
@@ -2794,6 +2860,8 @@ void print_events(const char *event_glob, bool name_only, bool quiet_flag,
print_sdt_events(NULL, NULL, name_only);
metricgroup__print(true, true, NULL, name_only, details_flag);
+
+ print_libpfm_events(name_only, long_desc);
}
int parse_events__is_hardcoded_term(struct parse_events_term *term)
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 27596cbd0ba0..1fe23a2f9b36 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -17,6 +17,7 @@ struct evlist;
struct parse_events_error;
struct option;
+struct perf_pmu;
struct tracepoint_path {
char *system;
@@ -31,6 +32,7 @@ bool have_tracepoints(struct list_head *evlist);
const char *event_type(int type);
int parse_events_option(const struct option *opt, const char *str, int unset);
+int parse_events_option_new_evlist(const struct option *opt, const char *str, int unset);
int parse_events(struct evlist *evlist, const char *str,
struct parse_events_error *error);
int parse_events_terms(struct list_head *terms, const char *str);
@@ -127,6 +129,7 @@ struct parse_events_state {
struct parse_events_error *error;
struct evlist *evlist;
struct list_head *terms;
+ int stoken;
};
void parse_events__handle_error(struct parse_events_error *err, int idx,
@@ -186,6 +189,9 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
bool auto_merge_stats,
bool use_alias);
+struct evsel *parse_events__add_event(int idx, struct perf_event_attr *attr,
+ char *name, struct perf_pmu *pmu);
+
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
char *str,
struct list_head **listp);
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 7b1c8ee537cf..002802e17059 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -209,10 +209,10 @@ modifier_bp [rwx]{1,3}
%%
%{
- {
- int start_token;
+ struct parse_events_state *_parse_state = parse_events_get_extra(yyscanner);
- start_token = parse_events_get_extra(yyscanner);
+ {
+ int start_token = _parse_state->stoken;
if (start_token == PE_START_TERMS)
BEGIN(config);
@@ -220,7 +220,7 @@ modifier_bp [rwx]{1,3}
BEGIN(event);
if (start_token) {
- parse_events_set_extra(NULL, yyscanner);
+ _parse_state->stoken = 0;
/*
* The flex parser does not init locations variable
* via the scan_string interface, so we need do the
@@ -252,7 +252,9 @@ modifier_bp [rwx]{1,3}
BEGIN(INITIAL);
REWIND(0);
}
-
+, {
+ return ',';
+ }
}
<array>{
@@ -286,6 +288,7 @@ no-overwrite { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_NOOVERWRITE); }
percore { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_PERCORE); }
aux-output { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_OUTPUT); }
aux-sample-size { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_AUX_SAMPLE_SIZE); }
+r{num_raw_hex} { return raw(yyscanner); }
, { return ','; }
"/" { BEGIN(INITIAL); return '/'; }
{name_minus} { return str(yyscanner, PE_NAME); }
@@ -342,11 +345,13 @@ bpf-output { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUT
* Because the prefix cycles is mixed up with cpu-cycles.
* loads and stores are mixed up with cache event
*/
-cycles-ct { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-cycles-t { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-mem-loads { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-mem-stores { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
-topdown-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
+cycles-ct |
+cycles-t |
+mem-loads |
+mem-stores |
+topdown-[a-z-]+ |
+tx-capacity-[a-z-]+ |
+el-capacity-[a-z-]+ { return str(yyscanner, PE_KERNEL_PMU_EVENT); }
L1-dcache|l1-d|l1d|L1-data |
L1-icache|l1-i|l1i|L1-instruction |
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 94f8bcd83582..c4ca932d092d 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -44,7 +44,7 @@ static void free_list_evsel(struct list_head* list_evsel)
list_for_each_entry_safe(evsel, tmp, list_evsel, core.node) {
list_del_init(&evsel->core.node);
- perf_evsel__delete(evsel);
+ evsel__delete(evsel);
}
free(list_evsel);
}
@@ -326,6 +326,7 @@ PE_NAME opt_pmu_config
}
parse_events_terms__delete($2);
parse_events_terms__delete(orig_terms);
+ free(pattern);
free($1);
$$ = list;
#undef CLEANUP_YYABORT
@@ -706,6 +707,15 @@ event_term
}
event_term:
+PE_RAW
+{
+ struct parse_events_term *term;
+
+ ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_CONFIG,
+ NULL, $1, false, &@1, NULL));
+ $$ = term;
+}
+|
PE_NAME '=' PE_NAME
{
struct parse_events_term *term;
diff --git a/tools/perf/util/perf_api_probe.c b/tools/perf/util/perf_api_probe.c
new file mode 100644
index 000000000000..1337965673d7
--- /dev/null
+++ b/tools/perf/util/perf_api_probe.c
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "perf-sys.h"
+#include "util/cloexec.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include "util/perf_api_probe.h"
+#include <perf/cpumap.h>
+#include <errno.h>
+
+typedef void (*setup_probe_fn_t)(struct evsel *evsel);
+
+static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+{
+ struct evlist *evlist;
+ struct evsel *evsel;
+ unsigned long flags = perf_event_open_cloexec_flag();
+ int err = -EAGAIN, fd;
+ static pid_t pid = -1;
+
+ evlist = evlist__new();
+ if (!evlist)
+ return -ENOMEM;
+
+ if (parse_events(evlist, str, NULL))
+ goto out_delete;
+
+ evsel = evlist__first(evlist);
+
+ while (1) {
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ if (fd < 0) {
+ if (pid == -1 && errno == EACCES) {
+ pid = 0;
+ continue;
+ }
+ goto out_delete;
+ }
+ break;
+ }
+ close(fd);
+
+ fn(evsel);
+
+ fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
+ if (fd < 0) {
+ if (errno == EINVAL)
+ err = -EINVAL;
+ goto out_delete;
+ }
+ close(fd);
+ err = 0;
+
+out_delete:
+ evlist__delete(evlist);
+ return err;
+}
+
+static bool perf_probe_api(setup_probe_fn_t fn)
+{
+ const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
+ struct perf_cpu_map *cpus;
+ int cpu, ret, i = 0;
+
+ cpus = perf_cpu_map__new(NULL);
+ if (!cpus)
+ return false;
+ cpu = cpus->map[0];
+ perf_cpu_map__put(cpus);
+
+ do {
+ ret = perf_do_probe_api(fn, cpu, try[i++]);
+ if (!ret)
+ return true;
+ } while (ret == -EAGAIN && try[i]);
+
+ return false;
+}
+
+static void perf_probe_sample_identifier(struct evsel *evsel)
+{
+ evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
+}
+
+static void perf_probe_comm_exec(struct evsel *evsel)
+{
+ evsel->core.attr.comm_exec = 1;
+}
+
+static void perf_probe_context_switch(struct evsel *evsel)
+{
+ evsel->core.attr.context_switch = 1;
+}
+
+bool perf_can_sample_identifier(void)
+{
+ return perf_probe_api(perf_probe_sample_identifier);
+}
+
+bool perf_can_comm_exec(void)
+{
+ return perf_probe_api(perf_probe_comm_exec);
+}
+
+bool perf_can_record_switch_events(void)
+{
+ return perf_probe_api(perf_probe_context_switch);
+}
+
+bool perf_can_record_cpu_wide(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ .exclude_kernel = 1,
+ };
+ struct perf_cpu_map *cpus;
+ int cpu, fd;
+
+ cpus = perf_cpu_map__new(NULL);
+ if (!cpus)
+ return false;
+ cpu = cpus->map[0];
+ perf_cpu_map__put(cpus);
+
+ fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
+ if (fd < 0)
+ return false;
+ close(fd);
+
+ return true;
+}
+
+/*
+ * Architectures are expected to know if AUX area sampling is supported by the
+ * hardware. Here we check for kernel support.
+ */
+bool perf_can_aux_sample(void)
+{
+ struct perf_event_attr attr = {
+ .size = sizeof(struct perf_event_attr),
+ .exclude_kernel = 1,
+ /*
+ * Non-zero value causes the kernel to calculate the effective
+ * attribute size up to that byte.
+ */
+ .aux_sample_size = 1,
+ };
+ int fd;
+
+ fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
+ /*
+ * If the kernel attribute is big enough to contain aux_sample_size
+ * then we assume that it is supported. We are relying on the kernel to
+ * validate the attribute size before anything else that could be wrong.
+ */
+ if (fd < 0 && errno == E2BIG)
+ return false;
+ if (fd >= 0)
+ close(fd);
+
+ return true;
+}
diff --git a/tools/perf/util/perf_api_probe.h b/tools/perf/util/perf_api_probe.h
new file mode 100644
index 000000000000..706c3c6426e2
--- /dev/null
+++ b/tools/perf/util/perf_api_probe.h
@@ -0,0 +1,14 @@
+
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_API_PROBE_H
+#define __PERF_API_PROBE_H
+
+#include <stdbool.h>
+
+bool perf_can_aux_sample(void);
+bool perf_can_comm_exec(void);
+bool perf_can_record_cpu_wide(void);
+bool perf_can_record_switch_events(void);
+bool perf_can_sample_identifier(void);
+
+#endif // __PERF_API_PROBE_H
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 651203126c71..b94fa07f5d32 100644
--- a/tools/perf/util/perf_event_attr_fprintf.c
+++ b/tools/perf/util/perf_event_attr_fprintf.c
@@ -35,6 +35,7 @@ static void __p_sample_type(char *buf, size_t size, u64 value)
bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER),
bit_name(IDENTIFIER), bit_name(REGS_INTR), bit_name(DATA_SRC),
bit_name(WEIGHT), bit_name(PHYS_ADDR), bit_name(AUX),
+ bit_name(CGROUP),
{ .name = NULL, }
};
#undef bit_name
@@ -50,6 +51,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
bit_name(ABORT_TX), bit_name(IN_TX), bit_name(NO_TX),
bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
+ bit_name(HW_INDEX),
{ .name = NULL, }
};
#undef bit_name
@@ -131,6 +133,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
PRINT_ATTRf(ksymbol, p_unsigned);
PRINT_ATTRf(bpf_event, p_unsigned);
PRINT_ATTRf(aux_output, p_unsigned);
+ PRINT_ATTRf(cgroup, p_unsigned);
PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned);
PRINT_ATTRf(bp_type, p_unsigned);
diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
new file mode 100644
index 000000000000..d735acb6c29c
--- /dev/null
+++ b/tools/perf/util/pfm.c
@@ -0,0 +1,281 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for libpfm4 event encoding.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#include "util/cpumap.h"
+#include "util/debug.h"
+#include "util/event.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/parse-events.h"
+#include "util/pmu.h"
+#include "util/pfm.h"
+
+#include <string.h>
+#include <linux/kernel.h>
+#include <perfmon/pfmlib_perf_event.h>
+
+static void libpfm_initialize(void)
+{
+ int ret;
+
+ ret = pfm_initialize();
+ if (ret != PFM_SUCCESS) {
+ ui__warning("libpfm failed to initialize: %s\n",
+ pfm_strerror(ret));
+ }
+}
+
+int parse_libpfm_events_option(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ struct evlist *evlist = *(struct evlist **)opt->value;
+ struct perf_event_attr attr;
+ struct perf_pmu *pmu;
+ struct evsel *evsel, *grp_leader = NULL;
+ char *p, *q, *p_orig;
+ const char *sep;
+ int grp_evt = -1;
+ int ret;
+
+ libpfm_initialize();
+
+ p_orig = p = strdup(str);
+ if (!p)
+ return -1;
+ /*
+ * force loading of the PMU list
+ */
+ perf_pmu__scan(NULL);
+
+ for (q = p; strsep(&p, ",{}"); q = p) {
+ sep = p ? str + (p - p_orig - 1) : "";
+ if (*sep == '{') {
+ if (grp_evt > -1) {
+ ui__error(
+ "nested event groups not supported\n");
+ goto error;
+ }
+ grp_evt++;
+ }
+
+ /* no event */
+ if (*q == '\0')
+ continue;
+
+ memset(&attr, 0, sizeof(attr));
+ event_attr_init(&attr);
+
+ ret = pfm_get_perf_event_encoding(q, PFM_PLM0|PFM_PLM3,
+ &attr, NULL, NULL);
+
+ if (ret != PFM_SUCCESS) {
+ ui__error("failed to parse event %s : %s\n", str,
+ pfm_strerror(ret));
+ goto error;
+ }
+
+ pmu = perf_pmu__find_by_type((unsigned int)attr.type);
+ evsel = parse_events__add_event(evlist->core.nr_entries,
+ &attr, q, pmu);
+ if (evsel == NULL)
+ goto error;
+
+ evsel->is_libpfm_event = true;
+
+ evlist__add(evlist, evsel);
+
+ if (grp_evt == 0)
+ grp_leader = evsel;
+
+ if (grp_evt > -1) {
+ evsel->leader = grp_leader;
+ grp_leader->core.nr_members++;
+ grp_evt++;
+ }
+
+ if (*sep == '}') {
+ if (grp_evt < 0) {
+ ui__error(
+ "cannot close a non-existing event group\n");
+ goto error;
+ }
+ evlist->nr_groups++;
+ grp_leader = NULL;
+ grp_evt = -1;
+ }
+ }
+ return 0;
+error:
+ free(p_orig);
+ return -1;
+}
+
+static const char *srcs[PFM_ATTR_CTRL_MAX] = {
+ [PFM_ATTR_CTRL_UNKNOWN] = "???",
+ [PFM_ATTR_CTRL_PMU] = "PMU",
+ [PFM_ATTR_CTRL_PERF_EVENT] = "perf_event",
+};
+
+static void
+print_attr_flags(pfm_event_attr_info_t *info)
+{
+ int n = 0;
+
+ if (info->is_dfl) {
+ printf("[default] ");
+ n++;
+ }
+
+ if (info->is_precise) {
+ printf("[precise] ");
+ n++;
+ }
+
+ if (!n)
+ printf("- ");
+}
+
+static void
+print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc)
+{
+ pfm_event_attr_info_t ainfo;
+ const char *src;
+ int j, ret;
+
+ ainfo.size = sizeof(ainfo);
+
+ printf(" %s\n", info->name);
+ printf(" [%s]\n", info->desc);
+ if (long_desc) {
+ if (info->equiv)
+ printf(" Equiv: %s\n", info->equiv);
+
+ printf(" Code : 0x%"PRIx64"\n", info->code);
+ }
+ pfm_for_each_event_attr(j, info) {
+ ret = pfm_get_event_attr_info(info->idx, j,
+ PFM_OS_PERF_EVENT_EXT, &ainfo);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ if (ainfo.type == PFM_ATTR_UMASK) {
+ printf(" %s:%s\n", info->name, ainfo.name);
+ printf(" [%s]\n", ainfo.desc);
+ }
+
+ if (!long_desc)
+ continue;
+
+ if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX)
+ ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN;
+
+ src = srcs[ainfo.ctrl];
+ switch (ainfo.type) {
+ case PFM_ATTR_UMASK:
+ printf(" Umask : 0x%02"PRIx64" : %s: ",
+ ainfo.code, src);
+ print_attr_flags(&ainfo);
+ putchar('\n');
+ break;
+ case PFM_ATTR_MOD_BOOL:
+ printf(" Modif : %s: [%s] : %s (boolean)\n", src,
+ ainfo.name, ainfo.desc);
+ break;
+ case PFM_ATTR_MOD_INTEGER:
+ printf(" Modif : %s: [%s] : %s (integer)\n", src,
+ ainfo.name, ainfo.desc);
+ break;
+ case PFM_ATTR_NONE:
+ case PFM_ATTR_RAW_UMASK:
+ case PFM_ATTR_MAX:
+ default:
+ printf(" Attr : %s: [%s] : %s\n", src,
+ ainfo.name, ainfo.desc);
+ }
+ }
+}
+
+/*
+ * list all pmu::event:umask, pmu::event
+ * printed events may not be all valid combinations of umask for an event
+ */
+static void
+print_libpfm_events_raw(pfm_pmu_info_t *pinfo, pfm_event_info_t *info)
+{
+ pfm_event_attr_info_t ainfo;
+ int j, ret;
+ bool has_umask = false;
+
+ ainfo.size = sizeof(ainfo);
+
+ pfm_for_each_event_attr(j, info) {
+ ret = pfm_get_event_attr_info(info->idx, j,
+ PFM_OS_PERF_EVENT_EXT, &ainfo);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ if (ainfo.type != PFM_ATTR_UMASK)
+ continue;
+
+ printf("%s::%s:%s\n", pinfo->name, info->name, ainfo.name);
+ has_umask = true;
+ }
+ if (!has_umask)
+ printf("%s::%s\n", pinfo->name, info->name);
+}
+
+void print_libpfm_events(bool name_only, bool long_desc)
+{
+ pfm_event_info_t info;
+ pfm_pmu_info_t pinfo;
+ int i, p, ret;
+
+ libpfm_initialize();
+
+ /* initialize to zero to indicate ABI version */
+ info.size = sizeof(info);
+ pinfo.size = sizeof(pinfo);
+
+ if (!name_only)
+ puts("\nList of pre-defined events (to be used in --pfm-events):\n");
+
+ pfm_for_all_pmus(p) {
+ bool printed_pmu = false;
+
+ ret = pfm_get_pmu_info(p, &pinfo);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ /* only print events that are supported by host HW */
+ if (!pinfo.is_present)
+ continue;
+
+ /* handled by perf directly */
+ if (pinfo.pmu == PFM_PMU_PERF_EVENT)
+ continue;
+
+ for (i = pinfo.first_event; i != -1;
+ i = pfm_get_event_next(i)) {
+
+ ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT,
+ &info);
+ if (ret != PFM_SUCCESS)
+ continue;
+
+ if (!name_only && !printed_pmu) {
+ printf("%s:\n", pinfo.name);
+ printed_pmu = true;
+ }
+
+ if (!name_only)
+ print_libpfm_events_detailed(&info, long_desc);
+ else
+ print_libpfm_events_raw(&pinfo, &info);
+ }
+ if (!name_only && printed_pmu)
+ putchar('\n');
+ }
+}
diff --git a/tools/perf/util/pfm.h b/tools/perf/util/pfm.h
new file mode 100644
index 000000000000..7d70dda87012
--- /dev/null
+++ b/tools/perf/util/pfm.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Support for libpfm4 event encoding.
+ *
+ * Copyright 2020 Google LLC.
+ */
+#ifndef __PERF_PFM_H
+#define __PERF_PFM_H
+
+#include <subcmd/parse-options.h>
+
+#ifdef HAVE_LIBPFM
+int parse_libpfm_events_option(const struct option *opt, const char *str,
+ int unset);
+
+void print_libpfm_events(bool name_only, bool long_desc);
+
+#else
+#include <linux/compiler.h>
+
+static inline int parse_libpfm_events_option(
+ const struct option *opt __maybe_unused,
+ const char *str __maybe_unused,
+ int unset __maybe_unused)
+{
+ return 0;
+}
+
+static inline void print_libpfm_events(bool name_only __maybe_unused,
+ bool long_desc __maybe_unused)
+{
+}
+
+#endif
+
+
+#endif /* __PERF_PFM_H */
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 8b99fd312aae..93fe72a9dc0b 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -18,10 +18,10 @@
#include <regex.h>
#include <perf/cpumap.h>
#include "debug.h"
+#include "evsel.h"
#include "pmu.h"
#include "parse-events.h"
#include "header.h"
-#include "pmu-events/pmu-events.h"
#include "string2.h"
#include "strbuf.h"
#include "fncache.h"
@@ -699,7 +699,7 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu)
return map;
}
-static bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
+bool pmu_uncore_alias_match(const char *pmu_name, const char *name)
{
char *tmp = NULL, *tok, *str;
bool res;
@@ -744,16 +744,11 @@ out:
* to the current running CPU. Then, add all PMU events from that table
* as aliases.
*/
-static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
+void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu,
+ struct pmu_events_map *map)
{
int i;
- struct pmu_events_map *map;
const char *name = pmu->name;
-
- map = perf_pmu__find_map(pmu);
- if (!map)
- return;
-
/*
* Found a matching PMU events table. Create aliases
*/
@@ -788,6 +783,17 @@ new_alias:
}
}
+static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu)
+{
+ struct pmu_events_map *map;
+
+ map = perf_pmu__find_map(pmu);
+ if (!map)
+ return;
+
+ pmu_add_cpu_aliases_map(head, pmu, map);
+}
+
struct perf_event_attr * __weak
perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
{
@@ -844,6 +850,7 @@ static struct perf_pmu *pmu_lookup(const char *name)
INIT_LIST_HEAD(&pmu->format);
INIT_LIST_HEAD(&pmu->aliases);
+ INIT_LIST_HEAD(&pmu->caps);
list_splice(&format, &pmu->format);
list_splice(&aliases, &pmu->aliases);
list_add_tail(&pmu->list, &pmus);
@@ -864,6 +871,17 @@ static struct perf_pmu *pmu_find(const char *name)
return NULL;
}
+struct perf_pmu *perf_pmu__find_by_type(unsigned int type)
+{
+ struct perf_pmu *pmu;
+
+ list_for_each_entry(pmu, &pmus, list)
+ if (pmu->type == type)
+ return pmu;
+
+ return NULL;
+}
+
struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
{
/*
@@ -879,6 +897,25 @@ struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu)
return NULL;
}
+struct perf_pmu *evsel__find_pmu(struct evsel *evsel)
+{
+ struct perf_pmu *pmu = NULL;
+
+ while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+ if (pmu->type == evsel->core.attr.type)
+ break;
+ }
+
+ return pmu;
+}
+
+bool evsel__is_aux_event(struct evsel *evsel)
+{
+ struct perf_pmu *pmu = evsel__find_pmu(evsel);
+
+ return pmu && pmu->auxtrace;
+}
+
struct perf_pmu *perf_pmu__find(const char *name)
{
struct perf_pmu *pmu;
@@ -979,12 +1016,11 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
struct parse_events_term *t;
list_for_each_entry(t, head_terms, list) {
- if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM) {
- if (!strcmp(t->config, term->config)) {
- t->used = true;
- *value = t->val.num;
- return 0;
- }
+ if (t->type_val == PARSE_EVENTS__TERM_TYPE_NUM &&
+ t->config && !strcmp(t->config, term->config)) {
+ t->used = true;
+ *value = t->val.num;
+ return 0;
}
}
@@ -1020,7 +1056,8 @@ error:
* Setup one of config[12] attr members based on the
* user input data - term parameter.
*/
-static int pmu_config_term(struct list_head *formats,
+static int pmu_config_term(const char *pmu_name,
+ struct list_head *formats,
struct perf_event_attr *attr,
struct parse_events_term *term,
struct list_head *head_terms,
@@ -1046,16 +1083,24 @@ static int pmu_config_term(struct list_head *formats,
format = pmu_find_format(formats, term->config);
if (!format) {
- if (verbose > 0)
- printf("Invalid event/parameter '%s'\n", term->config);
+ char *pmu_term = pmu_formats_string(formats);
+ char *unknown_term;
+ char *help_msg;
+
+ if (asprintf(&unknown_term,
+ "unknown term '%s' for pmu '%s'",
+ term->config, pmu_name) < 0)
+ unknown_term = NULL;
+ help_msg = parse_events_formats_error_string(pmu_term);
if (err) {
- char *pmu_term = pmu_formats_string(formats);
-
parse_events__handle_error(err, term->err_term,
- strdup("unknown term"),
- parse_events_formats_error_string(pmu_term));
- free(pmu_term);
+ unknown_term,
+ help_msg);
+ } else {
+ pr_debug("%s (%s)\n", unknown_term, help_msg);
+ free(unknown_term);
}
+ free(pmu_term);
return -EINVAL;
}
@@ -1132,7 +1177,7 @@ static int pmu_config_term(struct list_head *formats,
return 0;
}
-int perf_pmu__config_terms(struct list_head *formats,
+int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats,
struct perf_event_attr *attr,
struct list_head *head_terms,
bool zero, struct parse_events_error *err)
@@ -1140,7 +1185,7 @@ int perf_pmu__config_terms(struct list_head *formats,
struct parse_events_term *term;
list_for_each_entry(term, head_terms, list) {
- if (pmu_config_term(formats, attr, term, head_terms,
+ if (pmu_config_term(pmu_name, formats, attr, term, head_terms,
zero, err))
return -EINVAL;
}
@@ -1160,8 +1205,8 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
bool zero = !!pmu->default_config;
attr->type = pmu->type;
- return perf_pmu__config_terms(&pmu->format, attr, head_terms,
- zero, err);
+ return perf_pmu__config_terms(pmu->name, &pmu->format, attr,
+ head_terms, zero, err);
}
static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
@@ -1395,6 +1440,11 @@ static void wordwrap(char *s, int start, int max, int corr)
}
}
+bool is_pmu_core(const char *name)
+{
+ return !strcmp(name, "cpu") || is_arm_pmu_core(name);
+}
+
void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
bool long_desc, bool details_flag, bool deprecated)
{
@@ -1565,3 +1615,84 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
va_end(args);
return ret;
}
+
+static int perf_pmu__new_caps(struct list_head *list, char *name, char *value)
+{
+ struct perf_pmu_caps *caps = zalloc(sizeof(*caps));
+
+ if (!caps)
+ return -ENOMEM;
+
+ caps->name = strdup(name);
+ if (!caps->name)
+ goto free_caps;
+ caps->value = strndup(value, strlen(value) - 1);
+ if (!caps->value)
+ goto free_name;
+ list_add_tail(&caps->list, list);
+ return 0;
+
+free_name:
+ zfree(caps->name);
+free_caps:
+ free(caps);
+
+ return -ENOMEM;
+}
+
+/*
+ * Reading/parsing the given pmu capabilities, which should be located at:
+ * /sys/bus/event_source/devices/<dev>/caps as sysfs group attributes.
+ * Return the number of capabilities
+ */
+int perf_pmu__caps_parse(struct perf_pmu *pmu)
+{
+ struct stat st;
+ char caps_path[PATH_MAX];
+ const char *sysfs = sysfs__mountpoint();
+ DIR *caps_dir;
+ struct dirent *evt_ent;
+ int nr_caps = 0;
+
+ if (!sysfs)
+ return -1;
+
+ snprintf(caps_path, PATH_MAX,
+ "%s" EVENT_SOURCE_DEVICE_PATH "%s/caps", sysfs, pmu->name);
+
+ if (stat(caps_path, &st) < 0)
+ return 0; /* no error if caps does not exist */
+
+ caps_dir = opendir(caps_path);
+ if (!caps_dir)
+ return -EINVAL;
+
+ while ((evt_ent = readdir(caps_dir)) != NULL) {
+ char path[PATH_MAX + NAME_MAX + 1];
+ char *name = evt_ent->d_name;
+ char value[128];
+ FILE *file;
+
+ if (!strcmp(name, ".") || !strcmp(name, ".."))
+ continue;
+
+ snprintf(path, sizeof(path), "%s/%s", caps_path, name);
+
+ file = fopen(path, "r");
+ if (!file)
+ continue;
+
+ if (!fgets(value, sizeof(value), file) ||
+ (perf_pmu__new_caps(&pmu->caps, name, value) < 0)) {
+ fclose(file);
+ continue;
+ }
+
+ nr_caps++;
+ fclose(file);
+ }
+
+ closedir(caps_dir);
+
+ return nr_caps;
+}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 6737e3d5d568..85e0c7f2515c 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -7,8 +7,9 @@
#include <linux/perf_event.h>
#include <stdbool.h>
#include "parse-events.h"
+#include "pmu-events/pmu-events.h"
-struct perf_evsel_config_term;
+struct evsel_config_term;
enum {
PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -21,6 +22,12 @@ enum {
struct perf_event_attr;
+struct perf_pmu_caps {
+ char *name;
+ char *value;
+ struct list_head list;
+};
+
struct perf_pmu {
char *name;
__u32 type;
@@ -32,6 +39,7 @@ struct perf_pmu {
struct perf_cpu_map *cpus;
struct list_head format; /* HEAD struct perf_pmu_format -> list */
struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
+ struct list_head caps; /* HEAD struct perf_pmu_caps -> list */
struct list_head list; /* ELEM */
};
@@ -64,10 +72,11 @@ struct perf_pmu_alias {
};
struct perf_pmu *perf_pmu__find(const char *name);
+struct perf_pmu *perf_pmu__find_by_type(unsigned int type);
int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
struct list_head *head_terms,
struct parse_events_error *error);
-int perf_pmu__config_terms(struct list_head *formats,
+int perf_pmu__config_terms(const char *pmu_name, struct list_head *formats,
struct perf_event_attr *attr,
struct list_head *head_terms,
bool zero, struct parse_events_error *error);
@@ -87,6 +96,7 @@ int perf_pmu__format_parse(char *dir, struct list_head *head);
struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
+bool is_pmu_core(const char *name);
void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
bool long_desc, bool details_flag,
bool deprecated);
@@ -97,9 +107,14 @@ int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt,
int perf_pmu__test(void);
struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu);
+void pmu_add_cpu_aliases_map(struct list_head *head, struct perf_pmu *pmu,
+ struct pmu_events_map *map);
struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu);
+bool pmu_uncore_alias_match(const char *pmu_name, const char *name);
int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
+int perf_pmu__caps_parse(struct perf_pmu *pmu);
+
#endif /* __PMU_H */
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index eea132f512b0..a08f373d3305 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -102,7 +102,7 @@ void exit_probe_symbol_maps(void)
symbol__exit();
}
-static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
+static struct ref_reloc_sym *kernel_get_ref_reloc_sym(struct map **pmap)
{
/* kmap->ref_reloc_sym should be set if host_machine is initialized */
struct kmap *kmap;
@@ -114,6 +114,10 @@ static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void)
kmap = map__kmap(map);
if (!kmap)
return NULL;
+
+ if (pmap)
+ *pmap = map;
+
return kmap->ref_reloc_sym;
}
@@ -125,7 +129,7 @@ static int kernel_get_symbol_address_by_name(const char *name, u64 *addr,
struct map *map;
/* ref_reloc_sym is just a label. Need a special fix*/
- reloc_sym = kernel_get_ref_reloc_sym();
+ reloc_sym = kernel_get_ref_reloc_sym(NULL);
if (reloc_sym && strcmp(name, reloc_sym->name) == 0)
*addr = (reloc) ? reloc_sym->addr : reloc_sym->unrelocated_addr;
else {
@@ -232,21 +236,22 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
static bool kprobe_blacklist__listed(unsigned long address);
static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
{
- u64 etext_addr = 0;
- int ret;
-
- /* Get the address of _etext for checking non-probable text symbol */
- ret = kernel_get_symbol_address_by_name("_etext", &etext_addr,
- false, false);
+ struct map *map;
+ bool ret = false;
- if (ret == 0 && etext_addr < address)
- pr_warning("%s is out of .text, skip it.\n", symbol);
- else if (kprobe_blacklist__listed(address))
+ map = kernel_get_module_map(NULL);
+ if (map) {
+ ret = address <= map->start || map->end < address;
+ if (ret)
+ pr_warning("%s is out of .text, skip it.\n", symbol);
+ map__put(map);
+ }
+ if (!ret && kprobe_blacklist__listed(address)) {
pr_warning("%s is blacklisted function, skip it.\n", symbol);
- else
- return false;
+ ret = true;
+ }
- return true;
+ return ret;
}
/*
@@ -745,6 +750,7 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
int ntevs)
{
struct ref_reloc_sym *reloc_sym;
+ struct map *map;
char *tmp;
int i, skipped = 0;
@@ -753,7 +759,7 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
return post_process_offline_probe_trace_events(tevs, ntevs,
symbol_conf.vmlinux_name);
- reloc_sym = kernel_get_ref_reloc_sym();
+ reloc_sym = kernel_get_ref_reloc_sym(&map);
if (!reloc_sym) {
pr_warning("Relocated base symbol is not found!\n");
return -EINVAL;
@@ -764,9 +770,13 @@ post_process_kernel_probe_trace_events(struct probe_trace_event *tevs,
continue;
if (tevs[i].point.retprobe && !kretprobe_offset_is_supported())
continue;
- /* If we found a wrong one, mark it by NULL symbol */
+ /*
+ * If we found a wrong one, mark it by NULL symbol.
+ * Since addresses in debuginfo is same as objdump, we need
+ * to convert it to addresses on memory.
+ */
if (kprobe_warn_out_range(tevs[i].point.symbol,
- tevs[i].point.address)) {
+ map__objdump_2mem(map, tevs[i].point.address))) {
tmp = NULL;
skipped++;
} else {
@@ -1765,8 +1775,7 @@ int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev)
fmt1_str = strtok_r(argv0_str, ":", &fmt);
fmt2_str = strtok_r(NULL, "/", &fmt);
fmt3_str = strtok_r(NULL, " \t", &fmt);
- if (fmt1_str == NULL || strlen(fmt1_str) != 1 || fmt2_str == NULL
- || fmt3_str == NULL) {
+ if (fmt1_str == NULL || fmt2_str == NULL || fmt3_str == NULL) {
semantic_error("Failed to parse event name: %s\n", argv[0]);
ret = -EINVAL;
goto out;
@@ -2936,7 +2945,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
/* Note that the symbols in the kmodule are not relocated */
if (!pev->uprobes && !pev->target &&
(!pp->retprobe || kretprobe_offset_is_supported())) {
- reloc_sym = kernel_get_ref_reloc_sym();
+ reloc_sym = kernel_get_ref_reloc_sym(NULL);
if (!reloc_sym) {
pr_warning("Relocated base symbol is not found!\n");
ret = -EINVAL;
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index e4cff49384f4..55924255c535 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -101,6 +101,7 @@ enum dso_binary_type distro_dwarf_types[] = {
DSO_BINARY_TYPE__UBUNTU_DEBUGINFO,
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
DSO_BINARY_TYPE__BUILDID_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
DSO_BINARY_TYPE__NOT_FOUND,
};
diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
index 80ff41fc45be..a1d1e4ef6257 100644
--- a/tools/perf/util/pstack.c
+++ b/tools/perf/util/pstack.c
@@ -15,7 +15,7 @@
struct pstack {
unsigned short top;
unsigned short max_nr_entries;
- void *entries[0];
+ void *entries[];
};
struct pstack *pstack__new(unsigned short max_nr_entries)
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index e7279ea6043a..a9d9c142eb7c 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -34,3 +34,4 @@ util/string.c
util/symbol_fprintf.c
util/units.c
util/affinity.c
+util/rwsem.c
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 83212c65848b..75a9b1d62bba 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -801,7 +801,7 @@ static int pyrf_evsel__init(struct pyrf_evsel *pevsel,
static void pyrf_evsel__delete(struct pyrf_evsel *pevsel)
{
- perf_evsel__exit(&pevsel->evsel);
+ evsel__exit(&pevsel->evsel);
Py_TYPE(pevsel)->tp_free((PyObject*)pevsel);
}
@@ -1044,7 +1044,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist,
pevent->evsel = evsel;
- err = perf_evsel__parse_sample(evsel, event, &pevent->sample);
+ err = evsel__parse_sample(evsel, event, &pevent->sample);
/* Consume the even only after we parsed it out. */
perf_mmap__consume(&md->core);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 7def66168503..a4cc11592f6b 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -10,161 +10,64 @@
#include <subcmd/parse-options.h>
#include <perf/cpumap.h>
#include "cloexec.h"
+#include "util/perf_api_probe.h"
#include "record.h"
#include "../perf-sys.h"
-typedef void (*setup_probe_fn_t)(struct evsel *evsel);
-
-static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
+/*
+ * evsel__config_leader_sampling() uses special rules for leader sampling.
+ * However, if the leader is an AUX area event, then assume the event to sample
+ * is the next event.
+ */
+static struct evsel *evsel__read_sampler(struct evsel *evsel, struct evlist *evlist)
{
- struct evlist *evlist;
- struct evsel *evsel;
- unsigned long flags = perf_event_open_cloexec_flag();
- int err = -EAGAIN, fd;
- static pid_t pid = -1;
-
- evlist = evlist__new();
- if (!evlist)
- return -ENOMEM;
-
- if (parse_events(evlist, str, NULL))
- goto out_delete;
-
- evsel = evlist__first(evlist);
+ struct evsel *leader = evsel->leader;
- while (1) {
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
- if (fd < 0) {
- if (pid == -1 && errno == EACCES) {
- pid = 0;
- continue;
- }
- goto out_delete;
+ if (evsel__is_aux_event(leader)) {
+ evlist__for_each_entry(evlist, evsel) {
+ if (evsel->leader == leader && evsel != evsel->leader)
+ return evsel;
}
- break;
- }
- close(fd);
-
- fn(evsel);
-
- fd = sys_perf_event_open(&evsel->core.attr, pid, cpu, -1, flags);
- if (fd < 0) {
- if (errno == EINVAL)
- err = -EINVAL;
- goto out_delete;
}
- close(fd);
- err = 0;
-
-out_delete:
- evlist__delete(evlist);
- return err;
-}
-
-static bool perf_probe_api(setup_probe_fn_t fn)
-{
- const char *try[] = {"cycles:u", "instructions:u", "cpu-clock:u", NULL};
- struct perf_cpu_map *cpus;
- int cpu, ret, i = 0;
-
- cpus = perf_cpu_map__new(NULL);
- if (!cpus)
- return false;
- cpu = cpus->map[0];
- perf_cpu_map__put(cpus);
-
- do {
- ret = perf_do_probe_api(fn, cpu, try[i++]);
- if (!ret)
- return true;
- } while (ret == -EAGAIN && try[i]);
- return false;
+ return leader;
}
-static void perf_probe_sample_identifier(struct evsel *evsel)
+static void evsel__config_leader_sampling(struct evsel *evsel, struct evlist *evlist)
{
- evsel->core.attr.sample_type |= PERF_SAMPLE_IDENTIFIER;
-}
-
-static void perf_probe_comm_exec(struct evsel *evsel)
-{
- evsel->core.attr.comm_exec = 1;
-}
-
-static void perf_probe_context_switch(struct evsel *evsel)
-{
- evsel->core.attr.context_switch = 1;
-}
-
-bool perf_can_sample_identifier(void)
-{
- return perf_probe_api(perf_probe_sample_identifier);
-}
+ struct perf_event_attr *attr = &evsel->core.attr;
+ struct evsel *leader = evsel->leader;
+ struct evsel *read_sampler;
-static bool perf_can_comm_exec(void)
-{
- return perf_probe_api(perf_probe_comm_exec);
-}
+ if (!leader->sample_read)
+ return;
-bool perf_can_record_switch_events(void)
-{
- return perf_probe_api(perf_probe_context_switch);
-}
+ read_sampler = evsel__read_sampler(evsel, evlist);
-bool perf_can_record_cpu_wide(void)
-{
- struct perf_event_attr attr = {
- .type = PERF_TYPE_SOFTWARE,
- .config = PERF_COUNT_SW_CPU_CLOCK,
- .exclude_kernel = 1,
- };
- struct perf_cpu_map *cpus;
- int cpu, fd;
-
- cpus = perf_cpu_map__new(NULL);
- if (!cpus)
- return false;
- cpu = cpus->map[0];
- perf_cpu_map__put(cpus);
+ if (evsel == read_sampler)
+ return;
- fd = sys_perf_event_open(&attr, -1, cpu, -1, 0);
- if (fd < 0)
- return false;
- close(fd);
-
- return true;
-}
-
-/*
- * Architectures are expected to know if AUX area sampling is supported by the
- * hardware. Here we check for kernel support.
- */
-bool perf_can_aux_sample(void)
-{
- struct perf_event_attr attr = {
- .size = sizeof(struct perf_event_attr),
- .exclude_kernel = 1,
- /*
- * Non-zero value causes the kernel to calculate the effective
- * attribute size up to that byte.
- */
- .aux_sample_size = 1,
- };
- int fd;
-
- fd = sys_perf_event_open(&attr, -1, 0, -1, 0);
/*
- * If the kernel attribute is big enough to contain aux_sample_size
- * then we assume that it is supported. We are relying on the kernel to
- * validate the attribute size before anything else that could be wrong.
+ * Disable sampling for all group members other than the leader in
+ * case the leader 'leads' the sampling, except when the leader is an
+ * AUX area event, in which case the 2nd event in the group is the one
+ * that 'leads' the sampling.
*/
- if (fd < 0 && errno == E2BIG)
- return false;
- if (fd >= 0)
- close(fd);
+ attr->freq = 0;
+ attr->sample_freq = 0;
+ attr->sample_period = 0;
+ attr->write_backward = 0;
- return true;
+ /*
+ * We don't get a sample for slave events, we make them when delivering
+ * the group leader sample. Set the slave event to follow the master
+ * sample_type to ease up reporting.
+ * An AUX area event also has sample_type requirements, so also include
+ * the sample type bits from the leader's sample_type to cover that
+ * case.
+ */
+ attr->sample_type = read_sampler->core.attr.sample_type |
+ leader->core.attr.sample_type;
}
void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
@@ -188,11 +91,15 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
use_comm_exec = perf_can_comm_exec();
evlist__for_each_entry(evlist, evsel) {
- perf_evsel__config(evsel, opts, callchain);
+ evsel__config(evsel, opts, callchain);
if (evsel->tracking && use_comm_exec)
evsel->core.attr.comm_exec = 1;
}
+ /* Configure leader sampling here now that the sample type is known */
+ evlist__for_each_entry(evlist, evsel)
+ evsel__config_leader_sampling(evsel, evlist);
+
if (opts->full_auxtrace) {
/*
* Need to be able to synthesize and parse selected events with
@@ -215,7 +122,7 @@ void perf_evlist__config(struct evlist *evlist, struct record_opts *opts,
if (sample_id) {
evlist__for_each_entry(evlist, evsel)
- perf_evsel__set_sample_id(evsel, use_sample_identifier);
+ evsel__set_sample_id(evsel, use_sample_identifier);
}
perf_evlist__set_id_pos(evlist);
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 5421fd2ad383..39d1de4b2a36 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -34,7 +34,9 @@ struct record_opts {
bool auxtrace_snapshot_on_exit;
bool auxtrace_sample_mode;
bool record_namespaces;
+ bool record_cgroup;
bool record_switch_events;
+ bool record_switch_events_set;
bool all_kernel;
bool all_user;
bool kernel_callchains;
@@ -67,6 +69,7 @@ struct record_opts {
int affinity;
int mmap_flush;
unsigned int comp_level;
+ unsigned int nr_threads_synthesize;
};
extern const char * const *record_usage;
@@ -74,4 +77,9 @@ extern struct option *record_options;
int record__parse_freq(const struct option *opt, const char *str, int unset);
+static inline bool record_opts__no_switch_events(const struct record_opts *opts)
+{
+ return opts->record_switch_events_set && !opts->record_switch_events;
+}
+
#endif // _PERF_RECORD_H
diff --git a/tools/perf/util/s390-cpumcf-kernel.h b/tools/perf/util/s390-cpumcf-kernel.h
index d4356030b504..f55ca07f3ca1 100644
--- a/tools/perf/util/s390-cpumcf-kernel.h
+++ b/tools/perf/util/s390-cpumcf-kernel.h
@@ -11,6 +11,7 @@
#define S390_CPUMCF_DIAG_DEF 0xfeef /* Counter diagnostic entry ID */
#define PERF_EVENT_CPUM_CF_DIAG 0xBC000 /* Event: Counter sets */
+#define PERF_EVENT_CPUM_SF_DIAG 0xBD000 /* Event: Combined-sampling */
struct cf_ctrset_entry { /* CPU-M CF counter set entry (8 byte) */
unsigned int def:16; /* 0-15 Data Entry Format */
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index 6785cd87aa4d..f8861998e5bd 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -1047,6 +1047,14 @@ static void s390_cpumsf_free(struct perf_session *session)
free(sf);
}
+static bool
+s390_cpumsf_evsel_is_auxtrace(struct perf_session *session __maybe_unused,
+ struct evsel *evsel)
+{
+ return evsel->core.attr.type == PERF_TYPE_RAW &&
+ evsel->core.attr.config == PERF_EVENT_CPUM_SF_DIAG;
+}
+
static int s390_cpumsf_get_type(const char *cpuid)
{
int ret, family = 0;
@@ -1071,7 +1079,8 @@ static bool check_auxtrace_itrace(struct itrace_synth_opts *itops)
itops->pwr_events || itops->errors ||
itops->dont_decode || itops->calls || itops->returns ||
itops->callchain || itops->thread_stack ||
- itops->last_branch;
+ itops->last_branch || itops->add_callchain ||
+ itops->add_last_branch;
if (!ison)
return true;
pr_err("Unsupported --itrace options specified\n");
@@ -1142,6 +1151,7 @@ int s390_cpumsf_process_auxtrace_info(union perf_event *event,
sf->auxtrace.flush_events = s390_cpumsf_flush;
sf->auxtrace.free_events = s390_cpumsf_free_events;
sf->auxtrace.free = s390_cpumsf_free;
+ sf->auxtrace.evsel_is_auxtrace = s390_cpumsf_evsel_is_auxtrace;
session->auxtrace = &sf->auxtrace;
if (dump_trace)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 80ca5d0ab7fe..739516fdf6e3 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -464,6 +464,7 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
struct thread *thread)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
PyObject *pylist;
u64 i;
@@ -484,28 +485,28 @@ static PyObject *python_process_brstack(struct perf_sample *sample,
Py_FatalError("couldn't create Python dictionary");
pydict_set_item_string_decref(pyelem, "from",
- PyLong_FromUnsignedLongLong(br->entries[i].from));
+ PyLong_FromUnsignedLongLong(entries[i].from));
pydict_set_item_string_decref(pyelem, "to",
- PyLong_FromUnsignedLongLong(br->entries[i].to));
+ PyLong_FromUnsignedLongLong(entries[i].to));
pydict_set_item_string_decref(pyelem, "mispred",
- PyBool_FromLong(br->entries[i].flags.mispred));
+ PyBool_FromLong(entries[i].flags.mispred));
pydict_set_item_string_decref(pyelem, "predicted",
- PyBool_FromLong(br->entries[i].flags.predicted));
+ PyBool_FromLong(entries[i].flags.predicted));
pydict_set_item_string_decref(pyelem, "in_tx",
- PyBool_FromLong(br->entries[i].flags.in_tx));
+ PyBool_FromLong(entries[i].flags.in_tx));
pydict_set_item_string_decref(pyelem, "abort",
- PyBool_FromLong(br->entries[i].flags.abort));
+ PyBool_FromLong(entries[i].flags.abort));
pydict_set_item_string_decref(pyelem, "cycles",
- PyLong_FromUnsignedLongLong(br->entries[i].flags.cycles));
+ PyLong_FromUnsignedLongLong(entries[i].flags.cycles));
thread__find_map_fb(thread, sample->cpumode,
- br->entries[i].from, &al);
+ entries[i].from, &al);
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "from_dsoname",
_PyUnicode_FromString(dsoname));
thread__find_map_fb(thread, sample->cpumode,
- br->entries[i].to, &al);
+ entries[i].to, &al);
dsoname = get_dsoname(al.map);
pydict_set_item_string_decref(pyelem, "to_dsoname",
_PyUnicode_FromString(dsoname));
@@ -561,6 +562,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
struct thread *thread)
{
struct branch_stack *br = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
PyObject *pylist;
u64 i;
char bf[512];
@@ -581,22 +583,22 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
Py_FatalError("couldn't create Python dictionary");
thread__find_symbol_fb(thread, sample->cpumode,
- br->entries[i].from, &al);
+ entries[i].from, &al);
get_symoff(al.sym, &al, true, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "from",
_PyUnicode_FromString(bf));
thread__find_symbol_fb(thread, sample->cpumode,
- br->entries[i].to, &al);
+ entries[i].to, &al);
get_symoff(al.sym, &al, true, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "to",
_PyUnicode_FromString(bf));
- get_br_mspred(&br->entries[i].flags, bf, sizeof(bf));
+ get_br_mspred(&entries[i].flags, bf, sizeof(bf));
pydict_set_item_string_decref(pyelem, "pred",
_PyUnicode_FromString(bf));
- if (br->entries[i].flags.in_tx) {
+ if (entries[i].flags.in_tx) {
pydict_set_item_string_decref(pyelem, "in_tx",
_PyUnicode_FromString("X"));
} else {
@@ -604,7 +606,7 @@ static PyObject *python_process_brstacksym(struct perf_sample *sample,
_PyUnicode_FromString("-"));
}
- if (br->entries[i].flags.abort) {
+ if (entries[i].flags.abort) {
pydict_set_item_string_decref(pyelem, "abort",
_PyUnicode_FromString("A"));
} else {
@@ -692,6 +694,9 @@ static int regs_map(struct regs_dump *regs, uint64_t mask, char *bf, int size)
bf[0] = 0;
+ if (!regs || !regs->regs)
+ return 0;
+
for_each_set_bit(r, (unsigned long *) &mask, sizeof(mask) * 8) {
u64 val = regs->regs[i++];
@@ -736,7 +741,7 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample,
if (!dict_sample)
Py_FatalError("couldn't create Python dictionary");
- pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(perf_evsel__name(evsel)));
+ pydict_set_item_string_decref(dict, "ev_name", _PyUnicode_FromString(evsel__name(evsel)));
pydict_set_item_string_decref(dict, "attr", _PyBytes_FromStringAndSize((const char *)&evsel->core.attr, sizeof(evsel->core.attr)));
pydict_set_item_string_decref(dict_sample, "pid",
@@ -963,7 +968,7 @@ static int python_export_evsel(struct db_export *dbe, struct evsel *evsel)
t = tuple_new(2);
tuple_set_u64(t, 0, evsel->db_id);
- tuple_set_string(t, 1, perf_evsel__name(evsel));
+ tuple_set_string(t, 1, evsel__name(evsel));
call_object(tables->evsel_handler, t, "evsel_table");
@@ -1344,7 +1349,7 @@ static void get_handler_name(char *str, size_t size,
{
char *p = str;
- scnprintf(str, size, "stat__%s", perf_evsel__name(evsel));
+ scnprintf(str, size, "stat__%s", evsel__name(evsel));
while ((p = strchr(p, ':'))) {
*p = '_';
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d0d7d25b23e3..1a157e84a04a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -33,7 +33,6 @@
#include "../perf.h"
#include "arch/common.h"
#include <internal/lib.h>
-#include <linux/err.h>
#ifdef HAVE_ZSTD_SUPPORT
static int perf_session__process_compressed_event(struct perf_session *session,
@@ -471,6 +470,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
tool->comm = process_event_stub;
if (tool->namespaces == NULL)
tool->namespaces = process_event_stub;
+ if (tool->cgroup == NULL)
+ tool->cgroup = process_event_stub;
if (tool->fork == NULL)
tool->fork = process_event_stub;
if (tool->exit == NULL)
@@ -1007,6 +1008,7 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample)
{
struct ip_callchain *callchain = sample->callchain;
struct branch_stack *lbr_stack = sample->branch_stack;
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
u64 kernel_callchain_nr = callchain->nr;
unsigned int i;
@@ -1043,10 +1045,10 @@ static void callchain__lbr_callstack_printf(struct perf_sample *sample)
i, callchain->ips[i]);
printf("..... %2d: %016" PRIx64 "\n",
- (int)(kernel_callchain_nr), lbr_stack->entries[0].to);
+ (int)(kernel_callchain_nr), entries[0].to);
for (i = 0; i < lbr_stack->nr; i++)
printf("..... %2d: %016" PRIx64 "\n",
- (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from);
+ (int)(i + kernel_callchain_nr + 1), entries[i].from);
}
}
@@ -1056,7 +1058,7 @@ static void callchain__printf(struct evsel *evsel,
unsigned int i;
struct ip_callchain *callchain = sample->callchain;
- if (perf_evsel__has_branch_callstack(evsel))
+ if (evsel__has_branch_callstack(evsel))
callchain__lbr_callstack_printf(sample);
printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr);
@@ -1068,6 +1070,7 @@ static void callchain__printf(struct evsel *evsel,
static void branch_stack__printf(struct perf_sample *sample, bool callstack)
{
+ struct branch_entry *entries = perf_sample__branch_entries(sample);
uint64_t i;
printf("%s: nr:%" PRIu64 "\n",
@@ -1075,7 +1078,7 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
sample->branch_stack->nr);
for (i = 0; i < sample->branch_stack->nr; i++) {
- struct branch_entry *e = &sample->branch_stack->entries[i];
+ struct branch_entry *e = &entries[i];
if (!callstack) {
printf("..... %2"PRIu64": %016" PRIx64 " -> %016" PRIx64 " %hu cycles %s%s%s%s %x\n",
@@ -1100,7 +1103,7 @@ static void regs_dump__printf(u64 mask, u64 *regs)
for_each_set_bit(rid, (unsigned long *) &mask, sizeof(mask) * 8) {
u64 val = regs[i++];
- printf(".... %-5s 0x%" PRIx64 "\n",
+ printf(".... %-5s 0x%016" PRIx64 "\n",
perf_reg_name(rid), val);
}
}
@@ -1239,8 +1242,8 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
if (evsel__has_callchain(evsel))
callchain__printf(evsel, sample);
- if (sample_type & PERF_SAMPLE_BRANCH_STACK)
- branch_stack__printf(sample, perf_evsel__has_branch_callstack(evsel));
+ if (evsel__has_br_stack(evsel))
+ branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
if (sample_type & PERF_SAMPLE_REGS_USER)
regs_user__printf(sample);
@@ -1276,8 +1279,7 @@ static void dump_read(struct evsel *evsel, union perf_event *event)
return;
printf(": %d %d %s %" PRI_lu64 "\n", event->read.pid, event->read.tid,
- perf_evsel__name(evsel),
- event->read.value);
+ evsel__name(evsel), event->read.value);
if (!evsel)
return;
@@ -1434,6 +1436,8 @@ static int machines__deliver_event(struct machines *machines,
return tool->comm(tool, event, sample, machine);
case PERF_RECORD_NAMESPACES:
return tool->namespaces(tool, event, sample, machine);
+ case PERF_RECORD_CGROUP:
+ return tool->cgroup(tool, event, sample, machine);
case PERF_RECORD_FORK:
return tool->fork(tool, event, sample, machine);
case PERF_RECORD_EXIT:
@@ -1537,8 +1541,13 @@ static s64 perf_session__process_user_event(struct perf_session *session,
*/
return 0;
case PERF_RECORD_HEADER_TRACING_DATA:
- /* setup for reading amidst mmap */
- lseek(fd, file_offset, SEEK_SET);
+ /*
+ * Setup for reading amidst mmap, but only when we
+ * are in 'file' mode. The 'pipe' fd is in proper
+ * place already.
+ */
+ if (!perf_data__is_pipe(session->data))
+ lseek(fd, file_offset, SEEK_SET);
return tool->tracing_data(session, event);
case PERF_RECORD_HEADER_BUILD_ID:
return tool->build_id(session, event);
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 8a065a6f9713..c5e3e9a68162 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -3,7 +3,7 @@ from subprocess import Popen, PIPE
from re import sub
cc = getenv("CC")
-cc_is_clang = b"clang version" in Popen([cc, "-v"], stderr=PIPE).stderr.readline()
+cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stderr.readline()
def clang_has_option(option):
return [o for o in Popen([cc, option], stderr=PIPE).stderr.readlines() if b"unknown argument" in o] == [ ]
@@ -21,6 +21,8 @@ if cc_is_clang:
vars[var] = sub("-fstack-clash-protection", "", vars[var])
if not clang_has_option("-fstack-protector-strong"):
vars[var] = sub("-fstack-protector-strong", "", vars[var])
+ if not clang_has_option("-fno-semantic-interposition"):
+ vars[var] = sub("-fno-semantic-interposition", "", vars[var])
from distutils.core import setup, Extension
diff --git a/tools/perf/util/sideband_evlist.c b/tools/perf/util/sideband_evlist.c
new file mode 100644
index 000000000000..ded9ced02797
--- /dev/null
+++ b/tools/perf/util/sideband_evlist.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "util/debug.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/mmap.h"
+#include "util/perf_api_probe.h"
+#include <perf/mmap.h>
+#include <linux/perf_event.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdbool.h>
+
+int perf_evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
+ evsel__sb_cb_t cb, void *data)
+{
+ struct evsel *evsel;
+
+ if (!attr->sample_id_all) {
+ pr_warning("enabling sample_id_all for all side band events\n");
+ attr->sample_id_all = 1;
+ }
+
+ evsel = evsel__new_idx(attr, evlist->core.nr_entries);
+ if (!evsel)
+ return -1;
+
+ evsel->side_band.cb = cb;
+ evsel->side_band.data = data;
+ evlist__add(evlist, evsel);
+ return 0;
+}
+
+static void *perf_evlist__poll_thread(void *arg)
+{
+ struct evlist *evlist = arg;
+ bool draining = false;
+ int i, done = 0;
+ /*
+ * In order to read symbols from other namespaces perf to needs to call
+ * setns(2). This isn't permitted if the struct_fs has multiple users.
+ * unshare(2) the fs so that we may continue to setns into namespaces
+ * that we're observing when, for instance, reading the build-ids at
+ * the end of a 'perf record' session.
+ */
+ unshare(CLONE_FS);
+
+ while (!done) {
+ bool got_data = false;
+
+ if (evlist->thread.done)
+ draining = true;
+
+ if (!draining)
+ evlist__poll(evlist, 1000);
+
+ for (i = 0; i < evlist->core.nr_mmaps; i++) {
+ struct mmap *map = &evlist->mmap[i];
+ union perf_event *event;
+
+ if (perf_mmap__read_init(&map->core))
+ continue;
+ while ((event = perf_mmap__read_event(&map->core)) != NULL) {
+ struct evsel *evsel = perf_evlist__event2evsel(evlist, event);
+
+ if (evsel && evsel->side_band.cb)
+ evsel->side_band.cb(event, evsel->side_band.data);
+ else
+ pr_warning("cannot locate proper evsel for the side band event\n");
+
+ perf_mmap__consume(&map->core);
+ got_data = true;
+ }
+ perf_mmap__read_done(&map->core);
+ }
+
+ if (draining && !got_data)
+ break;
+ }
+ return NULL;
+}
+
+void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel) {
+ evsel->core.attr.sample_id_all = 1;
+ evsel->core.attr.watermark = 1;
+ evsel->core.attr.wakeup_watermark = 1;
+ evsel->side_band.cb = cb;
+ evsel->side_band.data = data;
+ }
+}
+
+int perf_evlist__start_sb_thread(struct evlist *evlist, struct target *target)
+{
+ struct evsel *counter;
+
+ if (!evlist)
+ return 0;
+
+ if (perf_evlist__create_maps(evlist, target))
+ goto out_delete_evlist;
+
+ if (evlist->core.nr_entries > 1) {
+ bool can_sample_identifier = perf_can_sample_identifier();
+
+ evlist__for_each_entry(evlist, counter)
+ evsel__set_sample_id(counter, can_sample_identifier);
+
+ perf_evlist__set_id_pos(evlist);
+ }
+
+ evlist__for_each_entry(evlist, counter) {
+ if (evsel__open(counter, evlist->core.cpus, evlist->core.threads) < 0)
+ goto out_delete_evlist;
+ }
+
+ if (evlist__mmap(evlist, UINT_MAX))
+ goto out_delete_evlist;
+
+ evlist__for_each_entry(evlist, counter) {
+ if (evsel__enable(counter))
+ goto out_delete_evlist;
+ }
+
+ evlist->thread.done = 0;
+ if (pthread_create(&evlist->thread.th, NULL, perf_evlist__poll_thread, evlist))
+ goto out_delete_evlist;
+
+ return 0;
+
+out_delete_evlist:
+ evlist__delete(evlist);
+ evlist = NULL;
+ return -1;
+}
+
+void perf_evlist__stop_sb_thread(struct evlist *evlist)
+{
+ if (!evlist)
+ return;
+ evlist->thread.done = 1;
+ pthread_join(evlist->thread.th, NULL);
+ evlist__delete(evlist);
+}
diff --git a/tools/perf/util/smt.c b/tools/perf/util/smt.c
index 3b791ef2cd50..20bacd5972ad 100644
--- a/tools/perf/util/smt.c
+++ b/tools/perf/util/smt.c
@@ -15,6 +15,9 @@ int smt_on(void)
if (cached)
return cached_result;
+ if (sysfs__read_int("devices/system/cpu/smt/active", &cached_result) > 0)
+ goto done;
+
ncpu = sysconf(_SC_NPROCESSORS_CONF);
for (cpu = 0; cpu < ncpu; cpu++) {
unsigned long long siblings;
@@ -24,13 +27,13 @@ int smt_on(void)
snprintf(fn, sizeof fn,
"devices/system/cpu/cpu%d/topology/core_cpus", cpu);
- if (access(fn, F_OK) == -1) {
+ if (sysfs__read_str(fn, &str, &strlen) < 0) {
snprintf(fn, sizeof fn,
"devices/system/cpu/cpu%d/topology/thread_siblings",
cpu);
+ if (sysfs__read_str(fn, &str, &strlen) < 0)
+ continue;
}
- if (sysfs__read_str(fn, &str, &strlen) < 0)
- continue;
/* Entry is hex, but does not have 0x, so need custom parser */
siblings = strtoull(str, NULL, 16);
free(str);
@@ -42,6 +45,7 @@ int smt_on(void)
}
if (!cached) {
cached_result = 0;
+done:
cached = true;
}
return cached_result;
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index ab0cfd790ad0..d42339df20f8 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -12,6 +12,7 @@
#include "cacheline.h"
#include "comm.h"
#include "map.h"
+#include "maps.h"
#include "symbol.h"
#include "map_symbol.h"
#include "branch.h"
@@ -25,6 +26,8 @@
#include "mem-events.h"
#include "annotate.h"
#include "time-utils.h"
+#include "cgroup.h"
+#include "machine.h"
#include <linux/kernel.h>
#include <linux/string.h>
@@ -234,7 +237,7 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
return (int64_t)(right_ip - left_ip);
}
-static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
+int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
{
if (!sym_l || !sym_r)
return cmp_null(sym_l, sym_r);
@@ -297,8 +300,14 @@ static int _hist_entry__sym_snprintf(struct map_symbol *ms,
if (verbose > 0) {
char o = map ? dso__symtab_origin(map->dso) : '!';
+ u64 rip = ip;
+
+ if (map && map->dso && map->dso->kernel
+ && map->dso->adjust_symbols)
+ rip = map->unmap_ip(map, ip);
+
ret += repsep_snprintf(bf, size, "%-#*llx %c ",
- BITS_PER_LONG / 4 + 2, ip, o);
+ BITS_PER_LONG / 4 + 2, rip, o);
}
ret += repsep_snprintf(bf + ret, size - ret, "[%c] ", level);
@@ -634,6 +643,39 @@ struct sort_entry sort_cgroup_id = {
.se_width_idx = HISTC_CGROUP_ID,
};
+/* --sort cgroup */
+
+static int64_t
+sort__cgroup_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ return right->cgroup - left->cgroup;
+}
+
+static int hist_entry__cgroup_snprintf(struct hist_entry *he,
+ char *bf, size_t size,
+ unsigned int width __maybe_unused)
+{
+ const char *cgrp_name = "N/A";
+
+ if (he->cgroup) {
+ struct cgroup *cgrp = cgroup__find(he->ms.maps->machine->env,
+ he->cgroup);
+ if (cgrp != NULL)
+ cgrp_name = cgrp->name;
+ else
+ cgrp_name = "unknown";
+ }
+
+ return repsep_snprintf(bf, size, "%s", cgrp_name);
+}
+
+struct sort_entry sort_cgroup = {
+ .se_header = "Cgroup",
+ .se_cmp = sort__cgroup_cmp,
+ .se_snprintf = hist_entry__cgroup_snprintf,
+ .se_width_idx = HISTC_CGROUP,
+};
+
/* --sort socket */
static int64_t
@@ -869,7 +911,8 @@ static int hist_entry__sym_from_snprintf(struct hist_entry *he, char *bf,
if (he->branch_info) {
struct addr_map_symbol *from = &he->branch_info->from;
- return _hist_entry__sym_snprintf(&from->ms, from->addr, he->level, bf, size, width);
+ return _hist_entry__sym_snprintf(&from->ms, from->al_addr,
+ he->level, bf, size, width);
}
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
@@ -881,7 +924,8 @@ static int hist_entry__sym_to_snprintf(struct hist_entry *he, char *bf,
if (he->branch_info) {
struct addr_map_symbol *to = &he->branch_info->to;
- return _hist_entry__sym_snprintf(&to->ms, to->addr, he->level, bf, size, width);
+ return _hist_entry__sym_snprintf(&to->ms, to->al_addr,
+ he->level, bf, size, width);
}
return repsep_snprintf(bf, size, "%-*.*s", width, width, "N/A");
@@ -1658,6 +1702,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_TRACE, "trace", sort_trace),
DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
+ DIM(SORT_CGROUP, "cgroup", sort_cgroup),
DIM(SORT_CGROUP_ID, "cgroup_id", sort_cgroup_id),
DIM(SORT_SYM_IPC_NULL, "ipc_null", sort_sym_ipc_null),
DIM(SORT_TIME, "time", sort_time),
@@ -2315,7 +2360,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name)
evsel = evlist__first(evlist);
while (--nr > 0)
- evsel = perf_evsel__next(evsel);
+ evsel = evsel__next(evsel);
return evsel;
}
@@ -2772,7 +2817,7 @@ static char *prefix_if_not_in(const char *pre, char *str)
return str;
if (asprintf(&n, "%s,%s", pre, str) < 0)
- return NULL;
+ n = NULL;
free(str);
return n;
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 6c862d62d052..66d39c4cfe2b 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -101,6 +101,7 @@ struct hist_entry {
struct thread *thread;
struct comm *comm;
struct namespace_id cgroup_id;
+ u64 cgroup;
u64 ip;
u64 transaction;
s32 socket;
@@ -224,6 +225,7 @@ enum sort_type {
SORT_TRACE,
SORT_SYM_SIZE,
SORT_DSO_SIZE,
+ SORT_CGROUP,
SORT_CGROUP_ID,
SORT_SYM_IPC_NULL,
SORT_TIME,
@@ -309,5 +311,7 @@ int64_t
sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right);
int64_t
sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right);
+int64_t
+_sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r);
char *hist_entry__srcline(struct hist_entry *he);
#endif /* __PERF_SORT_H */
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bc31fccc0057..3c6976f7574c 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -16,6 +16,7 @@
#include <linux/ctype.h>
#include "cgroup.h"
#include <api/fs/fs.h>
+#include "util.h"
#define CNTR_NOT_SUPPORTED "<not supported>"
#define CNTR_NOT_COUNTED "<not counted>"
@@ -110,15 +111,15 @@ static void aggr_printout(struct perf_stat_config *config,
config->csv_sep);
break;
case AGGR_NONE:
- if (evsel->percore) {
+ if (evsel->percore && !config->percore_show_thread) {
fprintf(config->output, "S%d-D%d-C%*d%s",
cpu_map__id_to_socket(id),
cpu_map__id_to_die(id),
- config->csv_output ? 0 : -5,
+ config->csv_output ? 0 : -3,
cpu_map__id_to_cpu(id), config->csv_sep);
} else {
- fprintf(config->output, "CPU%*d%s ",
- config->csv_output ? 0 : -5,
+ fprintf(config->output, "CPU%*d%s",
+ config->csv_output ? 0 : -7,
evsel__cpus(evsel)->map[id],
config->csv_sep);
}
@@ -236,8 +237,6 @@ static bool valid_only_metric(const char *unit)
if (!unit)
return false;
if (strstr(unit, "/sec") ||
- strstr(unit, "hz") ||
- strstr(unit, "Hz") ||
strstr(unit, "CPUs utilized"))
return false;
return true;
@@ -247,7 +246,7 @@ static const char *fixunit(char *buf, struct evsel *evsel,
const char *unit)
{
if (!strncmp(unit, "of all", 6)) {
- snprintf(buf, 1024, "%s %s", perf_evsel__name(evsel),
+ snprintf(buf, 1024, "%s %s", evsel__name(evsel),
unit);
return buf;
}
@@ -334,7 +333,7 @@ static int first_shadow_cpu(struct perf_stat_config *config,
if (config->aggr_mode == AGGR_GLOBAL)
return 0;
- for (i = 0; i < perf_evsel__nr_cpus(evsel); i++) {
+ for (i = 0; i < evsel__nr_cpus(evsel); i++) {
int cpu2 = evsel__cpus(evsel)->map[i];
if (config->aggr_get_id(config, evlist->core.cpus, cpu2) == id)
@@ -368,7 +367,7 @@ static void abs_printout(struct perf_stat_config *config,
config->csv_output ? 0 : config->unit_width,
evsel->unit, config->csv_sep);
- fprintf(output, "%-*s", config->csv_output ? 0 : 25, perf_evsel__name(evsel));
+ fprintf(output, "%-*s", config->csv_output ? 0 : 25, evsel__name(evsel));
print_cgroup(config, evsel);
}
@@ -462,8 +461,7 @@ static void printout(struct perf_stat_config *config, int id, int nr,
counter->unit, config->csv_sep);
fprintf(config->output, "%*s",
- config->csv_output ? 0 : -25,
- perf_evsel__name(counter));
+ config->csv_output ? 0 : -25, evsel__name(counter));
print_cgroup(config, counter);
@@ -509,7 +507,7 @@ static void aggr_update_shadow(struct perf_stat_config *config,
id = config->aggr_map->map[s];
evlist__for_each_entry(evlist, counter) {
val = 0;
- for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
s2 = config->aggr_get_id(config, evlist->core.cpus, cpu);
if (s2 != id)
continue;
@@ -560,11 +558,11 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
- if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
+ if (strcmp(evsel__name(alias), evsel__name(counter)) ||
alias->scale != counter->scale ||
alias->cgrp != counter->cgrp ||
strcmp(alias->unit, counter->unit) ||
- perf_evsel__is_clock(alias) != perf_evsel__is_clock(counter) ||
+ evsel__is_clock(alias) != evsel__is_clock(counter) ||
!strcmp(alias->pmu_name, counter->pmu_name))
break;
alias->merged_stat = true;
@@ -600,7 +598,7 @@ static void aggr_cb(struct perf_stat_config *config,
struct aggr_data *ad = data;
int cpu, s2;
- for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
struct perf_counts_values *counts;
s2 = config->aggr_get_id(config, evsel__cpus(counter), cpu);
@@ -628,7 +626,7 @@ static void aggr_cb(struct perf_stat_config *config,
static void print_counter_aggrdata(struct perf_stat_config *config,
struct evsel *counter, int s,
char *prefix, bool metric_only,
- bool *first)
+ bool *first, int cpu)
{
struct aggr_data ad;
FILE *output = config->output;
@@ -654,7 +652,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
fprintf(output, "%s", prefix);
uval = val * counter->scale;
- printout(config, id, nr, counter, uval, prefix,
+ printout(config, cpu != -1 ? cpu : id, nr, counter, uval, prefix,
run, ena, 1.0, &rt_stat);
if (!metric_only)
fputc('\n', output);
@@ -687,7 +685,7 @@ static void print_aggr(struct perf_stat_config *config,
evlist__for_each_entry(evlist, counter) {
print_counter_aggrdata(config, counter, s,
prefix, metric_only,
- &first);
+ &first, -1);
}
if (metric_only)
fputc('\n', output);
@@ -848,7 +846,7 @@ static void print_counter(struct perf_stat_config *config,
double uval;
int cpu;
- for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+ for (cpu = 0; cpu < evsel__nr_cpus(counter); cpu++) {
struct aggr_data ad = { .cpu = cpu };
if (!collect_data(config, counter, counter_cb, &ad))
@@ -1097,7 +1095,6 @@ static void print_footer(struct perf_stat_config *config)
{
double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
FILE *output = config->output;
- int n;
if (!config->null_run)
fprintf(output, "\n");
@@ -1131,9 +1128,7 @@ static void print_footer(struct perf_stat_config *config)
}
fprintf(output, "\n\n");
- if (config->print_free_counters_hint &&
- sysctl__read_int("kernel/nmi_watchdog", &n) >= 0 &&
- n > 0)
+ if (config->print_free_counters_hint && sysctl__nmi_watchdog_enabled())
fprintf(output,
"Some events weren't counted. Try disabling the NMI watchdog:\n"
" echo 0 > /proc/sys/kernel/nmi_watchdog\n"
@@ -1146,6 +1141,26 @@ static void print_footer(struct perf_stat_config *config)
"the same PMU. Try reorganizing the group.\n");
}
+static void print_percore_thread(struct perf_stat_config *config,
+ struct evsel *counter, char *prefix)
+{
+ int s, s2, id;
+ bool first = true;
+
+ for (int i = 0; i < evsel__nr_cpus(counter); i++) {
+ s2 = config->aggr_get_id(config, evsel__cpus(counter), i);
+ for (s = 0; s < config->aggr_map->nr; s++) {
+ id = config->aggr_map->map[s];
+ if (s2 == id)
+ break;
+ }
+
+ print_counter_aggrdata(config, counter, s,
+ prefix, false,
+ &first, i);
+ }
+}
+
static void print_percore(struct perf_stat_config *config,
struct evsel *counter, char *prefix)
{
@@ -1157,13 +1172,16 @@ static void print_percore(struct perf_stat_config *config,
if (!(config->aggr_map || config->aggr_get_id))
return;
+ if (config->percore_show_thread)
+ return print_percore_thread(config, counter, prefix);
+
for (s = 0; s < config->aggr_map->nr; s++) {
if (prefix && metric_only)
fprintf(output, "%s", prefix);
print_counter_aggrdata(config, counter, s,
prefix, metric_only,
- &first);
+ &first, -1);
}
if (metric_only)
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 90d23cc3c8d4..a7c13a88ecb9 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -216,9 +216,9 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
count *= counter->scale;
- if (perf_evsel__is_clock(counter))
+ if (evsel__is_clock(counter))
update_runtime_stat(st, STAT_NSECS, 0, cpu, count_ns);
- else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+ else if (evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count);
else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count);
@@ -241,25 +241,25 @@ void perf_stat__update_shadow_stats(struct evsel *counter, u64 count,
else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES))
update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES,
ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+ else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT,
ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+ else if (evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
update_runtime_stat(st, STAT_STALLED_CYCLES_BACK,
ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+ else if (evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count);
- else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+ else if (evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_LL))
update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count);
- else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+ else if (evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count);
else if (perf_stat_evsel__is(counter, SMI_NUM))
update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count);
@@ -323,35 +323,46 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
{
struct evsel *counter, *leader, **metric_events, *oc;
bool found;
- const char **metric_names;
+ struct expr_parse_ctx ctx;
+ struct hashmap_entry *cur;
+ size_t bkt;
int i;
- int num_metric_names;
+ expr__ctx_init(&ctx);
evlist__for_each_entry(evsel_list, counter) {
bool invalid = false;
leader = counter->leader;
if (!counter->metric_expr)
continue;
+
+ expr__ctx_clear(&ctx);
metric_events = counter->metric_events;
if (!metric_events) {
- if (expr__find_other(counter->metric_expr, counter->name,
- &metric_names, &num_metric_names) < 0)
+ if (expr__find_other(counter->metric_expr,
+ counter->name,
+ &ctx, 1) < 0)
continue;
metric_events = calloc(sizeof(struct evsel *),
- num_metric_names + 1);
- if (!metric_events)
+ hashmap__size(&ctx.ids) + 1);
+ if (!metric_events) {
+ expr__ctx_clear(&ctx);
return;
+ }
counter->metric_events = metric_events;
}
- for (i = 0; i < num_metric_names; i++) {
+ i = 0;
+ hashmap__for_each_entry((&ctx.ids), cur, bkt) {
+ const char *metric_name = (const char *)cur->key;
+
found = false;
if (leader) {
/* Search in group */
for_each_group_member (oc, leader) {
- if (!strcasecmp(oc->name, metric_names[i]) &&
+ if (!strcasecmp(oc->name,
+ metric_name) &&
!oc->collect_stat) {
found = true;
break;
@@ -360,7 +371,8 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
}
if (!found) {
/* Search ignoring groups */
- oc = perf_stat__find_event(evsel_list, metric_names[i]);
+ oc = perf_stat__find_event(evsel_list,
+ metric_name);
}
if (!oc) {
/* Deduping one is good enough to handle duplicated PMUs. */
@@ -373,27 +385,28 @@ void perf_stat__collect_metric_expr(struct evlist *evsel_list)
* of events. So we ask the user instead to add the missing
* events.
*/
- if (!printed || strcasecmp(printed, metric_names[i])) {
+ if (!printed ||
+ strcasecmp(printed, metric_name)) {
fprintf(stderr,
"Add %s event to groups to get metric expression for %s\n",
- metric_names[i],
+ metric_name,
counter->name);
- printed = strdup(metric_names[i]);
+ printed = strdup(metric_name);
}
invalid = true;
continue;
}
- metric_events[i] = oc;
+ metric_events[i++] = oc;
oc->collect_stat = true;
}
metric_events[i] = NULL;
- free(metric_names);
if (invalid) {
free(metric_events);
counter->metric_events = NULL;
counter->metric_expr = NULL;
}
}
+ expr__ctx_clear(&ctx);
}
static double runtime_stat_avg(struct runtime_stat *st,
@@ -723,21 +736,19 @@ static void generic_metric(struct perf_stat_config *config,
char *name,
const char *metric_name,
const char *metric_unit,
- double avg,
+ int runtime,
int cpu,
struct perf_stat_output_ctx *out,
struct runtime_stat *st)
{
print_metric_t print_metric = out->print_metric;
- struct parse_ctx pctx;
+ struct expr_parse_ctx pctx;
double ratio, scale;
int i;
void *ctxp = out->ctx;
char *n, *pn;
expr__ctx_init(&pctx);
- /* Must be first id entry */
- expr__add_id(&pctx, name, avg);
for (i = 0; metric_events[i]; i++) {
struct saved_value *v;
struct stats *stats;
@@ -777,9 +788,7 @@ static void generic_metric(struct perf_stat_config *config,
}
if (!metric_events[i]) {
- const char *p = metric_expr;
-
- if (expr__parse(&ratio, &pctx, &p) == 0) {
+ if (expr__parse(&ratio, &pctx, metric_expr, runtime) == 0) {
char *unit;
char metric_bf[64];
@@ -788,13 +797,17 @@ static void generic_metric(struct perf_stat_config *config,
&unit, &scale) >= 0) {
ratio *= scale;
}
-
- scnprintf(metric_bf, sizeof(metric_bf),
+ if (strstr(metric_expr, "?"))
+ scnprintf(metric_bf, sizeof(metric_bf),
+ "%s %s_%d", unit, metric_name, runtime);
+ else
+ scnprintf(metric_bf, sizeof(metric_bf),
"%s %s", unit, metric_name);
+
print_metric(config, ctxp, NULL, "%8.1f",
metric_bf, ratio);
} else {
- print_metric(config, ctxp, NULL, "%8.1f",
+ print_metric(config, ctxp, NULL, "%8.2f",
metric_name ?
metric_name :
out->force_header ? name : "",
@@ -805,11 +818,13 @@ static void generic_metric(struct perf_stat_config *config,
out->force_header ?
(metric_name ? metric_name : name) : "", 0);
}
- } else
- print_metric(config, ctxp, NULL, NULL, "", 0);
+ } else {
+ print_metric(config, ctxp, NULL, NULL,
+ out->force_header ?
+ (metric_name ? metric_name : name) : "", 0);
+ }
- for (i = 1; i < pctx.num_ids; i++)
- zfree(&pctx.ids[i].name);
+ expr__ctx_clear(&pctx);
}
void perf_stat__print_shadow_stats(struct perf_stat_config *config,
@@ -827,7 +842,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
struct metric_event *me;
int num = 1;
- if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+ if (evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu);
if (total) {
@@ -852,7 +867,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
"stalled cycles per insn",
ratio);
}
- } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0)
print_branch_misses(config, cpu, evsel, avg, out, st);
else
@@ -907,7 +922,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_ll_cache_misses(config, cpu, evsel, avg, out, st);
else
print_metric(config, ctxp, NULL, NULL, "of all LL-cache hits", 0);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) {
total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu);
if (total)
@@ -918,11 +933,11 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
"of all cache refs", ratio);
else
print_metric(config, ctxp, NULL, NULL, "of all cache refs", 0);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
print_stalled_cycles_frontend(config, cpu, evsel, avg, out, st);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
print_stalled_cycles_backend(config, cpu, evsel, avg, out, st);
- } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+ } else if (evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
total = runtime_stat_avg(st, STAT_NSECS, 0, cpu);
if (total) {
@@ -973,7 +988,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
ratio = total / avg;
print_metric(config, ctxp, NULL, "%8.0f", "cycles / elision", ratio);
- } else if (perf_evsel__is_clock(evsel)) {
+ } else if (evsel__is_clock(evsel)) {
if ((ratio = avg_stats(&walltime_nsecs_stats)) != 0)
print_metric(config, ctxp, NULL, "%8.3f", "CPUs utilized",
avg / (ratio * evsel->scale));
@@ -1021,7 +1036,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
print_metric(config, ctxp, NULL, NULL, name, 0);
} else if (evsel->metric_expr) {
generic_metric(config, evsel->metric_expr, evsel->metric_events, evsel->name,
- evsel->metric_name, NULL, avg, cpu, out, st);
+ evsel->metric_name, NULL, 1, cpu, out, st);
} else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) {
char unit = 'M';
char unit_buf[10];
@@ -1050,7 +1065,7 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
out->new_line(config, ctxp);
generic_metric(config, mexp->metric_expr, mexp->metric_events,
evsel->name, mexp->metric_name,
- mexp->metric_unit, avg, cpu, out, st);
+ mexp->metric_unit, mexp->runtime, cpu, out, st);
}
}
if (num == 0)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 5f26137b8d60..cdb154381a87 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -108,14 +108,14 @@ static void perf_stat_evsel_id_init(struct evsel *evsel)
/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
- if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
+ if (!strcmp(evsel__name(evsel), id_str[i])) {
ps->id = i;
break;
}
}
}
-static void perf_evsel__reset_stat_priv(struct evsel *evsel)
+static void evsel__reset_stat_priv(struct evsel *evsel)
{
int i;
struct perf_stat_evsel *ps = evsel->stats;
@@ -126,16 +126,16 @@ static void perf_evsel__reset_stat_priv(struct evsel *evsel)
perf_stat_evsel_id_init(evsel);
}
-static int perf_evsel__alloc_stat_priv(struct evsel *evsel)
+static int evsel__alloc_stat_priv(struct evsel *evsel)
{
evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
if (evsel->stats == NULL)
return -ENOMEM;
- perf_evsel__reset_stat_priv(evsel);
+ evsel__reset_stat_priv(evsel);
return 0;
}
-static void perf_evsel__free_stat_priv(struct evsel *evsel)
+static void evsel__free_stat_priv(struct evsel *evsel)
{
struct perf_stat_evsel *ps = evsel->stats;
@@ -144,8 +144,7 @@ static void perf_evsel__free_stat_priv(struct evsel *evsel)
zfree(&evsel->stats);
}
-static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel,
- int ncpus, int nthreads)
+static int evsel__alloc_prev_raw_counts(struct evsel *evsel, int ncpus, int nthreads)
{
struct perf_counts *counts;
@@ -156,29 +155,26 @@ static int perf_evsel__alloc_prev_raw_counts(struct evsel *evsel,
return counts ? 0 : -ENOMEM;
}
-static void perf_evsel__free_prev_raw_counts(struct evsel *evsel)
+static void evsel__free_prev_raw_counts(struct evsel *evsel)
{
perf_counts__delete(evsel->prev_raw_counts);
evsel->prev_raw_counts = NULL;
}
-static void perf_evsel__reset_prev_raw_counts(struct evsel *evsel)
+static void evsel__reset_prev_raw_counts(struct evsel *evsel)
{
- if (evsel->prev_raw_counts) {
- evsel->prev_raw_counts->aggr.val = 0;
- evsel->prev_raw_counts->aggr.ena = 0;
- evsel->prev_raw_counts->aggr.run = 0;
- }
+ if (evsel->prev_raw_counts)
+ perf_counts__reset(evsel->prev_raw_counts);
}
-static int perf_evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
+static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
{
- int ncpus = perf_evsel__nr_cpus(evsel);
+ int ncpus = evsel__nr_cpus(evsel);
int nthreads = perf_thread_map__nr(evsel->core.threads);
- if (perf_evsel__alloc_stat_priv(evsel) < 0 ||
- perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
- (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
+ if (evsel__alloc_stat_priv(evsel) < 0 ||
+ evsel__alloc_counts(evsel, ncpus, nthreads) < 0 ||
+ (alloc_raw && evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0))
return -ENOMEM;
return 0;
@@ -189,7 +185,7 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
- if (perf_evsel__alloc_stats(evsel, alloc_raw))
+ if (evsel__alloc_stats(evsel, alloc_raw))
goto out_free;
}
@@ -205,9 +201,9 @@ void perf_evlist__free_stats(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
- perf_evsel__free_stat_priv(evsel);
- perf_evsel__free_counts(evsel);
- perf_evsel__free_prev_raw_counts(evsel);
+ evsel__free_stat_priv(evsel);
+ evsel__free_counts(evsel);
+ evsel__free_prev_raw_counts(evsel);
}
}
@@ -216,8 +212,8 @@ void perf_evlist__reset_stats(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel) {
- perf_evsel__reset_stat_priv(evsel);
- perf_evsel__reset_counts(evsel);
+ evsel__reset_stat_priv(evsel);
+ evsel__reset_counts(evsel);
}
}
@@ -226,7 +222,51 @@ void perf_evlist__reset_prev_raw_counts(struct evlist *evlist)
struct evsel *evsel;
evlist__for_each_entry(evlist, evsel)
- perf_evsel__reset_prev_raw_counts(evsel);
+ evsel__reset_prev_raw_counts(evsel);
+}
+
+static void perf_evsel__copy_prev_raw_counts(struct evsel *evsel)
+{
+ int ncpus = evsel__nr_cpus(evsel);
+ int nthreads = perf_thread_map__nr(evsel->core.threads);
+
+ for (int thread = 0; thread < nthreads; thread++) {
+ for (int cpu = 0; cpu < ncpus; cpu++) {
+ *perf_counts(evsel->counts, cpu, thread) =
+ *perf_counts(evsel->prev_raw_counts, cpu,
+ thread);
+ }
+ }
+
+ evsel->counts->aggr = evsel->prev_raw_counts->aggr;
+}
+
+void perf_evlist__copy_prev_raw_counts(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ evlist__for_each_entry(evlist, evsel)
+ perf_evsel__copy_prev_raw_counts(evsel);
+}
+
+void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
+{
+ struct evsel *evsel;
+
+ /*
+ * To collect the overall statistics for interval mode,
+ * we copy the counts from evsel->prev_raw_counts to
+ * evsel->counts. The perf_stat_process_counter creates
+ * aggr values from per cpu values, but the per cpu values
+ * are 0 for AGGR_GLOBAL. So we use a trick that saves the
+ * previous aggr value to the first member of perf_counts,
+ * then aggr calculation in process_counter_values can work
+ * correctly.
+ */
+ evlist__for_each_entry(evlist, evsel) {
+ *perf_counts(evsel->prev_raw_counts, 0, 0) =
+ evsel->prev_raw_counts->aggr;
+ }
}
static void zero_per_pkg(struct evsel *counter)
@@ -302,7 +342,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
case AGGR_NODE:
case AGGR_NONE:
if (!evsel->snapshot)
- perf_evsel__compute_deltas(evsel, cpu, thread, count);
+ evsel__compute_deltas(evsel, cpu, thread, count);
perf_counts_values__scale(count, config->scale, NULL);
if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
perf_stat__update_shadow_stats(evsel, count->val,
@@ -334,7 +374,7 @@ static int process_counter_maps(struct perf_stat_config *config,
struct evsel *counter)
{
int nthreads = perf_thread_map__nr(counter->core.threads);
- int ncpus = perf_evsel__nr_cpus(counter);
+ int ncpus = evsel__nr_cpus(counter);
int cpu, thread;
if (counter->core.system_wide)
@@ -368,8 +408,10 @@ int perf_stat_process_counter(struct perf_stat_config *config,
* interval mode, otherwise overall avg running
* averages will be shown for each interval.
*/
- if (config->interval)
- init_stats(ps->res_stats);
+ if (config->interval || config->summary) {
+ for (i = 0; i < 3; i++)
+ init_stats(&ps->res_stats[i]);
+ }
if (counter->per_pkg)
zero_per_pkg(counter);
@@ -382,7 +424,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
return 0;
if (!counter->snapshot)
- perf_evsel__compute_deltas(counter, -1, -1, aggr);
+ evsel__compute_deltas(counter, -1, -1, aggr);
perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
for (i = 0; i < 3; i++)
@@ -390,7 +432,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
if (verbose > 0) {
fprintf(config->output, "%s: %" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
- perf_evsel__name(counter), count[0], count[1], count[2]);
+ evsel__name(counter), count[0], count[1], count[2]);
}
/*
@@ -507,7 +549,7 @@ int create_perf_stat_counter(struct evsel *evsel,
* either manually by us or by kernel via enable_on_exec
* set later.
*/
- if (perf_evsel__is_group_leader(evsel)) {
+ if (evsel__is_group_leader(evsel)) {
attr->disabled = 1;
/*
@@ -519,7 +561,7 @@ int create_perf_stat_counter(struct evsel *evsel,
}
if (target__has_cpu(target) && !target__has_per_thread(target))
- return perf_evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
+ return evsel__open_per_cpu(evsel, evsel__cpus(evsel), cpu);
- return perf_evsel__open_per_thread(evsel, evsel->core.threads);
+ return evsel__open_per_thread(evsel, evsel->core.threads);
}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index fb990efa54a8..f75ae679eb28 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -109,6 +109,10 @@ struct perf_stat_config {
bool walltime_run_table;
bool all_kernel;
bool all_user;
+ bool percore_show_thread;
+ bool summary;
+ bool metric_no_group;
+ bool metric_no_merge;
FILE *output;
unsigned int interval;
unsigned int timeout;
@@ -131,6 +135,8 @@ struct perf_stat_config {
struct rblist metric_events;
};
+void perf_stat__set_big_num(int set);
+
void update_stats(struct stats *stats, u64 val);
double avg_stats(struct stats *stats);
double stddev_stats(struct stats *stats);
@@ -197,6 +203,8 @@ int perf_evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
void perf_evlist__free_stats(struct evlist *evlist);
void perf_evlist__reset_stats(struct evlist *evlist);
void perf_evlist__reset_prev_raw_counts(struct evlist *evlist);
+void perf_evlist__copy_prev_raw_counts(struct evlist *evlist);
+void perf_evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
int perf_stat_process_counter(struct perf_stat_config *config,
struct evsel *counter);
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 1965aefccb02..5e43054bffea 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -704,9 +704,15 @@ void symsrc__destroy(struct symsrc *ss)
close(ss->fd);
}
-bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
{
- return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL;
+ /*
+ * Usually vmlinux is an ELF file with type ET_EXEC for most
+ * architectures; except Arm64 kernel is linked with option
+ * '-share', so need to check type ET_DYN.
+ */
+ return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL ||
+ ehdr.e_type == ET_DYN;
}
int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
@@ -1452,6 +1458,7 @@ struct kcore_copy_info {
u64 first_symbol;
u64 last_symbol;
u64 first_module;
+ u64 first_module_symbol;
u64 last_module_symbol;
size_t phnum;
struct list_head phdrs;
@@ -1528,6 +1535,8 @@ static int kcore_copy__process_kallsyms(void *arg, const char *name, char type,
return 0;
if (strchr(name, '[')) {
+ if (!kci->first_module_symbol || start < kci->first_module_symbol)
+ kci->first_module_symbol = start;
if (start > kci->last_module_symbol)
kci->last_module_symbol = start;
return 0;
@@ -1725,6 +1734,10 @@ static int kcore_copy__calc_maps(struct kcore_copy_info *kci, const char *dir,
kci->etext += page_size;
}
+ if (kci->first_module_symbol &&
+ (!kci->first_module || kci->first_module_symbol < kci->first_module))
+ kci->first_module = kci->first_module_symbol;
+
kci->first_module = round_down(kci->first_module, page_size);
if (kci->last_module_symbol) {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 26bc6a0096ce..5ddf84dcbae7 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -79,6 +79,7 @@ static enum dso_binary_type binary_type_symtab[] = {
DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE,
DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP,
DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO,
+ DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO,
DSO_BINARY_TYPE__NOT_FOUND,
};
@@ -566,6 +567,20 @@ void dso__sort_by_name(struct dso *dso)
return symbols__sort_by_name(&dso->symbol_names, &dso->symbols);
}
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+static int hex2u64(const char *ptr, u64 *long_val)
+{
+ char *p;
+
+ *long_val = strtoull(ptr, &p, 16);
+
+ return p - ptr;
+}
+
+
int modules__parse(const char *filename, void *arg,
int (*process_module)(void *arg, const char *name,
u64 start, u64 size))
@@ -1209,6 +1224,7 @@ int maps__merge_in(struct maps *kmaps, struct map *new_map)
m->end = old_map->start;
list_add_tail(&m->node, &merged);
+ new_map->pgoff += old_map->end - new_map->start;
new_map->start = old_map->end;
}
} else {
@@ -1229,6 +1245,7 @@ int maps__merge_in(struct maps *kmaps, struct map *new_map)
* |new......| -> |new...|
* |old....| -> |old....|
*/
+ new_map->pgoff += old_map->end - new_map->start;
new_map->start = old_map->end;
}
}
@@ -1515,6 +1532,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
case DSO_BINARY_TYPE__SYSTEM_PATH_DSO:
case DSO_BINARY_TYPE__FEDORA_DEBUGINFO:
case DSO_BINARY_TYPE__UBUNTU_DEBUGINFO:
+ case DSO_BINARY_TYPE__MIXEDUP_UBUNTU_DEBUGINFO:
case DSO_BINARY_TYPE__BUILDID_DEBUGINFO:
case DSO_BINARY_TYPE__OPENEMBEDDED_DEBUGINFO:
return !kmod && dso->kernel == DSO_TYPE_USER;
@@ -1544,6 +1562,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
return true;
case DSO_BINARY_TYPE__BPF_PROG_INFO:
+ case DSO_BINARY_TYPE__BPF_IMAGE:
case DSO_BINARY_TYPE__NOT_FOUND:
default:
return false;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 93fc43db1be3..ff4f4c47e148 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -55,7 +55,7 @@ struct symbol {
u8 inlined:1;
u8 arch_sym;
bool annotate2;
- char name[0];
+ char name[];
};
void symbol__delete(struct symbol *sym);
diff --git a/tools/perf/util/symbol_conf.h b/tools/perf/util/symbol_conf.h
index 10f1ec3e0349..b916afb95ec5 100644
--- a/tools/perf/util/symbol_conf.h
+++ b/tools/perf/util/symbol_conf.h
@@ -73,6 +73,7 @@ struct symbol_conf {
const char *symfs;
int res_sample;
int pad_output_len_dso;
+ int group_sort_idx;
};
extern struct symbol_conf symbol_conf;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index c423298fe62d..89b390623b63 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -16,6 +16,7 @@
#include "util/synthetic-events.h"
#include "util/target.h"
#include "util/time-utils.h"
+#include "util/cgroup.h"
#include <linux/bitops.h>
#include <linux/kernel.h>
#include <linux/string.h>
@@ -36,6 +37,7 @@
#include <string.h>
#include <uapi/linux/mman.h> /* To get things like MAP_HUGETLB even on older libc headers */
#include <api/fs/fs.h>
+#include <api/io.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
@@ -70,7 +72,6 @@ int perf_tool__process_synth_event(struct perf_tool *tool,
static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
pid_t *tgid, pid_t *ppid)
{
- char filename[PATH_MAX];
char bf[4096];
int fd;
size_t size = 0;
@@ -80,11 +81,11 @@ static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len,
*tgid = -1;
*ppid = -1;
- snprintf(filename, sizeof(filename), "/proc/%d/status", pid);
+ snprintf(bf, sizeof(bf), "/proc/%d/status", pid);
- fd = open(filename, O_RDONLY);
+ fd = open(bf, O_RDONLY);
if (fd < 0) {
- pr_debug("couldn't open %s\n", filename);
+ pr_debug("couldn't open %s\n", bf);
return -1;
}
@@ -273,6 +274,79 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
return 0;
}
+static bool read_proc_maps_line(struct io *io, __u64 *start, __u64 *end,
+ u32 *prot, u32 *flags, __u64 *offset,
+ u32 *maj, u32 *min,
+ __u64 *inode,
+ ssize_t pathname_size, char *pathname)
+{
+ __u64 temp;
+ int ch;
+ char *start_pathname = pathname;
+
+ if (io__get_hex(io, start) != '-')
+ return false;
+ if (io__get_hex(io, end) != ' ')
+ return false;
+
+ /* map protection and flags bits */
+ *prot = 0;
+ ch = io__get_char(io);
+ if (ch == 'r')
+ *prot |= PROT_READ;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 'w')
+ *prot |= PROT_WRITE;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 'x')
+ *prot |= PROT_EXEC;
+ else if (ch != '-')
+ return false;
+ ch = io__get_char(io);
+ if (ch == 's')
+ *flags = MAP_SHARED;
+ else if (ch == 'p')
+ *flags = MAP_PRIVATE;
+ else
+ return false;
+ if (io__get_char(io) != ' ')
+ return false;
+
+ if (io__get_hex(io, offset) != ' ')
+ return false;
+
+ if (io__get_hex(io, &temp) != ':')
+ return false;
+ *maj = temp;
+ if (io__get_hex(io, &temp) != ' ')
+ return false;
+ *min = temp;
+
+ ch = io__get_dec(io, inode);
+ if (ch != ' ') {
+ *pathname = '\0';
+ return ch == '\n';
+ }
+ do {
+ ch = io__get_char(io);
+ } while (ch == ' ');
+ while (true) {
+ if (ch < 0)
+ return false;
+ if (ch == '\0' || ch == '\n' ||
+ (pathname + 1 - start_pathname) >= pathname_size) {
+ *pathname = '\0';
+ return true;
+ }
+ *pathname++ = ch;
+ ch = io__get_char(io);
+ }
+}
+
int perf_event__synthesize_mmap_events(struct perf_tool *tool,
union perf_event *event,
pid_t pid, pid_t tgid,
@@ -280,9 +354,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
struct machine *machine,
bool mmap_data)
{
- char filename[PATH_MAX];
- FILE *fp;
unsigned long long t;
+ char bf[BUFSIZ];
+ struct io io;
bool truncation = false;
unsigned long long timeout = proc_map_timeout * 1000000ULL;
int rc = 0;
@@ -292,59 +366,53 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
if (machine__is_default_guest(machine))
return 0;
- snprintf(filename, sizeof(filename), "%s/proc/%d/task/%d/maps",
- machine->root_dir, pid, pid);
+ snprintf(bf, sizeof(bf), "%s/proc/%d/task/%d/maps",
+ machine->root_dir, pid, pid);
- fp = fopen(filename, "r");
- if (fp == NULL) {
+ io.fd = open(bf, O_RDONLY, 0);
+ if (io.fd < 0) {
/*
* We raced with a task exiting - just return:
*/
- pr_debug("couldn't open %s\n", filename);
+ pr_debug("couldn't open %s\n", bf);
return -1;
}
+ io__init(&io, io.fd, bf, sizeof(bf));
event->header.type = PERF_RECORD_MMAP2;
t = rdclock();
- while (1) {
- char bf[BUFSIZ];
- char prot[5];
- char execname[PATH_MAX];
- char anonstr[] = "//anon";
- unsigned int ino;
+ while (!io.eof) {
+ static const char anonstr[] = "//anon";
size_t size;
- ssize_t n;
- if (fgets(bf, sizeof(bf), fp) == NULL)
- break;
+ /* ensure null termination since stack will be reused. */
+ event->mmap2.filename[0] = '\0';
+
+ /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
+ if (!read_proc_maps_line(&io,
+ &event->mmap2.start,
+ &event->mmap2.len,
+ &event->mmap2.prot,
+ &event->mmap2.flags,
+ &event->mmap2.pgoff,
+ &event->mmap2.maj,
+ &event->mmap2.min,
+ &event->mmap2.ino,
+ sizeof(event->mmap2.filename),
+ event->mmap2.filename))
+ continue;
if ((rdclock() - t) > timeout) {
- pr_warning("Reading %s time out. "
+ pr_warning("Reading %s/proc/%d/task/%d/maps time out. "
"You may want to increase "
"the time limit by --proc-map-timeout\n",
- filename);
+ machine->root_dir, pid, pid);
truncation = true;
goto out;
}
- /* ensure null termination since stack will be reused. */
- strcpy(execname, "");
-
- /* 00400000-0040c000 r-xp 00000000 fd:01 41038 /bin/cat */
- n = sscanf(bf, "%"PRI_lx64"-%"PRI_lx64" %s %"PRI_lx64" %x:%x %u %[^\n]\n",
- &event->mmap2.start, &event->mmap2.len, prot,
- &event->mmap2.pgoff, &event->mmap2.maj,
- &event->mmap2.min,
- &ino, execname);
-
- /*
- * Anon maps don't have the execname.
- */
- if (n < 7)
- continue;
-
- event->mmap2.ino = (u64)ino;
+ event->mmap2.ino_generation = 0;
/*
* Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
@@ -354,23 +422,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
else
event->header.misc = PERF_RECORD_MISC_GUEST_USER;
- /* map protection and flags bits */
- event->mmap2.prot = 0;
- event->mmap2.flags = 0;
- if (prot[0] == 'r')
- event->mmap2.prot |= PROT_READ;
- if (prot[1] == 'w')
- event->mmap2.prot |= PROT_WRITE;
- if (prot[2] == 'x')
- event->mmap2.prot |= PROT_EXEC;
-
- if (prot[3] == 's')
- event->mmap2.flags |= MAP_SHARED;
- else
- event->mmap2.flags |= MAP_PRIVATE;
-
- if (prot[2] != 'x') {
- if (!mmap_data || prot[0] != 'r')
+ if ((event->mmap2.prot & PROT_EXEC) == 0) {
+ if (!mmap_data || (event->mmap2.prot & PROT_READ) == 0)
continue;
event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
@@ -380,17 +433,17 @@ out:
if (truncation)
event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
- if (!strcmp(execname, ""))
- strcpy(execname, anonstr);
+ if (!strcmp(event->mmap2.filename, ""))
+ strcpy(event->mmap2.filename, anonstr);
if (hugetlbfs_mnt_len &&
- !strncmp(execname, hugetlbfs_mnt, hugetlbfs_mnt_len)) {
- strcpy(execname, anonstr);
+ !strncmp(event->mmap2.filename, hugetlbfs_mnt,
+ hugetlbfs_mnt_len)) {
+ strcpy(event->mmap2.filename, anonstr);
event->mmap2.flags |= MAP_HUGETLB;
}
- size = strlen(execname) + 1;
- memcpy(event->mmap2.filename, execname, size);
+ size = strlen(event->mmap2.filename) + 1;
size = PERF_ALIGN(size, sizeof(u64));
event->mmap2.len -= event->mmap.start;
event->mmap2.header.size = (sizeof(event->mmap2) -
@@ -409,10 +462,131 @@ out:
break;
}
- fclose(fp);
+ close(io.fd);
return rc;
}
+#ifdef HAVE_FILE_HANDLE
+static int perf_event__synthesize_cgroup(struct perf_tool *tool,
+ union perf_event *event,
+ char *path, size_t mount_len,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ size_t event_size = sizeof(event->cgroup) - sizeof(event->cgroup.path);
+ size_t path_len = strlen(path) - mount_len + 1;
+ struct {
+ struct file_handle fh;
+ uint64_t cgroup_id;
+ } handle;
+ int mount_id;
+
+ while (path_len % sizeof(u64))
+ path[mount_len + path_len++] = '\0';
+
+ memset(&event->cgroup, 0, event_size);
+
+ event->cgroup.header.type = PERF_RECORD_CGROUP;
+ event->cgroup.header.size = event_size + path_len + machine->id_hdr_size;
+
+ handle.fh.handle_bytes = sizeof(handle.cgroup_id);
+ if (name_to_handle_at(AT_FDCWD, path, &handle.fh, &mount_id, 0) < 0) {
+ pr_debug("stat failed: %s\n", path);
+ return -1;
+ }
+
+ event->cgroup.id = handle.cgroup_id;
+ strncpy(event->cgroup.path, path + mount_len, path_len);
+ memset(event->cgroup.path + path_len, 0, machine->id_hdr_size);
+
+ if (perf_tool__process_synth_event(tool, event, machine, process) < 0) {
+ pr_debug("process synth event failed\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int perf_event__walk_cgroup_tree(struct perf_tool *tool,
+ union perf_event *event,
+ char *path, size_t mount_len,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ size_t pos = strlen(path);
+ DIR *d;
+ struct dirent *dent;
+ int ret = 0;
+
+ if (perf_event__synthesize_cgroup(tool, event, path, mount_len,
+ process, machine) < 0)
+ return -1;
+
+ d = opendir(path);
+ if (d == NULL) {
+ pr_debug("failed to open directory: %s\n", path);
+ return -1;
+ }
+
+ while ((dent = readdir(d)) != NULL) {
+ if (dent->d_type != DT_DIR)
+ continue;
+ if (!strcmp(dent->d_name, ".") ||
+ !strcmp(dent->d_name, ".."))
+ continue;
+
+ /* any sane path should be less than PATH_MAX */
+ if (strlen(path) + strlen(dent->d_name) + 1 >= PATH_MAX)
+ continue;
+
+ if (path[pos - 1] != '/')
+ strcat(path, "/");
+ strcat(path, dent->d_name);
+
+ ret = perf_event__walk_cgroup_tree(tool, event, path,
+ mount_len, process, machine);
+ if (ret < 0)
+ break;
+
+ path[pos] = '\0';
+ }
+
+ closedir(d);
+ return ret;
+}
+
+int perf_event__synthesize_cgroups(struct perf_tool *tool,
+ perf_event__handler_t process,
+ struct machine *machine)
+{
+ union perf_event event;
+ char cgrp_root[PATH_MAX];
+ size_t mount_len; /* length of mount point in the path */
+
+ if (cgroupfs_find_mountpoint(cgrp_root, PATH_MAX, "perf_event") < 0) {
+ pr_debug("cannot find cgroup mount point\n");
+ return -1;
+ }
+
+ mount_len = strlen(cgrp_root);
+ /* make sure the path starts with a slash (after mount point) */
+ strcat(cgrp_root, "/");
+
+ if (perf_event__walk_cgroup_tree(tool, &event, cgrp_root, mount_len,
+ process, machine) < 0)
+ return -1;
+
+ return 0;
+}
+#else
+int perf_event__synthesize_cgroups(struct perf_tool *tool __maybe_unused,
+ perf_event__handler_t process __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return -1;
+}
+#endif
+
int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process,
struct machine *machine)
{
@@ -1007,7 +1181,7 @@ void cpu_map_data__synthesize(struct perf_record_cpu_map_data *data, struct perf
synthesize_mask((struct perf_record_record_cpu_map *)data->data, map, max);
default:
break;
- };
+ }
}
static struct perf_record_cpu_map *cpu_map_event__new(struct perf_cpu_map *map)
@@ -1183,7 +1357,8 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_BRANCH_STACK) {
sz = sample->branch_stack->nr * sizeof(struct branch_entry);
- sz += sizeof(u64);
+ /* nr, hw_idx */
+ sz += 2 * sizeof(u64);
result += sz;
}
@@ -1228,6 +1403,9 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type,
if (type & PERF_SAMPLE_PHYS_ADDR)
result += sizeof(u64);
+ if (type & PERF_SAMPLE_CGROUP)
+ result += sizeof(u64);
+
if (type & PERF_SAMPLE_AUX) {
result += sizeof(u64);
result += sample->aux_sample.size;
@@ -1344,7 +1522,8 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
if (type & PERF_SAMPLE_BRANCH_STACK) {
sz = sample->branch_stack->nr * sizeof(struct branch_entry);
- sz += sizeof(u64);
+ /* nr, hw_idx */
+ sz += 2 * sizeof(u64);
memcpy(array, sample->branch_stack, sz);
array = (void *)array + sz;
}
@@ -1401,6 +1580,11 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_fo
array++;
}
+ if (type & PERF_SAMPLE_CGROUP) {
+ *array = sample->cgroup;
+ array++;
+ }
+
if (type & PERF_SAMPLE_AUX) {
sz = sample->aux_sample.size;
*array++ = sz;
diff --git a/tools/perf/util/synthetic-events.h b/tools/perf/util/synthetic-events.h
index baead0cdc381..e7a3e9589738 100644
--- a/tools/perf/util/synthetic-events.h
+++ b/tools/perf/util/synthetic-events.h
@@ -45,6 +45,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handl
int perf_event__synthesize_mmap_events(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, bool mmap_data);
int perf_event__synthesize_modules(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_namespaces(struct perf_tool *tool, union perf_event *event, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine);
+int perf_event__synthesize_cgroups(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, const struct perf_sample *sample);
int perf_event__synthesize_stat_config(struct perf_tool *tool, struct perf_stat_config *config, perf_event__handler_t process, struct machine *machine);
int perf_event__synthesize_stat_events(struct perf_stat_config *config, struct perf_tool *tool, struct evlist *evlist, perf_event__handler_t process, bool attrs);
diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c
index 820fceeb19a9..03bd99d3be16 100644
--- a/tools/perf/util/syscalltbl.c
+++ b/tools/perf/util/syscalltbl.c
@@ -8,9 +8,9 @@
#include "syscalltbl.h"
#include <stdlib.h>
#include <linux/compiler.h>
+#include <linux/zalloc.h>
#ifdef HAVE_SYSCALL_TABLE_SUPPORT
-#include <linux/zalloc.h>
#include <string.h>
#include "string2.h"
@@ -142,7 +142,7 @@ int syscalltbl__strglobmatch_first(struct syscalltbl *tbl, const char *syscall_g
struct syscalltbl *syscalltbl__new(void)
{
- struct syscalltbl *tbl = malloc(sizeof(*tbl));
+ struct syscalltbl *tbl = zalloc(sizeof(*tbl));
if (tbl)
tbl->audit_machine = audit_detect_machine();
return tbl;
diff --git a/tools/perf/util/syscalltbl.h b/tools/perf/util/syscalltbl.h
index 9172613028d0..a41d2ca9e4ae 100644
--- a/tools/perf/util/syscalltbl.h
+++ b/tools/perf/util/syscalltbl.h
@@ -3,14 +3,12 @@
#define __PERF_SYSCALLTBL_H
struct syscalltbl {
- union {
- int audit_machine;
- struct {
- int max_id;
- int nr_entries;
- void *entries;
- } syscalls;
- };
+ int audit_machine;
+ struct {
+ int max_id;
+ int nr_entries;
+ void *entries;
+ } syscalls;
};
struct syscalltbl *syscalltbl__new(void);
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 0885967d5bc3..1b992bbba4e8 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -80,6 +80,10 @@ struct thread_stack_entry {
* @comm: current comm
* @arr_sz: size of array if this is the first element of an array
* @rstate: used to detect retpolines
+ * @br_stack_rb: branch stack (ring buffer)
+ * @br_stack_sz: maximum branch stack size
+ * @br_stack_pos: current position in @br_stack_rb
+ * @mispred_all: mark all branches as mispredicted
*/
struct thread_stack {
struct thread_stack_entry *stack;
@@ -95,6 +99,10 @@ struct thread_stack {
struct comm *comm;
unsigned int arr_sz;
enum retpoline_state_t rstate;
+ struct branch_stack *br_stack_rb;
+ unsigned int br_stack_sz;
+ unsigned int br_stack_pos;
+ bool mispred_all;
};
/*
@@ -126,13 +134,26 @@ static int thread_stack__grow(struct thread_stack *ts)
}
static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
- struct call_return_processor *crp)
+ struct call_return_processor *crp,
+ bool callstack, unsigned int br_stack_sz)
{
int err;
- err = thread_stack__grow(ts);
- if (err)
- return err;
+ if (callstack) {
+ err = thread_stack__grow(ts);
+ if (err)
+ return err;
+ }
+
+ if (br_stack_sz) {
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += br_stack_sz * sizeof(struct branch_entry);
+ ts->br_stack_rb = zalloc(sz);
+ if (!ts->br_stack_rb)
+ return -ENOMEM;
+ ts->br_stack_sz = br_stack_sz;
+ }
if (thread->maps && thread->maps->machine) {
struct machine *machine = thread->maps->machine;
@@ -150,7 +171,9 @@ static int thread_stack__init(struct thread_stack *ts, struct thread *thread,
}
static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
- struct call_return_processor *crp)
+ struct call_return_processor *crp,
+ bool callstack,
+ unsigned int br_stack_sz)
{
struct thread_stack *ts = thread->ts, *new_ts;
unsigned int old_sz = ts ? ts->arr_sz : 0;
@@ -176,7 +199,7 @@ static struct thread_stack *thread_stack__new(struct thread *thread, int cpu,
ts += cpu;
if (!ts->stack &&
- thread_stack__init(ts, thread, crp))
+ thread_stack__init(ts, thread, crp, callstack, br_stack_sz))
return NULL;
return ts;
@@ -319,6 +342,9 @@ static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
if (!crp) {
ts->cnt = 0;
+ ts->br_stack_pos = 0;
+ if (ts->br_stack_rb)
+ ts->br_stack_rb->nr = 0;
return 0;
}
@@ -353,8 +379,33 @@ int thread_stack__flush(struct thread *thread)
return err;
}
+static void thread_stack__update_br_stack(struct thread_stack *ts, u32 flags,
+ u64 from_ip, u64 to_ip)
+{
+ struct branch_stack *bs = ts->br_stack_rb;
+ struct branch_entry *be;
+
+ if (!ts->br_stack_pos)
+ ts->br_stack_pos = ts->br_stack_sz;
+
+ ts->br_stack_pos -= 1;
+
+ be = &bs->entries[ts->br_stack_pos];
+ be->from = from_ip;
+ be->to = to_ip;
+ be->flags.value = 0;
+ be->flags.abort = !!(flags & PERF_IP_FLAG_TX_ABORT);
+ be->flags.in_tx = !!(flags & PERF_IP_FLAG_IN_TX);
+ /* No support for mispredict */
+ be->flags.mispred = ts->mispred_all;
+
+ if (bs->nr < ts->br_stack_sz)
+ bs->nr += 1;
+}
+
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
- u64 to_ip, u16 insn_len, u64 trace_nr)
+ u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
+ unsigned int br_stack_sz, bool mispred_all)
{
struct thread_stack *ts = thread__stack(thread, cpu);
@@ -362,12 +413,13 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
return -EINVAL;
if (!ts) {
- ts = thread_stack__new(thread, cpu, NULL);
+ ts = thread_stack__new(thread, cpu, NULL, callstack, br_stack_sz);
if (!ts) {
pr_warning("Out of memory: no thread stack\n");
return -ENOMEM;
}
ts->trace_nr = trace_nr;
+ ts->mispred_all = mispred_all;
}
/*
@@ -381,8 +433,14 @@ int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
ts->trace_nr = trace_nr;
}
- /* Stop here if thread_stack__process() is in use */
- if (ts->crp)
+ if (br_stack_sz)
+ thread_stack__update_br_stack(ts, flags, from_ip, to_ip);
+
+ /*
+ * Stop here if thread_stack__process() is in use, or not recording call
+ * stack.
+ */
+ if (ts->crp || !callstack)
return 0;
if (flags & PERF_IP_FLAG_CALL) {
@@ -430,6 +488,7 @@ static void __thread_stack__free(struct thread *thread, struct thread_stack *ts)
{
__thread_stack__flush(thread, ts);
zfree(&ts->stack);
+ zfree(&ts->br_stack_rb);
}
static void thread_stack__reset(struct thread *thread, struct thread_stack *ts)
@@ -497,6 +556,199 @@ void thread_stack__sample(struct thread *thread, int cpu,
chain->nr = i;
}
+/*
+ * Hardware sample records, created some time after the event occurred, need to
+ * have subsequent addresses removed from the call chain.
+ */
+void thread_stack__sample_late(struct thread *thread, int cpu,
+ struct ip_callchain *chain, size_t sz,
+ u64 sample_ip, u64 kernel_start)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ u64 sample_context = callchain_context(sample_ip, kernel_start);
+ u64 last_context, context, ip;
+ size_t nr = 0, j;
+
+ if (sz < 2) {
+ chain->nr = 0;
+ return;
+ }
+
+ if (!ts)
+ goto out;
+
+ /*
+ * When tracing kernel space, kernel addresses occur at the top of the
+ * call chain after the event occurred but before tracing stopped.
+ * Skip them.
+ */
+ for (j = 1; j <= ts->cnt; j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context == PERF_CONTEXT_USER ||
+ (context == sample_context && ip == sample_ip))
+ break;
+ }
+
+ last_context = sample_ip; /* Use sample_ip as an invalid context */
+
+ for (; nr < sz && j <= ts->cnt; nr++, j++) {
+ ip = ts->stack[ts->cnt - j].ret_addr;
+ context = callchain_context(ip, kernel_start);
+ if (context != last_context) {
+ if (nr >= sz - 1)
+ break;
+ chain->ips[nr++] = context;
+ last_context = context;
+ }
+ chain->ips[nr] = ip;
+ }
+out:
+ if (nr) {
+ chain->nr = nr;
+ } else {
+ chain->ips[0] = sample_context;
+ chain->ips[1] = sample_ip;
+ chain->nr = 2;
+ }
+}
+
+void thread_stack__br_sample(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ const size_t bsz = sizeof(struct branch_entry);
+ struct branch_stack *src;
+ struct branch_entry *be;
+ unsigned int nr;
+
+ dst->nr = 0;
+
+ if (!ts)
+ return;
+
+ src = ts->br_stack_rb;
+ if (!src->nr)
+ return;
+
+ dst->nr = min((unsigned int)src->nr, sz);
+
+ be = &dst->entries[0];
+ nr = min(ts->br_stack_sz - ts->br_stack_pos, (unsigned int)dst->nr);
+ memcpy(be, &src->entries[ts->br_stack_pos], bsz * nr);
+
+ if (src->nr >= ts->br_stack_sz) {
+ sz -= nr;
+ be = &dst->entries[nr];
+ nr = min(ts->br_stack_pos, sz);
+ memcpy(be, &src->entries[0], bsz * ts->br_stack_pos);
+ }
+}
+
+/* Start of user space branch entries */
+static bool us_start(struct branch_entry *be, u64 kernel_start, bool *start)
+{
+ if (!*start)
+ *start = be->to && be->to < kernel_start;
+
+ return *start;
+}
+
+/*
+ * Start of branch entries after the ip fell in between 2 branches, or user
+ * space branch entries.
+ */
+static bool ks_start(struct branch_entry *be, u64 sample_ip, u64 kernel_start,
+ bool *start, struct branch_entry *nb)
+{
+ if (!*start) {
+ *start = (nb && sample_ip >= be->to && sample_ip <= nb->from) ||
+ be->from < kernel_start ||
+ (be->to && be->to < kernel_start);
+ }
+
+ return *start;
+}
+
+/*
+ * Hardware sample records, created some time after the event occurred, need to
+ * have subsequent addresses removed from the branch stack.
+ */
+void thread_stack__br_sample_late(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz,
+ u64 ip, u64 kernel_start)
+{
+ struct thread_stack *ts = thread__stack(thread, cpu);
+ struct branch_entry *d, *s, *spos, *ssz;
+ struct branch_stack *src;
+ unsigned int nr = 0;
+ bool start = false;
+
+ dst->nr = 0;
+
+ if (!ts)
+ return;
+
+ src = ts->br_stack_rb;
+ if (!src->nr)
+ return;
+
+ spos = &src->entries[ts->br_stack_pos];
+ ssz = &src->entries[ts->br_stack_sz];
+
+ d = &dst->entries[0];
+ s = spos;
+
+ if (ip < kernel_start) {
+ /*
+ * User space sample: start copying branch entries when the
+ * branch is in user space.
+ */
+ for (s = spos; s < ssz && nr < sz; s++) {
+ if (us_start(s, kernel_start, &start)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ }
+
+ if (src->nr >= ts->br_stack_sz) {
+ for (s = &src->entries[0]; s < spos && nr < sz; s++) {
+ if (us_start(s, kernel_start, &start)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ }
+ }
+ } else {
+ struct branch_entry *nb = NULL;
+
+ /*
+ * Kernel space sample: start copying branch entries when the ip
+ * falls in between 2 branches (or the branch is in user space
+ * because then the start must have been missed).
+ */
+ for (s = spos; s < ssz && nr < sz; s++) {
+ if (ks_start(s, ip, kernel_start, &start, nb)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ nb = s;
+ }
+
+ if (src->nr >= ts->br_stack_sz) {
+ for (s = &src->entries[0]; s < spos && nr < sz; s++) {
+ if (ks_start(s, ip, kernel_start, &start, nb)) {
+ *d++ = *s;
+ nr += 1;
+ }
+ nb = s;
+ }
+ }
+ }
+
+ dst->nr = nr;
+}
+
struct call_return_processor *
call_return_processor__new(int (*process)(struct call_return *cr, u64 *parent_db_id, void *data),
void *data)
@@ -864,7 +1116,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
}
if (!ts) {
- ts = thread_stack__new(thread, sample->cpu, crp);
+ ts = thread_stack__new(thread, sample->cpu, crp, true, 0);
if (!ts)
return -ENOMEM;
ts->comm = comm;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index e1ec5a58f1b2..3bc47a42af8e 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -81,10 +81,19 @@ struct call_return_processor {
};
int thread_stack__event(struct thread *thread, int cpu, u32 flags, u64 from_ip,
- u64 to_ip, u16 insn_len, u64 trace_nr);
+ u64 to_ip, u16 insn_len, u64 trace_nr, bool callstack,
+ unsigned int br_stack_sz, bool mispred_all);
void thread_stack__set_trace_nr(struct thread *thread, int cpu, u64 trace_nr);
void thread_stack__sample(struct thread *thread, int cpu, struct ip_callchain *chain,
size_t sz, u64 ip, u64 kernel_start);
+void thread_stack__sample_late(struct thread *thread, int cpu,
+ struct ip_callchain *chain, size_t sz, u64 ip,
+ u64 kernel_start);
+void thread_stack__br_sample(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz);
+void thread_stack__br_sample_late(struct thread *thread, int cpu,
+ struct branch_stack *dst, unsigned int sz,
+ u64 sample_ip, u64 kernel_start);
int thread_stack__flush(struct thread *thread);
void thread_stack__free(struct thread *thread);
size_t thread_stack__depth(struct thread *thread, int cpu);
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 28b719388028..665e5c0618ed 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -47,6 +47,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
thread->tid = tid;
thread->ppid = -1;
thread->cpu = -1;
+ thread->lbr_stitch_enable = false;
INIT_LIST_HEAD(&thread->namespaces_list);
INIT_LIST_HEAD(&thread->comm_list);
init_rwsem(&thread->namespaces_lock);
@@ -110,6 +111,7 @@ void thread__delete(struct thread *thread)
exit_rwsem(&thread->namespaces_lock);
exit_rwsem(&thread->comm_lock);
+ thread__free_stitch_list(thread);
free(thread);
}
@@ -452,3 +454,25 @@ int thread__memcpy(struct thread *thread, struct machine *machine,
return dso__data_read_offset(al.map->dso, machine, offset, buf, len);
}
+
+void thread__free_stitch_list(struct thread *thread)
+{
+ struct lbr_stitch *lbr_stitch = thread->lbr_stitch;
+ struct stitch_list *pos, *tmp;
+
+ if (!lbr_stitch)
+ return;
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ list_for_each_entry_safe(pos, tmp, &lbr_stitch->free_lists, node) {
+ list_del_init(&pos->node);
+ free(pos);
+ }
+
+ zfree(&lbr_stitch->prev_lbr_cursor);
+ zfree(&thread->lbr_stitch);
+}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 20b96b5d1f15..b066fb30d203 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -13,6 +13,8 @@
#include <strlist.h>
#include <intlist.h>
#include "rwsem.h"
+#include "event.h"
+#include "callchain.h"
struct addr_location;
struct map;
@@ -20,6 +22,13 @@ struct perf_record_namespaces;
struct thread_stack;
struct unwind_libunwind_ops;
+struct lbr_stitch {
+ struct list_head lists;
+ struct list_head free_lists;
+ struct perf_sample prev_sample;
+ struct callchain_cursor_node *prev_lbr_cursor;
+};
+
struct thread {
union {
struct rb_node rb_node;
@@ -46,6 +55,10 @@ struct thread {
struct srccode_state srccode_state;
bool filter;
int filter_entry_depth;
+
+ /* LBR call stack stitch */
+ bool lbr_stitch_enable;
+ struct lbr_stitch *lbr_stitch;
};
struct machine;
@@ -142,4 +155,6 @@ static inline bool thread__is_filtered(struct thread *thread)
return false;
}
+void thread__free_stitch_list(struct thread *thread);
+
#endif /* __PERF_THREAD_H */
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 2abbf668b8de..3fb67bd31e4a 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -46,6 +46,7 @@ struct perf_tool {
mmap2,
comm,
namespaces,
+ cgroup,
fork,
exit,
lost,
@@ -78,6 +79,7 @@ struct perf_tool {
bool ordered_events;
bool ordering_requires_timestamps;
bool namespace_events;
+ bool cgroup_events;
bool no_warn;
enum show_feature_header show_feat_hdr;
};
diff --git a/tools/perf/util/top.c b/tools/perf/util/top.c
index 3dce2de9d005..27945eeb0cb5 100644
--- a/tools/perf/util/top.c
+++ b/tools/perf/util/top.c
@@ -77,7 +77,7 @@ size_t perf_top__header_snprintf(struct perf_top *top, char *bf, size_t size)
opts->freq ? "Hz" : "");
}
- ret += SNPRINTF(bf + ret, size - ret, "%s", perf_evsel__name(top->sym_evsel));
+ ret += SNPRINTF(bf + ret, size - ret, "%s", evsel__name(top->sym_evsel));
ret += SNPRINTF(bf + ret, size - ret, "], ");
diff --git a/tools/perf/util/top.h b/tools/perf/util/top.h
index f117d4f4821e..ff8391208ecd 100644
--- a/tools/perf/util/top.h
+++ b/tools/perf/util/top.h
@@ -18,7 +18,7 @@ struct perf_session;
struct perf_top {
struct perf_tool tool;
- struct evlist *evlist;
+ struct evlist *evlist, *sb_evlist;
struct record_opts record_opts;
struct annotation_options annotation_opts;
struct evswitch evswitch;
@@ -36,6 +36,7 @@ struct perf_top {
bool use_tui, use_stdio;
bool vmlinux_warned;
bool dump_symtab;
+ bool stitch_lbr;
struct hist_entry *sym_filter_entry;
struct evsel *sym_evsel;
struct perf_session *session;
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 086e98ff42a3..0e5c4786f296 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -428,7 +428,7 @@ try_id:
if (!ppath->next) {
error:
pr_debug("No memory to alloc tracepoints list\n");
- put_tracepoints_path(&path);
+ put_tracepoints_path(path.next);
return NULL;
}
next:
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 8593d3c200c6..f507dff713c9 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -75,7 +75,7 @@ static void skip(int size)
r = size > BUFSIZ ? BUFSIZ : size;
do_read(buf, r);
size -= r;
- };
+ }
}
static unsigned int read4(struct tep_handle *pevent)
diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c
index b4649f5a0c2f..9aededc0bc06 100644
--- a/tools/perf/util/unwind-libunwind-local.c
+++ b/tools/perf/util/unwind-libunwind-local.c
@@ -243,7 +243,7 @@ struct eh_frame_hdr {
* encoded_t fde_addr;
* } binary_search_table[fde_count];
*/
- char data[0];
+ char data[];
} __packed;
static int unwind_spec_ehframe(struct dso *dso, struct machine *machine,
diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 969ae560dad9..37a9492edb3e 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -55,6 +55,24 @@ int sysctl__max_stack(void)
return sysctl_perf_event_max_stack;
}
+bool sysctl__nmi_watchdog_enabled(void)
+{
+ static bool cached;
+ static bool nmi_watchdog;
+ int value;
+
+ if (cached)
+ return nmi_watchdog;
+
+ if (sysctl__read_int("kernel/nmi_watchdog", &value) < 0)
+ return false;
+
+ nmi_watchdog = (value > 0) ? true : false;
+ cached = true;
+
+ return nmi_watchdog;
+}
+
bool test_attr__enabled;
bool perf_host = true;
@@ -272,6 +290,7 @@ int perf_event_paranoid(void)
bool perf_event_paranoid_check(int max_level)
{
return perf_cap__capable(CAP_SYS_ADMIN) ||
+ perf_cap__capable(CAP_PERFMON) ||
perf_event_paranoid() <= max_level;
}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 9969b8b46f7c..f486fdd3a538 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -29,6 +29,8 @@ size_t hex_width(u64 v);
int sysctl__max_stack(void);
+bool sysctl__nmi_watchdog_enabled(void);
+
int fetch_kernel_version(unsigned int *puint,
char *str, size_t str_sz);
#define KVER_VERSION(x) (((x) >> 16) & 0xff)
diff --git a/tools/power/acpi/.gitignore b/tools/power/acpi/.gitignore
index f698a0e5bfa6..0b319fc8bb17 100644
--- a/tools/power/acpi/.gitignore
+++ b/tools/power/acpi/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
/acpidbg
/acpidump
/ec
diff --git a/tools/power/cpupower/.gitignore b/tools/power/cpupower/.gitignore
index 1f9977cc609c..7677329c42a6 100644
--- a/tools/power/cpupower/.gitignore
+++ b/tools/power/cpupower/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
.libs
libcpupower.so
libcpupower.so.*
diff --git a/tools/power/cpupower/utils/cpupower-info.c b/tools/power/cpupower/utils/cpupower-info.c
index d3755ea70d4d..0ba61a2c4d81 100644
--- a/tools/power/cpupower/utils/cpupower-info.c
+++ b/tools/power/cpupower/utils/cpupower-info.c
@@ -62,7 +62,7 @@ int cmd_info(int argc, char **argv)
default:
print_wrong_arg_exit();
}
- };
+ }
if (!params.params)
params.params = 0x7;
diff --git a/tools/power/cpupower/utils/cpupower-set.c b/tools/power/cpupower/utils/cpupower-set.c
index 3cca6f715dd9..052044d7e012 100644
--- a/tools/power/cpupower/utils/cpupower-set.c
+++ b/tools/power/cpupower/utils/cpupower-set.c
@@ -72,7 +72,7 @@ int cmd_set(int argc, char **argv)
default:
print_wrong_arg_exit();
}
- };
+ }
if (!params.params)
print_wrong_arg_exit();
diff --git a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
index 20f46348271b..5edd35bd9ee9 100644
--- a/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c
@@ -117,7 +117,7 @@ static int amd_fam14h_get_pci_info(struct cstate *state,
break;
default:
return -1;
- };
+ }
return 0;
}
diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index a65f7d011513..8b42c2f0a5b0 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -53,7 +53,7 @@ static int cpuidle_start(void)
dprint("CPU %d - State: %d - Val: %llu\n",
cpu, state, previous_count[cpu][state]);
}
- };
+ }
return 0;
}
@@ -72,7 +72,7 @@ static int cpuidle_stop(void)
dprint("CPU %d - State: %d - Val: %llu\n",
cpu, state, previous_count[cpu][state]);
}
- };
+ }
return 0;
}
@@ -172,7 +172,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
cpuidle_cstates[num].id = num;
cpuidle_cstates[num].get_count_percent =
cpuidle_get_count_percent;
- };
+ }
/* Free this at program termination */
previous_count = malloc(sizeof(long long *) * cpu_count);
diff --git a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
index 97ad3233a521..55e55b6b42f9 100644
--- a/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c
@@ -79,7 +79,7 @@ static int hsw_ext_get_count(enum intel_hsw_ext_id id, unsigned long long *val,
break;
default:
return -1;
- };
+ }
if (read_msr(cpu, msr, val))
return -1;
return 0;
diff --git a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
index 114271165182..16eaf006f61f 100644
--- a/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/nhm_idle.c
@@ -91,7 +91,7 @@ static int nhm_get_count(enum intel_nhm_id id, unsigned long long *val,
break;
default:
return -1;
- };
+ }
if (read_msr(cpu, msr, val))
return -1;
diff --git a/tools/power/cpupower/utils/idle_monitor/snb_idle.c b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
index df8b223cc096..811d63ab17a7 100644
--- a/tools/power/cpupower/utils/idle_monitor/snb_idle.c
+++ b/tools/power/cpupower/utils/idle_monitor/snb_idle.c
@@ -77,7 +77,7 @@ static int snb_get_count(enum intel_snb_id id, unsigned long long *val,
break;
default:
return -1;
- };
+ }
if (read_msr(cpu, msr, val))
return -1;
return 0;
diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile
index 845541544570..b5310832c19c 100644
--- a/tools/power/pm-graph/Makefile
+++ b/tools/power/pm-graph/Makefile
@@ -41,6 +41,10 @@ uninstall :
if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/config ] ; then \
rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/config; \
fi;
+ rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__/*
+ if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__ ] ; then \
+ rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph/__pycache__; \
+ fi;
rm -f $(DESTDIR)$(PREFIX)/lib/pm-graph/*
if [ -d $(DESTDIR)$(PREFIX)/lib/pm-graph ] ; then \
rmdir $(DESTDIR)$(PREFIX)/lib/pm-graph; \
diff --git a/tools/power/pm-graph/README b/tools/power/pm-graph/README
index 96259f6e5715..afe6beb40ad9 100644
--- a/tools/power/pm-graph/README
+++ b/tools/power/pm-graph/README
@@ -1,7 +1,12 @@
- p m - g r a p h
+ _
+ _ __ _ __ ___ __ _ _ __ __ _ _ __ | |__
+ | '_ \| '_ ` _ \ _____ / _` | '__/ _` | '_ \| '_ \
+ | |_) | | | | | |_____| (_| | | | (_| | |_) | | | |
+ | .__/|_| |_| |_| \__, |_| \__,_| .__/|_| |_|
+ |_| |___/ |_|
pm-graph: suspend/resume/boot timing analysis tools
- Version: 5.5
+ Version: 5.6
Author: Todd Brandt <todd.e.brandt@intel.com>
Home Page: https://01.org/pm-graph
@@ -18,10 +23,6 @@
- upstream version in git:
https://github.com/intel/pm-graph/
- Requirements:
- - runs with python2 or python3, choice is made by /usr/bin/python link
- - python2 now requires python-configparser be installed
-
Table of Contents
- Overview
- Setup
@@ -29,6 +30,8 @@
- Basic Usage
- Dev Mode Usage
- Proc Mode Usage
+ - Endurance Testing
+ - Usage Examples
- Configuration Files
- Usage Examples
- Config File Options
@@ -54,15 +57,18 @@
| SETUP |
------------------------------------------------------------------
- These packages are required to execute the scripts
+ Package Requirements
+ - runs with python2 or python3, choice is made by /usr/bin/python link
- python
- - python-requests
+ - python-configparser (for python2 sleepgraph)
+ - python-requests (for googlesheet.py)
+ - linux-tools-common (for turbostat usage in sleepgraph)
Ubuntu:
- sudo apt-get install python python-requests
+ sudo apt-get install python python-configparser python-requests linux-tools-common
Fedora:
- sudo dnf install python python-requests
+ sudo dnf install python python-configparser python-requests linux-tools-common
The tools can most easily be installed via git clone and make install
@@ -190,6 +196,104 @@ _______________
%> sudo ./sleepgraph.py -config config/suspend-proc.cfg
+------------------------------------------------------------------
+| ENDURANCE TESTING |
+------------------------------------------------------------------
+
+ The best way to gauge the health of a system is to run a series of
+ suspend/resumes over an extended period and analyze the behavior. This can be
+ accomplished with sleepgraph's -multi argument. You specify two numbers: the
+ number of tests to run OR the duration in days, hours, or minutes, and the
+ delay in seconds between them. For instance, -multi 20 5: execute 20 tests with
+ a 5 second delay between each, or -multi 24h 0: execute tests over a 24 hour
+ period with no delay between tests. You can include any other options you like
+ to generate the data you want. It's most useful to collect dev mode timelines
+ as the kprobes don't alter the performance much and you get more insight.
+
+ On completion, the output folder contains a series of folders for the
+ individual test data and a set of summary pages in the root. The summary.html
+ file is a tabular list of the tests with relevant info and links. The
+ summary-issue.html and summary-devices.html files include data taken from
+ all tests on kernel issues and device performance. The folder looks like this:
+
+ suspend-xN-{date}-{time}:
+ summary.html
+ summary-issues.html
+ summary-devices.html
+ suspend-{date}-{time} (1)
+ suspend-{date}-{time} (2)
+ ...
+
+ These are the relevant arguments to use for testing:
+
+ -m mode
+ Mode to initiate for suspend e.g. mem, freeze, standby (default: mem).
+
+ -rtcwake t
+ Use rtcwake to autoresume after t seconds (default: 15).
+
+ -gzip (optional)
+ Gzip the trace and dmesg logs to save space. The tool can also read in
+ gzipped logs for processing. This reduces the multitest folder size.
+
+ -dev (optional)
+ Add kernel source calls and threads to the timeline (default: disabled).
+
+ -multi n d
+ Execute n consecutive tests at d seconds intervals. The outputs will be
+ created in a new subdirectory: suspend-xN-{date}-{time}. When the multitest
+ run is done, the -summary command is called automatically to create summary
+ html files for all the data (unless you use -skiphtml). -skiphtml will
+ speed up the testing by not creating timelines or summary html files. You
+ can then run the tool again at a later time with -summary and -genhtml to
+ create the timelines.
+
+ -skiphtml (optional)
+ Run the test and capture the trace logs, but skip the timeline and summary
+ html generation. This can greatly speed up overall testing. You can then
+ copy the data to a faster host machine and run -summary -genhtml to
+ generate the timelines and summary.
+
+ These are the relevant commands to use after testing is complete:
+
+ -summary indir
+ Generate or regenerate the summary for a -multi test run. Creates three
+ files: summary.html, summary-issues.html, and summary-devices.html in the
+ current folder. summary.html is a table of tests with relevant info sorted
+ by kernel/host/mode, and links to the test html files. summary-issues.html
+ is a list of kernel issues found in dmesg from all the tests.
+ summary-devices.html is a list of devices and times from all the tests.
+
+ -genhtml
+ Used with -summary to regenerate any missing html timelines from their
+ dmesg and ftrace logs. This will require a significant amount of time if
+ there are thousands of tests.
+
+Usage Examples
+_______________
+
+ A multitest is initiated like this:
+
+ %> sudo ./sleepgraph.py -m mem -rtcwake 10 -dev -gzip -multi 2000 0
+
+ or you can skip timeline generation in order to speed things up
+
+ %> sudo ./sleepgraph.py -m mem -rtcwake 10 -dev -gzip -multi 2000 0 -skiphtml
+
+ The tool will produce an output folder with all the test subfolders inside.
+ Each test subfolder contains the dmesg/ftrace logs and/or the html timeline
+ depending on whether you used the -skiphtml option. The root folder contains
+ the summary.html files.
+
+ The summary for an existing multitest is generated like this:
+
+ %> cd suspend-x2000-{date}-{time}
+ %> sleepgraph.py -summary .
+
+ or if you need to generate the html timelines you can use -genhtml
+
+ %> cd suspend-xN-{date}-{time}
+ %> sleepgraph.py -summary . -genhtml
------------------------------------------------------------------
| CONFIGURATION FILES |
diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py
index d3b99a1e92d6..2823cd3122f7 100755
--- a/tools/power/pm-graph/bootgraph.py
+++ b/tools/power/pm-graph/bootgraph.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
#
# Tool for analyzing boot timing
diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg
index 4f80ad7d7275..962e5768681c 100644
--- a/tools/power/pm-graph/config/custom-timeline-functions.cfg
+++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg
@@ -125,7 +125,7 @@ acpi_suspend_begin:
suspend_console:
acpi_pm_prepare:
syscore_suspend:
-arch_enable_nonboot_cpus_end:
+arch_thaw_secondary_cpus_end:
syscore_resume:
acpi_pm_finish:
resume_console:
diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8
index 43aee64316df..5126271de98a 100644
--- a/tools/power/pm-graph/sleepgraph.8
+++ b/tools/power/pm-graph/sleepgraph.8
@@ -74,8 +74,10 @@ after the test is complete.
Switch the display to the requested mode for the test using the xset command.
This helps maintain the consistency of test data for better comparison.
.TP
-\fB-skiphtml\fR
-Run the test and capture the trace logs, but skip the timeline generation.
+\fB-wifi\fR
+If a wifi connection is available, check that it reconnects after resume. Include
+the reconnect time in the total resume time calculation and treat wifi timeouts
+as resume failures.
.SS "advanced"
.TP
@@ -117,8 +119,24 @@ Include \fIt\fR ms delay before 1st suspend (default: 0 ms).
Include \fIt\fR ms delay after last resume (default: 0 ms).
.TP
\fB-multi \fIn d\fR
-Execute \fIn\fR consecutive tests at \fId\fR seconds intervals. The outputs will
-be created in a new subdirectory with a summary page: suspend-xN-{date}-{time}.
+Used for endurance testing. If \fIn\fR is entirely numeric, it's treated as a count:
+Execute \fIn\fR consecutive tests at \fId\fR second intervals.
+If \fIn\fR is an integer followed by a "d", "h", or "m", it's treated as a duration:
+Execute tests continuously over \fIn\fR days, hours, or minutes at \fId\fR second intervals.
+The outputs will be created in a new subdirectory, for count: suspend-{date}-{time}-xN,
+for duration: suspend-{date}-{time}-Nm. When the multitest run is done, the \fI-summary\fR
+command is called automatically to create summary html files for all the data (unless you
+use \fI-skiphtml\fR). \fI-skiphtml\fR will speed up the testing by not creating timelines
+or summary html files. You can then run the tool again at a later time with \fI-summary\fR
+and \fI-genhtml\fR to create the timelines.
+.TP
+\fB-maxfail \fIn\fR
+Abort a -multi run after \fIn\fR consecutive fails. 0 means never abort (default = 0).
+.TP
+\fB-skiphtml\fR
+Run the test and capture the trace logs, but skip the timeline generation.
+You can generate the html timelines later with \fI-dmesg\fR & \fI-ftrace\fR, or
+by running \fI-summary\fR and \fI-genhtml\fR.
.SS "ftrace debug"
.TP
@@ -173,11 +191,20 @@ Set trace buffer size to N kilo-bytes (default: all of free memory up to 3GB)
.SH COMMANDS
.TP
\fB-summary \fIindir\fR
-Create a summary page of all tests in \fIindir\fR. Creates summary.html
-in the current folder. The output page is a table of tests with
-suspend and resume values sorted by suspend mode, host, and kernel.
-Includes test averages by mode and links to the test html files.
-Use -genhtml to include tests with missing html.
+Create a set of summary pages for all tests in \fIindir\fR recursively.
+Creates summary.html, summary-issues.html, and summary-devices.html in the current folder.
+summary.html is a table of tests with relevant info sorted by kernel/host/mode,
+and links to the test html files. It identifies the minimum, maximum, and median
+suspend and resume times for you with highlights and links in the header.
+summary-issues.html is a list of kernel issues found in dmesg from all the tests.
+summary-devices.html is a list of devices and times from all the tests.
+
+Use \fI-genhtml\fR to regenerate any tests with missing html.
+.TP
+\fB-genhtml\fR
+Used with \fI-summary\fR to regenerate any missing html timelines from their
+dmesg and ftrace logs. This will require a significant amount of time if there
+are thousands of tests.
.TP
\fB-modes\fR
List available suspend modes.
@@ -189,10 +216,7 @@ with any options you intend to use to see if they will work.
\fB-fpdt\fR
Print out the contents of the ACPI Firmware Performance Data Table.
.TP
-\fB-battery\fR
-Print out battery status and current charge.
-.TP
-\fB-wifi\fR
+\fB-wificheck\fR
Print out wifi status and connection details.
.TP
\fB-xon/-xoff/-xstandby/-xsuspend\fR
@@ -208,6 +232,9 @@ Print out system info extracted from BIOS. Reads /dev/mem directly instead of go
\fB-devinfo\fR
Print out the pm settings of all devices which support runtime suspend.
.TP
+\fB-cmdinfo\fR
+Print out all the platform data collected from the system that makes it into the logs.
+.TP
\fB-flist\fR
Print the list of ftrace functions currently being captured. Functions
that are not available as symbols in the current kernel are shown in red.
@@ -272,14 +299,20 @@ Run two suspends back to back, include a 500ms delay before, after, and in betwe
.IP
\f(CW$ sudo sleepgraph -m mem -rtcwake 15 -x2 -predelay 500 -x2delay 500 -postdelay 500\fR
.PP
+Execute a suspend using a custom command.
+.IP
+\f(CW$ sudo sleepgraph -cmd "echo mem > /sys/power/state" -rtcwake 15\fR
+.PP
+
+.SS "endurance testing using -multi"
+.PP
Do a batch run of 10 freezes with 30 seconds delay between runs.
.IP
\f(CW$ sudo sleepgraph -m freeze -rtcwake 15 -multi 10 30\fR
.PP
-Execute a suspend using a custom command.
+Do a batch run of freezes for 24 hours.
.IP
-\f(CW$ sudo sleepgraph -cmd "echo mem > /sys/power/state" -rtcwake 15\fR
-.PP
+\f(CW$ sudo sleepgraph -m freeze -rtcwake 15 -multi 24h 0\fR
.SS "adding callgraph data"
Add device callgraphs. Limit the trace depth and only show callgraphs 10ms or larger.
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index f7d1c1f62f86..602e64b68ba7 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-only
#
# Tool for analyzing suspend/resume timing
@@ -58,7 +58,7 @@ import re
import platform
import signal
import codecs
-from datetime import datetime
+from datetime import datetime, timedelta
import struct
import configparser
import gzip
@@ -81,12 +81,13 @@ def ascii(text):
# store system values and test parameters
class SystemValues:
title = 'SleepGraph'
- version = '5.5'
+ version = '5.6'
ansi = False
rs = 0
display = ''
gzip = False
sync = False
+ wifi = False
verbose = False
testlog = True
dmesglog = True
@@ -97,7 +98,8 @@ class SystemValues:
cgphase = ''
cgtest = -1
cgskip = ''
- multitest = {'run': False, 'count': 0, 'delay': 0}
+ maxfail = 0
+ multitest = {'run': False, 'count': 1000000, 'delay': 0}
max_graph_depth = 0
callloopmaxgap = 0.0001
callloopmaxlen = 0.005
@@ -148,7 +150,7 @@ class SystemValues:
x2delay = 0
skiphtml = False
usecallgraph = False
- ftopfunc = 'suspend_devices_and_enter'
+ ftopfunc = 'pm_suspend'
ftop = False
usetraceevents = False
usetracemarkers = True
@@ -164,6 +166,8 @@ class SystemValues:
predelay = 0
postdelay = 0
pmdebug = ''
+ tmstart = 'SUSPEND START %Y%m%d-%H:%M:%S.%f'
+ tmend = 'RESUME COMPLETE %Y%m%d-%H:%M:%S.%f'
tracefuncs = {
'sys_sync': {},
'ksys_sync': {},
@@ -183,16 +187,18 @@ class SystemValues:
'acpi_s2idle_sync': {},
'acpi_s2idle_begin': {},
'acpi_s2idle_prepare': {},
+ 'acpi_s2idle_prepare_late': {},
'acpi_s2idle_wake': {},
'acpi_s2idle_wakeup': {},
'acpi_s2idle_restore': {},
+ 'acpi_s2idle_restore_early': {},
'hibernate_preallocate_memory': {},
'create_basic_memory_bitmaps': {},
'swsusp_write': {},
'suspend_console': {},
'acpi_pm_prepare': {},
'syscore_suspend': {},
- 'arch_enable_nonboot_cpus_end': {},
+ 'arch_thaw_secondary_cpus_end': {},
'syscore_resume': {},
'acpi_pm_finish': {},
'resume_console': {},
@@ -270,12 +276,23 @@ class SystemValues:
'intel_opregion_init': {},
'intel_fbdev_set_suspend': {},
}
+ infocmds = [
+ [0, 'kparams', 'cat', '/proc/cmdline'],
+ [0, 'mcelog', 'mcelog'],
+ [0, 'pcidevices', 'lspci', '-tv'],
+ [0, 'usbdevices', 'lsusb', '-t'],
+ [1, 'interrupts', 'cat', '/proc/interrupts'],
+ [1, 'wakeups', 'cat', '/sys/kernel/debug/wakeup_sources'],
+ [2, 'gpecounts', 'sh', '-c', 'grep -v invalid /sys/firmware/acpi/interrupts/*'],
+ [2, 'suspendstats', 'sh', '-c', 'grep -v invalid /sys/power/suspend_stats/*'],
+ [2, 'cpuidle', 'sh', '-c', 'grep -v invalid /sys/devices/system/cpu/cpu*/cpuidle/state*/s2idle/*'],
+ [2, 'battery', 'sh', '-c', 'grep -v invalid /sys/class/power_supply/*/*'],
+ ]
cgblacklist = []
kprobes = dict()
timeformat = '%.3f'
cmdline = '%s %s' % \
(os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:]))
- kparams = ''
sudouser = ''
def __init__(self):
self.archargs = 'args_'+platform.machine()
@@ -295,6 +312,9 @@ class SystemValues:
if os.getuid() == 0 and 'SUDO_USER' in os.environ and \
os.environ['SUDO_USER']:
self.sudouser = os.environ['SUDO_USER']
+ def resetlog(self):
+ self.logmsg = ''
+ self.platinfo = []
def vprint(self, msg):
self.logmsg += msg+'\n'
if self.verbose or msg.startswith('WARNING:'):
@@ -304,11 +324,11 @@ class SystemValues:
return
signame = self.signames[signum] if signum in self.signames else 'UNKNOWN'
msg = 'Signal %s caused a tool exit, line %d' % (signame, frame.f_lineno)
- sysvals.outputResult({'error':msg})
+ self.outputResult({'error':msg})
sys.exit(3)
def signalHandlerInit(self):
capture = ['BUS', 'SYS', 'XCPU', 'XFSZ', 'PWR', 'HUP', 'INT', 'QUIT',
- 'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM', 'TSTP']
+ 'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM']
self.signames = dict()
for i in capture:
s = 'SIG'+i
@@ -336,6 +356,8 @@ class SystemValues:
self.outputResult({'error':msg})
sys.exit(1)
return False
+ def usable(self, file):
+ return (os.path.exists(file) and os.path.getsize(file) > 0)
def getExec(self, cmd):
try:
fp = Popen(['which', cmd], stdout=PIPE, stderr=PIPE).stdout
@@ -389,12 +411,6 @@ class SystemValues:
r = info['bios-release-date'] if 'bios-release-date' in info else ''
self.sysstamp = '# sysinfo | man:%s | plat:%s | cpu:%s | bios:%s | biosdate:%s | numcpu:%d | memsz:%d | memfr:%d' % \
(m, p, c, b, r, self.cpucount, self.memtotal, self.memfree)
- try:
- kcmd = open('/proc/cmdline', 'r').read().strip()
- except:
- kcmd = ''
- if kcmd:
- self.sysstamp += '\n# kparams | %s' % kcmd
def printSystemInfo(self, fatal=False):
self.rootCheck(True)
out = dmidecode(self.mempath, fatal)
@@ -441,6 +457,7 @@ class SystemValues:
self.testdir+'/'+self.prefix+'_'+self.suspendmode+'.html'
if not os.path.isdir(self.testdir):
os.makedirs(self.testdir)
+ self.sudoUserchown(self.testdir)
def getValueList(self, value):
out = []
for i in value.split(','):
@@ -486,7 +503,7 @@ class SystemValues:
fp.close()
self.dmesgstart = float(ktime)
def getdmesg(self, testdata):
- op = self.writeDatafileHeader(sysvals.dmesgfile, testdata)
+ op = self.writeDatafileHeader(self.dmesgfile, testdata)
# store all new dmesg lines since initdmesg was called
fp = Popen('dmesg', stdout=PIPE).stdout
for line in fp:
@@ -716,9 +733,10 @@ class SystemValues:
if name == f:
return True
return False
- def initFtrace(self):
- self.printSystemInfo(False)
- pprint('INITIALIZING FTRACE...')
+ def initFtrace(self, quiet=False):
+ if not quiet:
+ sysvals.printSystemInfo(False)
+ pprint('INITIALIZING FTRACE...')
# turn trace off
self.fsetVal('0', 'tracing_on')
self.cleanupFtrace()
@@ -746,7 +764,7 @@ class SystemValues:
if tgtsize < 65536:
tgtsize = int(self.fgetVal('buffer_size_kb')) * cpus
break
- pprint('Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus))
+ self.vprint('Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus))
# initialize the callgraph trace
if(self.usecallgraph):
# set trace type
@@ -782,7 +800,8 @@ class SystemValues:
if self.usedevsrc:
for name in self.dev_tracefuncs:
self.defaultKprobe(name, self.dev_tracefuncs[name])
- pprint('INITIALIZING KPROBES...')
+ if not quiet:
+ pprint('INITIALIZING KPROBES...')
self.addKprobes(self.verbose)
if(self.usetraceevents):
# turn trace events on
@@ -827,21 +846,10 @@ class SystemValues:
fw = test['fw']
if(fw):
fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1]))
- if 'mcelog' in test:
- fp.write('# mcelog %s\n' % test['mcelog'])
if 'turbo' in test:
fp.write('# turbostat %s\n' % test['turbo'])
- if 'bat' in test:
- (a1, c1), (a2, c2) = test['bat']
- fp.write('# battery %s %d %s %d\n' % (a1, c1, a2, c2))
if 'wifi' in test:
- wstr = []
- for wifi in test['wifi']:
- tmp = []
- for key in sorted(wifi):
- tmp.append('%s:%s' % (key, wifi[key]))
- wstr.append('|'.join(tmp))
- fp.write('# wifi %s\n' % (','.join(wstr)))
+ fp.write('# wifi %s\n' % test['wifi'])
if test['error'] or len(testdata) > 1:
fp.write('# enter_sleep_error %s\n' % test['error'])
return fp
@@ -901,23 +909,7 @@ class SystemValues:
def b64zip(self, data):
out = base64.b64encode(codecs.encode(data.encode(), 'zlib')).decode()
return out
- def mcelog(self, clear=False):
- cmd = self.getExec('mcelog')
- if not cmd:
- return ''
- if clear:
- call(cmd+' > /dev/null 2>&1', shell=True)
- return ''
- try:
- fp = Popen([cmd], stdout=PIPE, stderr=PIPE).stdout
- out = ascii(fp.read()).strip()
- fp.close()
- except:
- return ''
- if not out:
- return ''
- return self.b64zip(out)
- def platforminfo(self):
+ def platforminfo(self, cmdafter):
# add platform info on to a completed ftrace file
if not os.path.exists(self.ftracefile):
return False
@@ -1001,31 +993,76 @@ class SystemValues:
footer += '# platform-devinfo: %s\n' % self.b64zip(out)
# add a line for each of these commands with their outputs
- cmds = [
- ['pcidevices', 'lspci', '-tv'],
- ['interrupts', 'cat', '/proc/interrupts'],
- ['gpecounts', 'sh', '-c', 'grep -v invalid /sys/firmware/acpi/interrupts/gpe*'],
- ]
- for cargs in cmds:
- name = cargs[0]
- cmdline = ' '.join(cargs[1:])
- cmdpath = self.getExec(cargs[1])
- if not cmdpath:
+ for name, cmdline, info in cmdafter:
+ footer += '# platform-%s: %s | %s\n' % (name, cmdline, self.b64zip(info))
+
+ with self.openlog(self.ftracefile, 'a') as fp:
+ fp.write(footer)
+ return True
+ def commonPrefix(self, list):
+ if len(list) < 2:
+ return ''
+ prefix = list[0]
+ for s in list[1:]:
+ while s[:len(prefix)] != prefix and prefix:
+ prefix = prefix[:len(prefix)-1]
+ if not prefix:
+ break
+ if '/' in prefix and prefix[-1] != '/':
+ prefix = prefix[0:prefix.rfind('/')+1]
+ return prefix
+ def dictify(self, text, format):
+ out = dict()
+ header = True if format == 1 else False
+ delim = ' ' if format == 1 else ':'
+ for line in text.split('\n'):
+ if header:
+ header, out['@'] = False, line
+ continue
+ line = line.strip()
+ if delim in line:
+ data = line.split(delim, 1)
+ num = re.search(r'[\d]+', data[1])
+ if format == 2 and num:
+ out[data[0].strip()] = num.group()
+ else:
+ out[data[0].strip()] = data[1]
+ return out
+ def cmdinfo(self, begin, debug=False):
+ out = []
+ if begin:
+ self.cmd1 = dict()
+ for cargs in self.infocmds:
+ delta, name = cargs[0], cargs[1]
+ cmdline, cmdpath = ' '.join(cargs[2:]), self.getExec(cargs[2])
+ if not cmdpath or (begin and not delta):
continue
- cmd = [cmdpath] + cargs[2:]
try:
- fp = Popen(cmd, stdout=PIPE, stderr=PIPE).stdout
+ fp = Popen([cmdpath]+cargs[3:], stdout=PIPE, stderr=PIPE).stdout
info = ascii(fp.read()).strip()
fp.close()
except:
continue
- if not info:
- continue
- footer += '# platform-%s: %s | %s\n' % (name, cmdline, self.b64zip(info))
-
- with self.openlog(self.ftracefile, 'a') as fp:
- fp.write(footer)
- return True
+ if not debug and begin:
+ self.cmd1[name] = self.dictify(info, delta)
+ elif not debug and delta and name in self.cmd1:
+ before, after = self.cmd1[name], self.dictify(info, delta)
+ dinfo = ('\t%s\n' % before['@']) if '@' in before else ''
+ prefix = self.commonPrefix(list(before.keys()))
+ for key in sorted(before):
+ if key in after and before[key] != after[key]:
+ title = key.replace(prefix, '')
+ if delta == 2:
+ dinfo += '\t%s : %s -> %s\n' % \
+ (title, before[key].strip(), after[key].strip())
+ else:
+ dinfo += '%10s (start) : %s\n%10s (after) : %s\n' % \
+ (title, before[key], title, after[key])
+ dinfo = '\tnothing changed' if not dinfo else dinfo.rstrip()
+ out.append((name, cmdline, dinfo))
+ else:
+ out.append((name, cmdline, '\tnothing' if not info else info))
+ return out
def haveTurbostat(self):
if not self.tstat:
return False
@@ -1035,8 +1072,8 @@ class SystemValues:
fp = Popen([cmd, '-v'], stdout=PIPE, stderr=PIPE).stderr
out = ascii(fp.read()).strip()
fp.close()
- if re.match('turbostat version [0-9\.]* .*', out):
- sysvals.vprint(out)
+ if re.match('turbostat version .*', out):
+ self.vprint(out)
return True
return False
def turbostat(self):
@@ -1056,11 +1093,11 @@ class SystemValues:
fp.close()
if not keyline or not valline or len(keyline) != len(valline):
errmsg = 'unrecognized turbostat output:\n'+rawout.strip()
- sysvals.vprint(errmsg)
- if not sysvals.verbose:
+ self.vprint(errmsg)
+ if not self.verbose:
pprint(errmsg)
return ''
- if sysvals.verbose:
+ if self.verbose:
pprint(rawout.strip())
out = []
for key in keyline:
@@ -1068,30 +1105,36 @@ class SystemValues:
val = valline[idx]
out.append('%s=%s' % (key, val))
return '|'.join(out)
- def checkWifi(self):
- out = dict()
- iwcmd, ifcmd = self.getExec('iwconfig'), self.getExec('ifconfig')
- if not iwcmd or not ifcmd:
- return out
- fp = Popen(iwcmd, stdout=PIPE, stderr=PIPE).stdout
- for line in fp:
- m = re.match('(?P<dev>\S*) .* ESSID:(?P<ess>\S*)', ascii(line))
- if not m:
+ def wifiDetails(self, dev):
+ try:
+ info = open('/sys/class/net/%s/device/uevent' % dev, 'r').read().strip()
+ except:
+ return dev
+ vals = [dev]
+ for prop in info.split('\n'):
+ if prop.startswith('DRIVER=') or prop.startswith('PCI_ID='):
+ vals.append(prop.split('=')[-1])
+ return ':'.join(vals)
+ def checkWifi(self, dev=''):
+ try:
+ w = open('/proc/net/wireless', 'r').read().strip()
+ except:
+ return ''
+ for line in reversed(w.split('\n')):
+ m = re.match(' *(?P<dev>.*): (?P<stat>[0-9a-f]*) .*', w.split('\n')[-1])
+ if not m or (dev and dev != m.group('dev')):
continue
- out['device'] = m.group('dev')
- if '"' in m.group('ess'):
- out['essid'] = m.group('ess').strip('"')
- break
- fp.close()
- if 'device' in out:
- fp = Popen([ifcmd, out['device']], stdout=PIPE, stderr=PIPE).stdout
- for line in fp:
- m = re.match('.* inet (?P<ip>[0-9\.]*)', ascii(line))
- if m:
- out['ip'] = m.group('ip')
- break
- fp.close()
- return out
+ return m.group('dev')
+ return ''
+ def pollWifi(self, dev, timeout=60):
+ start = time.time()
+ while (time.time() - start) < timeout:
+ w = self.checkWifi(dev)
+ if w:
+ return '%s reconnected %.2f' % \
+ (self.wifiDetails(dev), max(0, time.time() - start))
+ time.sleep(0.01)
+ return '%s timeout %d' % (self.wifiDetails(dev), timeout)
def errorSummary(self, errinfo, msg):
found = False
for entry in errinfo:
@@ -1113,8 +1156,9 @@ class SystemValues:
arr[j] = arr[j]\
.replace('\\', '\\\\').replace(']', '\]').replace('[', '\[')\
.replace('.', '\.').replace('+', '\+').replace('*', '\*')\
- .replace('(', '\(').replace(')', '\)')
- mstr = ' '.join(arr)
+ .replace('(', '\(').replace(')', '\)').replace('}', '\}')\
+ .replace('{', '\{')
+ mstr = ' *'.join(arr)
entry = {
'line': msg,
'match': mstr,
@@ -1122,6 +1166,44 @@ class SystemValues:
'urls': {self.hostname: [self.htmlfile]}
}
errinfo.append(entry)
+ def multistat(self, start, idx, finish):
+ if 'time' in self.multitest:
+ id = '%d Duration=%dmin' % (idx+1, self.multitest['time'])
+ else:
+ id = '%d/%d' % (idx+1, self.multitest['count'])
+ t = time.time()
+ if 'start' not in self.multitest:
+ self.multitest['start'] = self.multitest['last'] = t
+ self.multitest['total'] = 0.0
+ pprint('TEST (%s) START' % id)
+ return
+ dt = t - self.multitest['last']
+ if not start:
+ if idx == 0 and self.multitest['delay'] > 0:
+ self.multitest['total'] += self.multitest['delay']
+ pprint('TEST (%s) COMPLETE -- Duration %.1fs' % (id, dt))
+ return
+ self.multitest['total'] += dt
+ self.multitest['last'] = t
+ avg = self.multitest['total'] / idx
+ if 'time' in self.multitest:
+ left = finish - datetime.now()
+ left -= timedelta(microseconds=left.microseconds)
+ else:
+ left = timedelta(seconds=((self.multitest['count'] - idx) * int(avg)))
+ pprint('TEST (%s) START - Avg Duration %.1fs, Time left %s' % \
+ (id, avg, str(left)))
+ def multiinit(self, c, d):
+ sz, unit = 'count', 'm'
+ if c.endswith('d') or c.endswith('h') or c.endswith('m'):
+ sz, unit, c = 'time', c[-1], c[:-1]
+ self.multitest['run'] = True
+ self.multitest[sz] = getArgInt('multi: n d (exec count)', c, 1, 1000000, False)
+ self.multitest['delay'] = getArgInt('multi: n d (delay between tests)', d, 0, 3600, False)
+ if unit == 'd':
+ self.multitest[sz] *= 1440
+ elif unit == 'h':
+ self.multitest[sz] *= 60
sysvals = SystemValues()
switchvalues = ['enable', 'disable', 'on', 'off', 'true', 'false', '1', '0']
@@ -1210,25 +1292,30 @@ class Data:
'resume_complete': {'order': 9, 'color': '#FFFFCC'},
}
errlist = {
- 'HWERROR' : '.*\[ *Hardware Error *\].*',
- 'FWBUG' : '.*\[ *Firmware Bug *\].*',
- 'BUG' : '.*BUG.*',
- 'ERROR' : '.*ERROR.*',
- 'WARNING' : '.*WARNING.*',
- 'IRQ' : '.*genirq: .*',
- 'TASKFAIL': '.*Freezing of tasks *.*',
- 'ACPI' : '.*ACPI *(?P<b>[A-Za-z]*) *Error[: ].*',
- 'DEVFAIL' : '.* failed to (?P<b>[a-z]*) async: .*',
- 'DISKFULL': '.*No space left on device.*',
- 'USBERR' : '.*usb .*device .*, error [0-9-]*',
- 'ATAERR' : ' *ata[0-9\.]*: .*failed.*',
- 'MEIERR' : ' *mei.*: .*failed.*',
- 'TPMERR' : '(?i) *tpm *tpm[0-9]*: .*error.*',
+ 'HWERROR' : r'.*\[ *Hardware Error *\].*',
+ 'FWBUG' : r'.*\[ *Firmware Bug *\].*',
+ 'BUG' : r'(?i).*\bBUG\b.*',
+ 'ERROR' : r'(?i).*\bERROR\b.*',
+ 'WARNING' : r'(?i).*\bWARNING\b.*',
+ 'FAULT' : r'(?i).*\bFAULT\b.*',
+ 'FAIL' : r'(?i).*\bFAILED\b.*',
+ 'INVALID' : r'(?i).*\bINVALID\b.*',
+ 'CRASH' : r'(?i).*\bCRASHED\b.*',
+ 'IRQ' : r'.*\bgenirq: .*',
+ 'TASKFAIL': r'.*Freezing of tasks *.*',
+ 'ACPI' : r'.*\bACPI *(?P<b>[A-Za-z]*) *Error[: ].*',
+ 'DISKFULL': r'.*\bNo space left on device.*',
+ 'USBERR' : r'.*usb .*device .*, error [0-9-]*',
+ 'ATAERR' : r' *ata[0-9\.]*: .*failed.*',
+ 'MEIERR' : r' *mei.*: .*failed.*',
+ 'TPMERR' : r'(?i) *tpm *tpm[0-9]*: .*error.*',
}
def __init__(self, num):
idchar = 'abcdefghij'
self.start = 0.0 # test start
self.end = 0.0 # test end
+ self.hwstart = 0 # rtc test start
+ self.hwend = 0 # rtc test end
self.tSuspended = 0.0 # low-level suspend start
self.tResumed = 0.0 # low-level resume start
self.tKernSus = 0.0 # kernel level suspend start
@@ -1240,10 +1327,8 @@ class Data:
self.stamp = 0
self.outfile = ''
self.kerror = False
- self.battery = 0
- self.wifi = 0
+ self.wifi = dict()
self.turbostat = 0
- self.mcelog = 0
self.enterfail = ''
self.currphase = ''
self.pstl = dict() # process timeline
@@ -1308,6 +1393,8 @@ class Data:
continue
dir = 'suspend' if t < self.tSuspended else 'resume'
msg = m.group('msg')
+ if re.match('capability: warning: .*', msg):
+ continue
for err in self.errlist:
if re.match(self.errlist[err], msg):
list.append((msg, err, dir, t, i, i))
@@ -1316,17 +1403,26 @@ class Data:
msglist = []
for msg, type, dir, t, idx1, idx2 in list:
msglist.append(msg)
- sysvals.vprint('kernel %s found in %s at %f' % (type, dir, t))
self.errorinfo[dir].append((type, t, idx1, idx2))
if self.kerror:
sysvals.dmesglog = True
if len(self.dmesgtext) < 1 and sysvals.dmesgfile:
lf.close()
return msglist
- def setStart(self, time):
+ def setStart(self, time, msg=''):
self.start = time
- def setEnd(self, time):
+ if msg:
+ try:
+ self.hwstart = datetime.strptime(msg, sysvals.tmstart)
+ except:
+ self.hwstart = 0
+ def setEnd(self, time, msg=''):
self.end = time
+ if msg:
+ try:
+ self.hwend = datetime.strptime(msg, sysvals.tmend)
+ except:
+ self.hwend = 0
def isTraceEventOutsideDeviceCalls(self, pid, time):
for phase in self.sortedPhases():
list = self.dmesg[phase]['list']
@@ -1546,6 +1642,14 @@ class Data:
self.trimTime(tS, tL, left)
self.tLow.append('%.0f'%(tL*1000))
lp = phase
+ def getMemTime(self):
+ if not self.hwstart or not self.hwend:
+ return
+ stime = (self.tSuspended - self.start) * 1000000
+ rtime = (self.end - self.tResumed) * 1000000
+ hws = self.hwstart + timedelta(microseconds=stime)
+ hwr = self.hwend - timedelta(microseconds=rtime)
+ self.tLow.append('%.0f'%((hwr - hws).total_seconds() * 1000))
def getTimeValues(self):
sktime = (self.tSuspended - self.tKernSus) * 1000
rktime = (self.tKernRes - self.tResumed) * 1000
@@ -1883,9 +1987,9 @@ class Data:
c = self.addProcessUsageEvent(ps, tres)
if c > 0:
sysvals.vprint('%25s (res): %d' % (ps, c))
- def handleEndMarker(self, time):
+ def handleEndMarker(self, time, msg=''):
dm = self.dmesg
- self.setEnd(time)
+ self.setEnd(time, msg)
self.initDevicegroups()
# give suspend_prepare an end if needed
if 'suspend_prepare' in dm and dm['suspend_prepare']['end'] < 0:
@@ -2071,7 +2175,7 @@ class FTraceLine:
if not self.fevent:
return False
if sysvals.usetracemarkers:
- if(self.name == 'SUSPEND START'):
+ if(self.name.startswith('SUSPEND START')):
return True
return False
else:
@@ -2084,7 +2188,7 @@ class FTraceLine:
if not self.fevent:
return False
if sysvals.usetracemarkers:
- if(self.name == 'RESUME COMPLETE'):
+ if(self.name.startswith('RESUME COMPLETE')):
return True
return False
else:
@@ -2444,7 +2548,7 @@ class Timeline:
def createHeader(self, sv, stamp):
if(not stamp['time']):
return
- self.html += '<div class="version"><a href="https://01.org/suspendresume">%s v%s</a></div>' \
+ self.html += '<div class="version"><a href="https://01.org/pm-graph">%s v%s</a></div>' \
% (sv.title, sv.version)
if sv.logmsg and sv.testlog:
self.html += '<button id="showtest" class="logbtn btnfmt">log</button>'
@@ -2670,14 +2774,11 @@ class TestProps:
stampfmt = '# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\
'(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\
' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$'
- batteryfmt = '^# battery (?P<a1>\w*) (?P<c1>\d*) (?P<a2>\w*) (?P<c2>\d*)'
- wififmt = '^# wifi (?P<w>.*)'
+ wififmt = '^# wifi *(?P<d>\S*) *(?P<s>\S*) *(?P<t>[0-9\.]+).*'
tstatfmt = '^# turbostat (?P<t>\S*)'
- mcelogfmt = '^# mcelog (?P<m>\S*)'
testerrfmt = '^# enter_sleep_error (?P<e>.*)'
sysinfofmt = '^# sysinfo .*'
cmdlinefmt = '^# command \| (?P<cmd>.*)'
- kparamsfmt = '^# kparams \| (?P<kp>.*)'
devpropfmt = '# Device Properties: .*'
pinfofmt = '# platform-(?P<val>[a-z,A-Z,0-9]*): (?P<info>.*)'
tracertypefmt = '# tracer: (?P<t>.*)'
@@ -2695,11 +2796,8 @@ class TestProps:
self.stamp = ''
self.sysinfo = ''
self.cmdline = ''
- self.kparams = ''
self.testerror = []
- self.mcelog = []
self.turbostat = []
- self.battery = []
self.wifi = []
self.fwdata = []
self.ftrace_line_fmt = self.ftrace_line_fmt_nop
@@ -2721,21 +2819,12 @@ class TestProps:
elif re.match(self.sysinfofmt, line):
self.sysinfo = line
return True
- elif re.match(self.kparamsfmt, line):
- self.kparams = line
- return True
elif re.match(self.cmdlinefmt, line):
self.cmdline = line
return True
- elif re.match(self.mcelogfmt, line):
- self.mcelog.append(line)
- return True
elif re.match(self.tstatfmt, line):
self.turbostat.append(line)
return True
- elif re.match(self.batteryfmt, line):
- self.battery.append(line)
- return True
elif re.match(self.wififmt, line):
self.wifi.append(line)
return True
@@ -2749,6 +2838,8 @@ class TestProps:
def parseStamp(self, data, sv):
# global test data
m = re.match(self.stampfmt, self.stamp)
+ if not self.stamp or not m:
+ doError('data does not include the expected stamp')
data.stamp = {'time': '', 'host': '', 'mode': ''}
dt = datetime(int(m.group('y'))+2000, int(m.group('m')),
int(m.group('d')), int(m.group('H')), int(m.group('M')),
@@ -2780,10 +2871,6 @@ class TestProps:
m = re.match(self.cmdlinefmt, self.cmdline)
if m:
sv.cmdline = m.group('cmd')
- if self.kparams:
- m = re.match(self.kparamsfmt, self.kparams)
- if m:
- sv.kparams = m.group('kp')
if not sv.stamp:
sv.stamp = data.stamp
# firmware data
@@ -2793,26 +2880,18 @@ class TestProps:
data.fwSuspend, data.fwResume = int(m.group('s')), int(m.group('r'))
if(data.fwSuspend > 0 or data.fwResume > 0):
data.fwValid = True
- # mcelog data
- if len(self.mcelog) > data.testnumber:
- m = re.match(self.mcelogfmt, self.mcelog[data.testnumber])
- if m:
- data.mcelog = sv.b64unzip(m.group('m'))
# turbostat data
if len(self.turbostat) > data.testnumber:
m = re.match(self.tstatfmt, self.turbostat[data.testnumber])
if m:
data.turbostat = m.group('t')
- # battery data
- if len(self.battery) > data.testnumber:
- m = re.match(self.batteryfmt, self.battery[data.testnumber])
- if m:
- data.battery = m.groups()
# wifi data
if len(self.wifi) > data.testnumber:
m = re.match(self.wififmt, self.wifi[data.testnumber])
if m:
- data.wifi = m.group('w')
+ data.wifi = {'dev': m.group('d'), 'stat': m.group('s'),
+ 'time': float(m.group('t'))}
+ data.stamp['wifi'] = m.group('d')
# sleep mode enter errors
if len(self.testerror) > data.testnumber:
m = re.match(self.testerrfmt, self.testerror[data.testnumber])
@@ -3012,13 +3091,13 @@ def appendIncompleteTraceLog(testruns):
if(t.startMarker()):
data = testrun[testidx].data
tp.parseStamp(data, sysvals)
- data.setStart(t.time)
+ data.setStart(t.time, t.name)
continue
if(not data):
continue
# find the end of resume
if(t.endMarker()):
- data.setEnd(t.time)
+ data.setEnd(t.time, t.name)
testidx += 1
if(testidx >= testcnt):
break
@@ -3081,7 +3160,7 @@ def parseTraceLog(live=False):
doError('%s does not exist' % sysvals.ftracefile)
if not live:
sysvals.setupAllKprobes()
- ksuscalls = ['pm_prepare_console']
+ ksuscalls = ['ksys_sync', 'pm_prepare_console']
krescalls = ['pm_restore_console']
tracewatch = ['irq_wakeup']
if sysvals.usekprobes:
@@ -3094,7 +3173,7 @@ def parseTraceLog(live=False):
testruns = []
testdata = []
testrun = 0
- data = 0
+ data, limbo = 0, True
tf = sysvals.openlog(sysvals.ftracefile, 'r')
phase = 'suspend_prepare'
for line in tf:
@@ -3141,16 +3220,16 @@ def parseTraceLog(live=False):
continue
# find the start of suspend
if(t.startMarker()):
- data = Data(len(testdata))
+ data, limbo = Data(len(testdata)), False
testdata.append(data)
testrun = TestRun(data)
testruns.append(testrun)
tp.parseStamp(data, sysvals)
- data.setStart(t.time)
+ data.setStart(t.time, t.name)
data.first_suspend_prepare = True
phase = data.setPhase('suspend_prepare', t.time, True)
continue
- if(not data):
+ if(not data or limbo):
continue
# process cpu exec line
if t.type == 'tracing_mark_write':
@@ -3167,14 +3246,16 @@ def parseTraceLog(live=False):
continue
# find the end of resume
if(t.endMarker()):
- data.handleEndMarker(t.time)
+ if data.tKernRes == 0:
+ data.tKernRes = t.time
+ data.handleEndMarker(t.time, t.name)
if(not sysvals.usetracemarkers):
# no trace markers? then quit and be sure to finish recording
# the event we used to trigger resume end
if('thaw_processes' in testrun.ttemp and len(testrun.ttemp['thaw_processes']) > 0):
# if an entry exists, assume this is its end
testrun.ttemp['thaw_processes'][-1]['end'] = t.time
- break
+ limbo = True
continue
# trace event processing
if(t.fevent):
@@ -3197,7 +3278,7 @@ def parseTraceLog(live=False):
# -- phase changes --
# start of kernel suspend
if(re.match('suspend_enter\[.*', t.name)):
- if(isbegin):
+ if(isbegin and data.tKernSus == 0):
data.tKernSus = t.time
continue
# suspend_prepare start
@@ -3225,7 +3306,7 @@ def parseTraceLog(live=False):
elif(re.match('machine_suspend\[.*', t.name)):
if(isbegin):
lp = data.lastPhase()
- if lp == 'resume_machine':
+ if lp.startswith('resume_machine'):
data.dmesg[lp]['end'] = t.time
phase = data.setPhase('suspend_machine', data.dmesg[lp]['end'], True)
data.setPhase(phase, t.time, False)
@@ -3320,7 +3401,8 @@ def parseTraceLog(live=False):
'proc': m_proc,
})
# start of kernel resume
- if(phase == 'suspend_prepare' and kprobename in ksuscalls):
+ if(data.tKernSus == 0 and phase == 'suspend_prepare' \
+ and kprobename in ksuscalls):
data.tKernSus = t.time
elif(t.freturn):
if(key not in tp.ktemp) or len(tp.ktemp[key]) < 1:
@@ -3355,7 +3437,7 @@ def parseTraceLog(live=False):
sysvals.vprint('WARNING: ftrace start marker is missing')
if data and not data.devicegroups:
sysvals.vprint('WARNING: ftrace end marker is missing')
- data.handleEndMarker(t.time)
+ data.handleEndMarker(t.time, t.name)
if sysvals.suspendmode == 'command':
for test in testruns:
@@ -3476,6 +3558,10 @@ def parseTraceLog(live=False):
data.fwValid = False
sysvals.vprint('WARNING: phase "%s" is missing!' % p)
lp = p
+ if not terr and 'dev' in data.wifi and data.wifi['stat'] == 'timeout':
+ terr = '%s%s failed in wifi_resume <i>(%s %.0fs timeout)</i>' % \
+ (sysvals.suspendmode, tn, data.wifi['dev'], data.wifi['time'])
+ error.append(terr)
if not terr and data.enterfail:
pprint('test%s FAILED: enter %s failed with %s' % (tn, sysvals.suspendmode, data.enterfail))
terr = 'test%s failed to enter %s mode' % (tn, sysvals.suspendmode)
@@ -3933,7 +4019,7 @@ def createHTMLSummarySimple(testruns, htmlfile, title):
tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [dict(), dict()]
iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
num = 0
- useturbo = False
+ useturbo = usewifi = False
lastmode = ''
cnt = dict()
for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])):
@@ -3952,17 +4038,17 @@ def createHTMLSummarySimple(testruns, htmlfile, title):
tAvg, tMin, tMax, tMed = [0.0, 0.0], [0.0, 0.0], [0.0, 0.0], [dict(), dict()]
iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
num = 0
- pkgpc10 = syslpi = ''
+ pkgpc10 = syslpi = wifi = ''
if 'pkgpc10' in data and 'syslpi' in data:
- pkgpc10 = data['pkgpc10']
- syslpi = data['syslpi']
- useturbo = True
+ pkgpc10, syslpi, useturbo = data['pkgpc10'], data['syslpi'], True
+ if 'wifi' in data:
+ wifi, usewifi = data['wifi'], True
res = data['result']
tVal = [float(data['suspend']), float(data['resume'])]
list[mode]['data'].append([data['host'], data['kernel'],
data['time'], tVal[0], tVal[1], data['url'], res,
data['issues'], data['sus_worst'], data['sus_worsttime'],
- data['res_worst'], data['res_worsttime'], pkgpc10, syslpi])
+ data['res_worst'], data['res_worsttime'], pkgpc10, syslpi, wifi])
idx = len(list[mode]['data']) - 1
if res.startswith('fail in'):
res = 'fail'
@@ -4002,7 +4088,12 @@ def createHTMLSummarySimple(testruns, htmlfile, title):
td = '\t<td>{0}</td>\n'
tdh = '\t<td{1}>{0}</td>\n'
tdlink = '\t<td><a href="{0}">html</a></td>\n'
- colspan = '14' if useturbo else '12'
+ cols = 12
+ if useturbo:
+ cols += 2
+ if usewifi:
+ cols += 1
+ colspan = '%d' % cols
# table header
html += '<table>\n<tr>\n' + th.format('#') +\
@@ -4013,6 +4104,8 @@ def createHTMLSummarySimple(testruns, htmlfile, title):
th.format('Worst Resume Device') + th.format('RD Time')
if useturbo:
html += th.format('PkgPC10') + th.format('SysLPI')
+ if usewifi:
+ html += th.format('Wifi')
html += th.format('Detail')+'</tr>\n'
# export list into html
head = '<tr class="head"><td>{0}</td><td>{1}</td>'+\
@@ -4076,6 +4169,8 @@ def createHTMLSummarySimple(testruns, htmlfile, title):
if useturbo:
html += td.format(d[12]) # pkg_pc10
html += td.format(d[13]) # syslpi
+ if usewifi:
+ html += td.format(d[14]) # wifi
html += tdlink.format(d[5]) if d[5] else td.format('') # url
html += '</tr>\n'
num += 1
@@ -4224,6 +4319,8 @@ def createHTML(testruns, testfail):
kerror = True
if(sysvals.suspendmode in ['freeze', 'standby']):
data.trimFreezeTime(testruns[-1].tSuspended)
+ else:
+ data.getMemTime()
# html function templates
html_error = '<div id="{1}" title="kernel error/warning" class="err" style="right:{0}%">{2}&rarr;</div>\n'
@@ -4242,13 +4339,10 @@ def createHTML(testruns, testfail):
'<td class="green">Execution Time: <b>{0} ms</b></td>'\
'<td class="yellow">Command: <b>{1}</b></td>'\
'</tr>\n</table>\n'
- html_timegroups = '<table class="time2">\n<tr>'\
- '<td class="green" title="time from kernel enter_state({5}) to firmware mode [kernel time only]">{4}Kernel Suspend: {0} ms</td>'\
- '<td class="purple">{4}Firmware Suspend: {1} ms</td>'\
- '<td class="purple">{4}Firmware Resume: {2} ms</td>'\
- '<td class="yellow" title="time from firmware mode to return from kernel enter_state({5}) [kernel time only]">{4}Kernel Resume: {3} ms</td>'\
- '</tr>\n</table>\n'
html_fail = '<table class="testfail"><tr><td>{0}</td></tr></table>\n'
+ html_kdesc = '<td class="{3}" title="time spent in kernel execution">{0}Kernel {2}: {1} ms</td>'
+ html_fwdesc = '<td class="{3}" title="time spent in firmware">{0}Firmware {2}: {1} ms</td>'
+ html_wifdesc = '<td class="yellow" title="time for wifi to reconnect after resume complete ({2})">{0}Wifi Resume: {1}</td>'
# html format variables
scaleH = 20
@@ -4264,13 +4358,10 @@ def createHTML(testruns, testfail):
# Generate the header for this timeline
for data in testruns:
tTotal = data.end - data.start
- sktime, rktime = data.getTimeValues()
if(tTotal == 0):
doError('No timeline data')
- if(len(data.tLow) > 0):
- low_time = '+'.join(data.tLow)
if sysvals.suspendmode == 'command':
- run_time = '%.0f'%((data.end-data.start)*1000)
+ run_time = '%.0f' % (tTotal * 1000)
if sysvals.testcommand:
testdesc = sysvals.testcommand
else:
@@ -4279,43 +4370,55 @@ def createHTML(testruns, testfail):
testdesc = ordinal(data.testnumber+1)+' '+testdesc
thtml = html_timetotal3.format(run_time, testdesc)
devtl.html += thtml
- elif data.fwValid:
- suspend_time = '%.0f'%(sktime + (data.fwSuspend/1000000.0))
- resume_time = '%.0f'%(rktime + (data.fwResume/1000000.0))
- testdesc1 = 'Total'
- testdesc2 = ''
- stitle = 'time from kernel enter_state(%s) to low-power mode [kernel & firmware time]' % sysvals.suspendmode
- rtitle = 'time from low-power mode to return from kernel enter_state(%s) [firmware & kernel time]' % sysvals.suspendmode
- if(len(testruns) > 1):
- testdesc1 = testdesc2 = ordinal(data.testnumber+1)
- testdesc2 += ' '
- if(len(data.tLow) == 0):
- thtml = html_timetotal.format(suspend_time, \
- resume_time, testdesc1, stitle, rtitle)
- else:
- thtml = html_timetotal2.format(suspend_time, low_time, \
- resume_time, testdesc1, stitle, rtitle)
- devtl.html += thtml
+ continue
+ # typical full suspend/resume header
+ stot, rtot = sktime, rktime = data.getTimeValues()
+ ssrc, rsrc, testdesc, testdesc2 = ['kernel'], ['kernel'], 'Kernel', ''
+ if data.fwValid:
+ stot += (data.fwSuspend/1000000.0)
+ rtot += (data.fwResume/1000000.0)
+ ssrc.append('firmware')
+ rsrc.append('firmware')
+ testdesc = 'Total'
+ if 'time' in data.wifi and data.wifi['stat'] != 'timeout':
+ rtot += data.end - data.tKernRes + (data.wifi['time'] * 1000.0)
+ rsrc.append('wifi')
+ testdesc = 'Total'
+ suspend_time, resume_time = '%.3f' % stot, '%.3f' % rtot
+ stitle = 'time from kernel suspend start to %s mode [%s time]' % \
+ (sysvals.suspendmode, ' & '.join(ssrc))
+ rtitle = 'time from %s mode to kernel resume complete [%s time]' % \
+ (sysvals.suspendmode, ' & '.join(rsrc))
+ if(len(testruns) > 1):
+ testdesc = testdesc2 = ordinal(data.testnumber+1)
+ testdesc2 += ' '
+ if(len(data.tLow) == 0):
+ thtml = html_timetotal.format(suspend_time, \
+ resume_time, testdesc, stitle, rtitle)
+ else:
+ low_time = '+'.join(data.tLow)
+ thtml = html_timetotal2.format(suspend_time, low_time, \
+ resume_time, testdesc, stitle, rtitle)
+ devtl.html += thtml
+ if not data.fwValid and 'dev' not in data.wifi:
+ continue
+ # extra detail when the times come from multiple sources
+ thtml = '<table class="time2">\n<tr>'
+ thtml += html_kdesc.format(testdesc2, '%.3f'%sktime, 'Suspend', 'green')
+ if data.fwValid:
sftime = '%.3f'%(data.fwSuspend / 1000000.0)
rftime = '%.3f'%(data.fwResume / 1000000.0)
- devtl.html += html_timegroups.format('%.3f'%sktime, \
- sftime, rftime, '%.3f'%rktime, testdesc2, sysvals.suspendmode)
- else:
- suspend_time = '%.3f' % sktime
- resume_time = '%.3f' % rktime
- testdesc = 'Kernel'
- stitle = 'time from kernel enter_state(%s) to firmware mode [kernel time only]' % sysvals.suspendmode
- rtitle = 'time from firmware mode to return from kernel enter_state(%s) [kernel time only]' % sysvals.suspendmode
- if(len(testruns) > 1):
- testdesc = ordinal(data.testnumber+1)+' '+testdesc
- if(len(data.tLow) == 0):
- thtml = html_timetotal.format(suspend_time, \
- resume_time, testdesc, stitle, rtitle)
+ thtml += html_fwdesc.format(testdesc2, sftime, 'Suspend', 'green')
+ thtml += html_fwdesc.format(testdesc2, rftime, 'Resume', 'yellow')
+ thtml += html_kdesc.format(testdesc2, '%.3f'%rktime, 'Resume', 'yellow')
+ if 'time' in data.wifi:
+ if data.wifi['stat'] != 'timeout':
+ wtime = '%.0f ms'%(data.end - data.tKernRes + (data.wifi['time'] * 1000.0))
else:
- thtml = html_timetotal2.format(suspend_time, low_time, \
- resume_time, testdesc, stitle, rtitle)
- devtl.html += thtml
-
+ wtime = 'TIMEOUT'
+ thtml += html_wifdesc.format(testdesc2, wtime, data.wifi['dev'])
+ thtml += '</tr>\n</table>\n'
+ devtl.html += thtml
if testfail:
devtl.html += html_fail.format(testfail)
@@ -5082,28 +5185,32 @@ def setRuntimeSuspend(before=True):
# Description:
# Execute system suspend through the sysfs interface, then copy the output
# dmesg and ftrace files to the test output directory.
-def executeSuspend():
+def executeSuspend(quiet=False):
pm = ProcessMonitor()
tp = sysvals.tpath
- wifi = sysvals.checkWifi()
+ if sysvals.wifi:
+ wifi = sysvals.checkWifi()
testdata = []
- battery = True if getBattery() else False
# run these commands to prepare the system for suspend
if sysvals.display:
- pprint('SET DISPLAY TO %s' % sysvals.display.upper())
+ if not quiet:
+ pprint('SET DISPLAY TO %s' % sysvals.display.upper())
displayControl(sysvals.display)
time.sleep(1)
if sysvals.sync:
- pprint('SYNCING FILESYSTEMS')
+ if not quiet:
+ pprint('SYNCING FILESYSTEMS')
call('sync', shell=True)
# mark the start point in the kernel ring buffer just as we start
sysvals.initdmesg()
# start ftrace
if(sysvals.usecallgraph or sysvals.usetraceevents):
- pprint('START TRACING')
+ if not quiet:
+ pprint('START TRACING')
sysvals.fsetVal('1', 'tracing_on')
if sysvals.useprocmon:
pm.start()
+ sysvals.cmdinfo(True)
# execute however many s/r runs requested
for count in range(1,sysvals.execcount+1):
# x2delay in between test runs
@@ -5119,15 +5226,14 @@ def executeSuspend():
pprint('SUSPEND START')
else:
pprint('SUSPEND START (press a key to resume)')
- sysvals.mcelog(True)
- bat1 = getBattery() if battery else False
# set rtcwake
if(sysvals.rtcwake):
- pprint('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
+ if not quiet:
+ pprint('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
sysvals.rtcWakeAlarmOn()
# start of suspend trace marker
if(sysvals.usecallgraph or sysvals.usetraceevents):
- sysvals.fsetVal('SUSPEND START', 'trace_marker')
+ sysvals.fsetVal(datetime.now().strftime(sysvals.tmstart), 'trace_marker')
# predelay delay
if(count == 1 and sysvals.predelay > 0):
sysvals.fsetVal('WAIT %d' % sysvals.predelay, 'trace_marker')
@@ -5174,37 +5280,33 @@ def executeSuspend():
# return from suspend
pprint('RESUME COMPLETE')
if(sysvals.usecallgraph or sysvals.usetraceevents):
- sysvals.fsetVal('RESUME COMPLETE', 'trace_marker')
+ sysvals.fsetVal(datetime.now().strftime(sysvals.tmend), 'trace_marker')
+ if sysvals.wifi and wifi:
+ tdata['wifi'] = sysvals.pollWifi(wifi)
if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'):
tdata['fw'] = getFPDT(False)
- mcelog = sysvals.mcelog()
- if mcelog:
- tdata['mcelog'] = mcelog
- bat2 = getBattery() if battery else False
- if battery and bat1 and bat2:
- tdata['bat'] = (bat1, bat2)
- if 'device' in wifi and 'ip' in wifi:
- tdata['wifi'] = (wifi, sysvals.checkWifi())
testdata.append(tdata)
+ cmdafter = sysvals.cmdinfo(False)
# stop ftrace
if(sysvals.usecallgraph or sysvals.usetraceevents):
if sysvals.useprocmon:
pm.stop()
sysvals.fsetVal('0', 'tracing_on')
# grab a copy of the dmesg output
- pprint('CAPTURING DMESG')
+ if not quiet:
+ pprint('CAPTURING DMESG')
sysvals.getdmesg(testdata)
# grab a copy of the ftrace output
if(sysvals.usecallgraph or sysvals.usetraceevents):
- pprint('CAPTURING TRACE')
+ if not quiet:
+ pprint('CAPTURING TRACE')
op = sysvals.writeDatafileHeader(sysvals.ftracefile, testdata)
fp = open(tp+'trace', 'r')
for line in fp:
op.write(line)
op.close()
sysvals.fsetVal('', 'trace')
- sysvals.platforminfo()
- return testdata
+ sysvals.platforminfo(cmdafter)
def readFile(file):
if os.path.islink(file):
@@ -5447,25 +5549,6 @@ def dmidecode(mempath, fatal=False):
count += 1
return out
-def getBattery():
- p, charge, bat = '/sys/class/power_supply', 0, {}
- if not os.path.exists(p):
- return False
- for d in os.listdir(p):
- type = sysvals.getVal(os.path.join(p, d, 'type')).strip().lower()
- if type != 'battery':
- continue
- for v in ['status', 'energy_now', 'capacity_now']:
- bat[v] = sysvals.getVal(os.path.join(p, d, v)).strip().lower()
- break
- if 'status' not in bat:
- return False
- ac = False if 'discharging' in bat['status'] else True
- for v in ['energy_now', 'capacity_now']:
- if v in bat and bat[v]:
- charge = int(bat[v])
- return (ac, charge)
-
def displayControl(cmd):
xset, ret = 'timeout 10 xset -d :0.0 {0}', 0
if sysvals.sudouser:
@@ -5715,6 +5798,17 @@ def statusCheck(probecheck=False):
status = 'rtcwake is not properly supported'
pprint(' is rtcwake supported: %s' % res)
+ # check info commands
+ pprint(' optional commands this tool may use for info:')
+ no = sysvals.colorText('MISSING')
+ yes = sysvals.colorText('FOUND', 32)
+ for c in ['turbostat', 'mcelog', 'lspci', 'lsusb']:
+ if c == 'turbostat':
+ res = yes if sysvals.haveTurbostat() else no
+ else:
+ res = yes if sysvals.getExec(c) else no
+ pprint(' %s: %s' % (c, res))
+
if not probecheck:
return status
@@ -5780,8 +5874,9 @@ def getArgFloat(name, args, min, max, main=True):
doError(name+': value should be between %f and %f' % (min, max), True)
return val
-def processData(live=False):
- pprint('PROCESSING DATA')
+def processData(live=False, quiet=False):
+ if not quiet:
+ pprint('PROCESSING DATA')
sysvals.vprint('usetraceevents=%s, usetracemarkers=%s, usekprobes=%s' % \
(sysvals.usetraceevents, sysvals.usetracemarkers, sysvals.usekprobes))
error = ''
@@ -5796,20 +5891,17 @@ def processData(live=False):
parseKernelLog(data)
if(sysvals.ftracefile and (sysvals.usecallgraph or sysvals.usetraceevents)):
appendIncompleteTraceLog(testruns)
+ if not sysvals.stamp:
+ pprint('ERROR: data does not include the expected stamp')
+ return (testruns, {'error': 'timeline generation failed'})
shown = ['bios', 'biosdate', 'cpu', 'host', 'kernel', 'man', 'memfr',
- 'memsz', 'mode', 'numcpu', 'plat', 'time']
+ 'memsz', 'mode', 'numcpu', 'plat', 'time', 'wifi']
sysvals.vprint('System Info:')
for key in sorted(sysvals.stamp):
if key in shown:
sysvals.vprint(' %-8s : %s' % (key.upper(), sysvals.stamp[key]))
- if sysvals.kparams:
- sysvals.vprint('Kparams:\n %s' % sysvals.kparams)
sysvals.vprint('Command:\n %s' % sysvals.cmdline)
for data in testruns:
- if data.mcelog:
- sysvals.vprint('MCELOG Data:')
- for line in data.mcelog.split('\n'):
- sysvals.vprint(' %s' % line)
if data.turbostat:
idx, s = 0, 'Turbostat:\n '
for val in data.turbostat.split('|'):
@@ -5819,23 +5911,13 @@ def processData(live=False):
s += '\n '
s += val + ' '
sysvals.vprint(s)
- if data.battery:
- a1, c1, a2, c2 = data.battery
- s = 'Battery:\n Before - AC: %s, Charge: %d\n After - AC: %s, Charge: %d' % \
- (a1, int(c1), a2, int(c2))
- sysvals.vprint(s)
- if data.wifi:
- w = data.wifi.replace('|', ' ').split(',')
- s = 'Wifi:\n Before %s\n After %s' % \
- (w[0], w[1])
- sysvals.vprint(s)
data.printDetails()
- if len(sysvals.platinfo) > 0:
- sysvals.vprint('\nPlatform Info:')
- for info in sysvals.platinfo:
- sysvals.vprint(info[0]+' - '+info[1])
- sysvals.vprint(info[2])
- sysvals.vprint('')
+ if len(sysvals.platinfo) > 0:
+ sysvals.vprint('\nPlatform Info:')
+ for info in sysvals.platinfo:
+ sysvals.vprint('[%s - %s]' % (info[0], info[1]))
+ sysvals.vprint(info[2])
+ sysvals.vprint('')
if sysvals.cgdump:
for data in testruns:
data.debugPrint()
@@ -5845,7 +5927,8 @@ def processData(live=False):
return (testruns, {'error': 'timeline generation failed'})
sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile)
createHTML(testruns, error)
- pprint('DONE')
+ if not quiet:
+ pprint('DONE')
data = testruns[0]
stamp = data.stamp
stamp['suspend'], stamp['resume'] = data.getTimeValues()
@@ -5872,31 +5955,28 @@ def rerunTest(htmlfile=''):
doError('a directory already exists with this name: %s' % sysvals.htmlfile)
elif not os.access(sysvals.htmlfile, os.W_OK):
doError('missing permission to write to %s' % sysvals.htmlfile)
- testruns, stamp = processData(False)
- sysvals.logmsg = ''
+ testruns, stamp = processData()
+ sysvals.resetlog()
return stamp
# Function: runTest
# Description:
# execute a suspend/resume, gather the logs, and generate the output
-def runTest(n=0):
+def runTest(n=0, quiet=False):
# prepare for the test
- sysvals.initFtrace()
+ sysvals.initFtrace(quiet)
sysvals.initTestOutput('suspend')
# execute the test
- testdata = executeSuspend()
+ executeSuspend(quiet)
sysvals.cleanupFtrace()
if sysvals.skiphtml:
+ sysvals.outputResult({}, n)
sysvals.sudoUserchown(sysvals.testdir)
return
- if not testdata[0]['error']:
- testruns, stamp = processData(True)
- for data in testruns:
- del data
- else:
- stamp = testdata[0]
-
+ testruns, stamp = processData(True, quiet)
+ for data in testruns:
+ del data
sysvals.sudoUserchown(sysvals.testdir)
sysvals.outputResult(stamp, n)
if 'error' in stamp:
@@ -5904,13 +5984,14 @@ def runTest(n=0):
return 0
def find_in_html(html, start, end, firstonly=True):
- n, out = 0, []
- while n < len(html):
- m = re.search(start, html[n:])
+ n, cnt, out = 0, len(html), []
+ while n < cnt:
+ e = cnt if (n + 10000 > cnt or n == 0) else n + 10000
+ m = re.search(start, html[n:e])
if not m:
break
i = m.end()
- m = re.search(end, html[n+i:])
+ m = re.search(end, html[n+i:e])
if not m:
break
j = m.start()
@@ -5945,7 +6026,7 @@ def data_from_html(file, outpath, issues, fulldetail=False):
tstr = dt.strftime('%Y/%m/%d %H:%M:%S')
error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>')
if error:
- m = re.match('[a-z]* failed in (?P<p>[a-z0-9_]*) phase', error)
+ m = re.match('[a-z0-9]* failed in (?P<p>\S*).*', error)
if m:
result = 'fail in %s' % m.group('p')
else:
@@ -5974,6 +6055,9 @@ def data_from_html(file, outpath, issues, fulldetail=False):
elist[err[0]] += 1
for i in elist:
ilist.append('%sx%d' % (i, elist[i]) if elist[i] > 1 else i)
+ wifi = find_in_html(html, 'Wifi Resume: ', '</td>')
+ if wifi:
+ extra['wifi'] = wifi
low = find_in_html(html, 'freeze time: <b>', ' ms</b>')
if low and '|' in low:
issue = 'FREEZEx%d' % len(low.split('|'))
@@ -6048,13 +6132,15 @@ def genHtml(subdir, force=False):
for dirname, dirnames, filenames in os.walk(subdir):
sysvals.dmesgfile = sysvals.ftracefile = sysvals.htmlfile = ''
for filename in filenames:
- if(re.match('.*_dmesg.txt', filename)):
- sysvals.dmesgfile = os.path.join(dirname, filename)
- elif(re.match('.*_ftrace.txt', filename)):
- sysvals.ftracefile = os.path.join(dirname, filename)
+ file = os.path.join(dirname, filename)
+ if sysvals.usable(file):
+ if(re.match('.*_dmesg.txt', filename)):
+ sysvals.dmesgfile = file
+ elif(re.match('.*_ftrace.txt', filename)):
+ sysvals.ftracefile = file
sysvals.setOutputFile()
- if sysvals.ftracefile and sysvals.htmlfile and \
- (force or not os.path.exists(sysvals.htmlfile)):
+ if (sysvals.dmesgfile or sysvals.ftracefile) and sysvals.htmlfile and \
+ (force or not sysvals.usable(sysvals.htmlfile)):
pprint('FTRACE: %s' % sysvals.ftracefile)
if sysvals.dmesgfile:
pprint('DMESG : %s' % sysvals.dmesgfile)
@@ -6169,9 +6255,9 @@ def configFromFile(file):
sysvals.cgtest = getArgInt('cgtest', value, 0, 1, False)
elif(option == 'cgphase'):
d = Data(0)
- if value not in d.sortedPhases():
+ if value not in d.phasedef:
doError('invalid phase --> (%s: %s), valid phases are %s'\
- % (option, value, d.sortedPhases()), True)
+ % (option, value, d.phasedef.keys()), True)
sysvals.cgphase = value
elif(option == 'fadd'):
file = sysvals.configFile(value)
@@ -6184,9 +6270,7 @@ def configFromFile(file):
nums = value.split()
if len(nums) != 2:
doError('multi requires 2 integers (exec_count and delay)', True)
- sysvals.multitest['run'] = True
- sysvals.multitest['count'] = getArgInt('multi: n d (exec count)', nums[0], 2, 1000000, False)
- sysvals.multitest['delay'] = getArgInt('multi: n d (delay between tests)', nums[1], 0, 3600, False)
+ sysvals.multiinit(nums[0], nums[1])
elif(option == 'devicefilter'):
sysvals.setDeviceFilter(value)
elif(option == 'expandcg'):
@@ -6342,6 +6426,7 @@ def printHelp():
' -srgap Add a visible gap in the timeline between sus/res (default: disabled)\n'\
' -skiphtml Run the test and capture the trace logs, but skip the timeline (default: disabled)\n'\
' -result fn Export a results table to a text file for parsing.\n'\
+ ' -wifi If a wifi connection is available, check that it reconnects after resume.\n'\
' [testprep]\n'\
' -sync Sync the filesystems before starting the test\n'\
' -rs on/off Enable/disable runtime suspend for all devices, restore all after test\n'\
@@ -6356,8 +6441,10 @@ def printHelp():
' -predelay t Include t ms delay before 1st suspend (default: 0 ms)\n'\
' -postdelay t Include t ms delay after last resume (default: 0 ms)\n'\
' -mindev ms Discard all device blocks shorter than ms milliseconds (e.g. 0.001 for us)\n'\
- ' -multi n d Execute <n> consecutive tests at <d> seconds intervals. The outputs will\n'\
- ' be created in a new subdirectory with a summary page.\n'\
+ ' -multi n d Execute <n> consecutive tests at <d> seconds intervals. If <n> is followed\n'\
+ ' by a "d", "h", or "m" execute for <n> days, hours, or mins instead.\n'\
+ ' The outputs will be created in a new subdirectory with a summary page.\n'\
+ ' -maxfail n Abort a -multi run after n consecutive fails (default is 0 = never abort)\n'\
' [debug]\n'\
' -f Use ftrace to create device callgraphs (default: disabled)\n'\
' -ftop Use ftrace on the top level call: "%s" (default: disabled)\n'\
@@ -6379,11 +6466,11 @@ def printHelp():
' -modes List available suspend modes\n'\
' -status Test to see if the system is enabled to run this tool\n'\
' -fpdt Print out the contents of the ACPI Firmware Performance Data Table\n'\
- ' -battery Print out battery info (if available)\n'\
- ' -wifi Print out wifi connection info (if wireless-tools and device exists)\n'\
+ ' -wificheck Print out wifi connection info\n'\
' -x<mode> Test xset by toggling the given mode (on/off/standby/suspend)\n'\
' -sysinfo Print out system info extracted from BIOS\n'\
' -devinfo Print out the pm settings of all devices which support runtime suspend\n'\
+ ' -cmdinfo Print out all the platform info collected before and after suspend/resume\n'\
' -flist Print the list of functions currently being captured in ftrace\n'\
' -flistall Print all functions capable of being captured in ftrace\n'\
' -summary dir Create a summary of tests in this dir [-genhtml builds missing html]\n'\
@@ -6399,8 +6486,8 @@ if __name__ == '__main__':
genhtml = False
cmd = ''
simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall',
- '-devinfo', '-status', '-battery', '-xon', '-xoff', '-xstandby',
- '-xsuspend', '-xinit', '-xreset', '-xstat', '-wifi']
+ '-devinfo', '-status', '-xon', '-xoff', '-xstandby', '-xsuspend',
+ '-xinit', '-xreset', '-xstat', '-wificheck', '-cmdinfo']
if '-f' in sys.argv:
sysvals.cgskip = sysvals.configFile('cgskip.txt')
# loop through the command line arguments
@@ -6462,8 +6549,15 @@ if __name__ == '__main__':
sysvals.usedevsrc = True
elif(arg == '-sync'):
sysvals.sync = True
+ elif(arg == '-wifi'):
+ sysvals.wifi = True
elif(arg == '-gzip'):
sysvals.gzip = True
+ elif(arg == '-info'):
+ try:
+ val = next(args)
+ except:
+ doError('-info requires one string argument', True)
elif(arg == '-rs'):
try:
val = next(args)
@@ -6555,10 +6649,14 @@ if __name__ == '__main__':
sysvals.cgexp = True
elif(arg == '-srgap'):
sysvals.srgap = 5
+ elif(arg == '-maxfail'):
+ sysvals.maxfail = getArgInt('-maxfail', args, 0, 1000000)
elif(arg == '-multi'):
- sysvals.multitest['run'] = True
- sysvals.multitest['count'] = getArgInt('-multi n d (exec count)', args, 2, 1000000)
- sysvals.multitest['delay'] = getArgInt('-multi n d (delay between tests)', args, 0, 3600)
+ try:
+ c, d = next(args), next(args)
+ except:
+ doError('-multi requires two values', True)
+ sysvals.multiinit(c, d)
elif(arg == '-o'):
try:
val = next(args)
@@ -6655,13 +6753,6 @@ if __name__ == '__main__':
elif(cmd == 'fpdt'):
if not getFPDT(True):
ret = 1
- elif(cmd == 'battery'):
- out = getBattery()
- if out:
- pprint('AC Connect : %s\nBattery Charge: %d' % out)
- else:
- pprint('no battery found')
- ret = 1
elif(cmd == 'sysinfo'):
sysvals.printSystemInfo(True)
elif(cmd == 'devinfo'):
@@ -6679,13 +6770,15 @@ if __name__ == '__main__':
ret = displayControl(cmd[1:])
elif(cmd == 'xstat'):
pprint('Display Status: %s' % displayControl('stat').upper())
- elif(cmd == 'wifi'):
- out = sysvals.checkWifi()
- if 'device' not in out:
- pprint('WIFI interface not found')
+ elif(cmd == 'wificheck'):
+ dev = sysvals.checkWifi()
+ if dev:
+ print('%s is connected' % sysvals.wifiDetails(dev))
else:
- for key in sorted(out):
- pprint('%6s: %s' % (key.upper(), out[key]))
+ print('No wifi connection found')
+ elif(cmd == 'cmdinfo'):
+ for out in sysvals.cmdinfo(False, True):
+ print('[%s - %s]\n%s\n' % out)
sys.exit(ret)
# if instructed, re-analyze existing data files
@@ -6720,24 +6813,38 @@ if __name__ == '__main__':
setRuntimeSuspend(True)
if sysvals.display:
displayControl('init')
- ret = 0
+ failcnt, ret = 0, 0
if sysvals.multitest['run']:
# run multiple tests in a separate subdirectory
if not sysvals.outdir:
- s = 'suspend-x%d' % sysvals.multitest['count']
- sysvals.outdir = datetime.now().strftime(s+'-%y%m%d-%H%M%S')
+ if 'time' in sysvals.multitest:
+ s = '-%dm' % sysvals.multitest['time']
+ else:
+ s = '-x%d' % sysvals.multitest['count']
+ sysvals.outdir = datetime.now().strftime('suspend-%y%m%d-%H%M%S'+s)
if not os.path.isdir(sysvals.outdir):
os.makedirs(sysvals.outdir)
+ sysvals.sudoUserchown(sysvals.outdir)
+ finish = datetime.now()
+ if 'time' in sysvals.multitest:
+ finish += timedelta(minutes=sysvals.multitest['time'])
for i in range(sysvals.multitest['count']):
- if(i != 0):
+ sysvals.multistat(True, i, finish)
+ if i != 0 and sysvals.multitest['delay'] > 0:
pprint('Waiting %d seconds...' % (sysvals.multitest['delay']))
time.sleep(sysvals.multitest['delay'])
- pprint('TEST (%d/%d) START' % (i+1, sysvals.multitest['count']))
fmt = 'suspend-%y%m%d-%H%M%S'
sysvals.testdir = os.path.join(sysvals.outdir, datetime.now().strftime(fmt))
- ret = runTest(i+1)
- pprint('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count']))
- sysvals.logmsg = ''
+ ret = runTest(i+1, True)
+ failcnt = 0 if not ret else failcnt + 1
+ if sysvals.maxfail > 0 and failcnt >= sysvals.maxfail:
+ pprint('Maximum fail count of %d reached, aborting multitest' % (sysvals.maxfail))
+ break
+ time.sleep(5)
+ sysvals.resetlog()
+ sysvals.multistat(False, i, finish)
+ if 'time' in sysvals.multitest and datetime.now() >= finish:
+ break
if not sysvals.skiphtml:
runSummary(sysvals.outdir, False, False)
sysvals.sudoUserchown(sysvals.outdir)
diff --git a/tools/power/x86/intel-speed-select/.gitignore b/tools/power/x86/intel-speed-select/.gitignore
index f61145925ce9..a814f89fe75f 100644
--- a/tools/power/x86/intel-speed-select/.gitignore
+++ b/tools/power/x86/intel-speed-select/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
include/
intel-speed-select
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 2b2b8167c65b..9f68f51ca652 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.2";
+static const char *version_str = "v1.4";
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -25,7 +25,7 @@ static FILE *outf;
static int cpu_model;
static int cpu_stepping;
-#define MAX_CPUS_IN_ONE_REQ 64
+#define MAX_CPUS_IN_ONE_REQ 256
static short max_target_cpus;
static unsigned short target_cpus[MAX_CPUS_IN_ONE_REQ];
@@ -42,6 +42,7 @@ static int out_format_json;
static int cmd_help;
static int force_online_offline;
static int auto_mode;
+static int fact_enable_fail;
/* clos related */
static int current_clos = -1;
@@ -61,6 +62,18 @@ struct _cpu_map {
};
struct _cpu_map *cpu_map;
+struct cpu_topology {
+ short cpu;
+ short core_id;
+ short pkg_id;
+ short die_id;
+};
+
+FILE *get_output_file(void)
+{
+ return outf;
+}
+
void debug_printf(const char *format, ...)
{
va_list args;
@@ -82,6 +95,14 @@ int is_clx_n_platform(void)
return 0;
}
+int is_skx_based_platform(void)
+{
+ if (cpu_model == 0x55)
+ return 1;
+
+ return 0;
+}
+
static int update_cpu_model(void)
{
unsigned int ebx, ecx, edx;
@@ -175,25 +196,137 @@ int out_format_is_json(void)
return out_format_json;
}
+static int get_stored_topology_info(int cpu, int *core_id, int *pkg_id, int *die_id)
+{
+ const char *pathname = "/tmp/isst_cpu_topology.dat";
+ struct cpu_topology cpu_top;
+ FILE *fp;
+ int ret;
+
+ fp = fopen(pathname, "rb");
+ if (!fp)
+ return -1;
+
+ ret = fseek(fp, cpu * sizeof(cpu_top), SEEK_SET);
+ if (ret)
+ goto err_ret;
+
+ ret = fread(&cpu_top, sizeof(cpu_top), 1, fp);
+ if (ret != 1) {
+ ret = -1;
+ goto err_ret;
+ }
+
+ *pkg_id = cpu_top.pkg_id;
+ *core_id = cpu_top.core_id;
+ *die_id = cpu_top.die_id;
+ ret = 0;
+
+err_ret:
+ fclose(fp);
+
+ return ret;
+}
+
+static void store_cpu_topology(void)
+{
+ const char *pathname = "/tmp/isst_cpu_topology.dat";
+ FILE *fp;
+ int i;
+
+ fp = fopen(pathname, "rb");
+ if (fp) {
+ /* Mapping already exists */
+ fclose(fp);
+ return;
+ }
+
+ fp = fopen(pathname, "wb");
+ if (!fp) {
+ fprintf(stderr, "Can't create file:%s\n", pathname);
+ return;
+ }
+
+ for (i = 0; i < topo_max_cpus; ++i) {
+ struct cpu_topology cpu_top;
+
+ cpu_top.core_id = parse_int_file(0,
+ "/sys/devices/system/cpu/cpu%d/topology/core_id", i);
+ if (cpu_top.core_id < 0)
+ cpu_top.core_id = -1;
+
+ cpu_top.pkg_id = parse_int_file(0,
+ "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", i);
+ if (cpu_top.pkg_id < 0)
+ cpu_top.pkg_id = -1;
+
+ cpu_top.die_id = parse_int_file(0,
+ "/sys/devices/system/cpu/cpu%d/topology/die_id", i);
+ if (cpu_top.die_id < 0)
+ cpu_top.die_id = -1;
+
+ cpu_top.cpu = i;
+
+ if (fwrite(&cpu_top, sizeof(cpu_top), 1, fp) != 1) {
+ fprintf(stderr, "Can't write to:%s\n", pathname);
+ break;
+ }
+ }
+
+ fclose(fp);
+}
+
int get_physical_package_id(int cpu)
{
- return parse_int_file(
- 0, "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
- cpu);
+ int ret;
+
+ ret = parse_int_file(0,
+ "/sys/devices/system/cpu/cpu%d/topology/physical_package_id",
+ cpu);
+ if (ret < 0) {
+ int core_id, pkg_id, die_id;
+
+ ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id);
+ if (!ret)
+ return pkg_id;
+ }
+
+ return ret;
}
int get_physical_core_id(int cpu)
{
- return parse_int_file(
- 0, "/sys/devices/system/cpu/cpu%d/topology/core_id", cpu);
+ int ret;
+
+ ret = parse_int_file(0,
+ "/sys/devices/system/cpu/cpu%d/topology/core_id",
+ cpu);
+ if (ret < 0) {
+ int core_id, pkg_id, die_id;
+
+ ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id);
+ if (!ret)
+ return core_id;
+ }
+
+ return ret;
}
int get_physical_die_id(int cpu)
{
int ret;
- ret = parse_int_file(0, "/sys/devices/system/cpu/cpu%d/topology/die_id",
- cpu);
+ ret = parse_int_file(0,
+ "/sys/devices/system/cpu/cpu%d/topology/die_id",
+ cpu);
+ if (ret < 0) {
+ int core_id, pkg_id, die_id;
+
+ ret = get_stored_topology_info(cpu, &core_id, &pkg_id, &die_id);
+ if (!ret)
+ return die_id;
+ }
+
if (ret < 0)
ret = 0;
@@ -219,8 +352,14 @@ static void set_cpu_online_offline(int cpu, int state)
"/sys/devices/system/cpu/cpu%d/online", cpu);
fd = open(buffer, O_WRONLY);
- if (fd < 0)
+ if (fd < 0) {
+ if (!cpu && state) {
+ fprintf(stderr, "This system is not configured for CPU 0 online/offline\n");
+ fprintf(stderr, "Ignoring online request for CPU 0 as this is already online\n");
+ return;
+ }
err(-1, "%s open failed", buffer);
+ }
if (state)
ret = write(fd, "1\n", 2);
@@ -259,7 +398,12 @@ static void for_each_online_package_in_set(void (*callback)(int, void *, void *,
die_id = get_physical_die_id(i);
if (die_id < 0)
die_id = 0;
- pkg_id = get_physical_package_id(i);
+
+ pkg_id = parse_int_file(0,
+ "/sys/devices/system/cpu/cpu%d/topology/physical_package_id", i);
+ if (pkg_id < 0)
+ continue;
+
/* Create an unique id for package, die combination to store */
pkg_id = (MAX_PACKAGE_COUNT * pkg_id + die_id);
@@ -281,7 +425,7 @@ static void for_each_online_target_cpu_in_set(
void (*callback)(int, void *, void *, void *, void *), void *arg1,
void *arg2, void *arg3, void *arg4)
{
- int i;
+ int i, found = 0;
for (i = 0; i < topo_max_cpus; ++i) {
int online;
@@ -295,9 +439,14 @@ static void for_each_online_target_cpu_in_set(
online =
1; /* online entry for CPU 0 needs some special configs */
- if (online && callback)
+ if (online && callback) {
callback(i, arg1, arg2, arg3, arg4);
+ found = 1;
+ }
}
+
+ if (!found)
+ fprintf(stderr, "No valid CPU in the list\n");
}
#define BITMASK_SIZE 32
@@ -305,14 +454,27 @@ static void set_max_cpu_num(void)
{
FILE *filep;
unsigned long dummy;
+ int i;
topo_max_cpus = 0;
- filep = fopen_or_exit(
- "/sys/devices/system/cpu/cpu0/topology/thread_siblings", "r");
+ for (i = 0; i < 256; ++i) {
+ char path[256];
+
+ snprintf(path, sizeof(path),
+ "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", i);
+ filep = fopen(path, "r");
+ if (filep)
+ break;
+ }
+
+ if (!filep) {
+ fprintf(stderr, "Can't get max cpu number\n");
+ exit(0);
+ }
+
while (fscanf(filep, "%lx,", &dummy) == 1)
topo_max_cpus += BITMASK_SIZE;
fclose(filep);
- topo_max_cpus--; /* 0 based */
debug_printf("max cpus %d\n", topo_max_cpus);
}
@@ -362,6 +524,10 @@ static void set_cpu_present_cpu_mask(void)
die_id = 0;
pkg_id = get_physical_package_id(i);
+ if (pkg_id < 0) {
+ fprintf(stderr, "Failed to get package id, CPU %d may be offline\n", i);
+ continue;
+ }
if (pkg_id < MAX_PACKAGE_COUNT &&
die_id < MAX_DIE_PER_PACKAGE) {
int core_id = get_physical_core_id(i);
@@ -487,7 +653,7 @@ void set_cpu_mask_from_punit_coremask(int cpu, unsigned long long core_mask,
pkg_id = get_physical_package_id(cpu);
for (i = 0; i < 64; ++i) {
- if (core_mask & BIT(i)) {
+ if (core_mask & BIT_ULL(i)) {
int j;
for (j = 0; j < topo_max_cpus; ++j) {
@@ -542,7 +708,11 @@ static int isst_send_mmio_command(unsigned int cpu, unsigned int reg, int write,
}
if (ioctl(fd, cmd, &io_regs) == -1) {
- perror("ISST_IF_IO_CMD");
+ if (errno == ENOTTY) {
+ perror("ISST_IF_IO_COMMAND\n");
+ fprintf(stderr, "Check presence of kernel modules: isst_if_mmio\n");
+ exit(0);
+ }
fprintf(outf, "Error: mmio_cmd cpu:%d reg:%x read_write:%x\n",
cpu, reg, write);
} else {
@@ -571,7 +741,8 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command,
"mbox_send: cpu:%d command:%x sub_command:%x parameter:%x req_data:%x\n",
cpu, command, sub_command, parameter, req_data);
- if (isst_platform_info.mmio_supported && command == CONFIG_CLOS) {
+ if (!is_skx_based_platform() && command == CONFIG_CLOS &&
+ sub_command != CLOS_PM_QOS_CONFIG) {
unsigned int value;
int write = 0;
int clos_id, core_id, ret = 0;
@@ -620,10 +791,14 @@ int isst_send_mbox_command(unsigned int cpu, unsigned char command,
err(-1, "%s open failed", pathname);
if (ioctl(fd, ISST_IF_MBOX_COMMAND, &mbox_cmds) == -1) {
- perror("ISST_IF_MBOX_COMMAND");
- fprintf(outf,
- "Error: mbox_cmd cpu:%d command:%x sub_command:%x parameter:%x req_data:%x\n",
- cpu, command, sub_command, parameter, req_data);
+ if (errno == ENOTTY) {
+ perror("ISST_IF_MBOX_COMMAND\n");
+ fprintf(stderr, "Check presence of kernel modules: isst_if_mbox_pci or isst_if_mbox_msr\n");
+ exit(0);
+ }
+ debug_printf(
+ "Error: mbox_cmd cpu:%d command:%x sub_command:%x parameter:%x req_data:%x errorno:%d\n",
+ cpu, command, sub_command, parameter, req_data, errno);
return -1;
} else {
*resp = mbox_cmds.mbox_cmd[0].resp_data;
@@ -656,7 +831,7 @@ int isst_send_msr_command(unsigned int cpu, unsigned int msr, int write,
msr_cmds.msr_cmd[0].data = *req_resp;
if (ioctl(fd, ISST_IF_MSR_COMMAND, &msr_cmds) == -1) {
- perror("ISST_IF_MSR_COMMAD");
+ perror("ISST_IF_MSR_COMMAND");
fprintf(outf, "Error: msr_cmd cpu:%d msr:%x read_write:%d\n",
cpu, msr, write);
} else {
@@ -697,12 +872,85 @@ static int isst_fill_platform_info(void)
return 0;
}
+static void isst_print_extended_platform_info(void)
+{
+ int cp_state, cp_cap, fact_support = 0, pbf_support = 0;
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ struct isst_pkg_ctdp pkg_dev;
+ int ret, i, j;
+ FILE *filep;
+
+ for (i = 0; i < 256; ++i) {
+ char path[256];
+
+ snprintf(path, sizeof(path),
+ "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", i);
+ filep = fopen(path, "r");
+ if (filep)
+ break;
+ }
+
+ if (!filep)
+ return;
+
+ fclose(filep);
+
+ ret = isst_get_ctdp_levels(i, &pkg_dev);
+ if (ret)
+ return;
+
+ if (pkg_dev.enabled) {
+ fprintf(outf, "Intel(R) SST-PP (feature perf-profile) is supported\n");
+ } else {
+ fprintf(outf, "Intel(R) SST-PP (feature perf-profile) is not supported\n");
+ fprintf(outf, "Only performance level 0 (base level) is present\n");
+ }
+
+ if (pkg_dev.locked)
+ fprintf(outf, "TDP level change control is locked\n");
+ else
+ fprintf(outf, "TDP level change control is unlocked, max level: %d \n", pkg_dev.levels);
+
+ for (j = 0; j <= pkg_dev.levels; ++j) {
+ ret = isst_get_ctdp_control(i, j, &ctdp_level);
+ if (ret)
+ continue;
+
+ if (!fact_support && ctdp_level.fact_support)
+ fact_support = 1;
+
+ if (!pbf_support && ctdp_level.pbf_support)
+ pbf_support = 1;
+ }
+
+ if (fact_support)
+ fprintf(outf, "Intel(R) SST-TF (feature turbo-freq) is supported\n");
+ else
+ fprintf(outf, "Intel(R) SST-TF (feature turbo-freq) is not supported\n");
+
+ if (pbf_support)
+ fprintf(outf, "Intel(R) SST-BF (feature base-freq) is supported\n");
+ else
+ fprintf(outf, "Intel(R) SST-BF (feature base-freq) is not supported\n");
+
+ ret = isst_read_pm_config(i, &cp_state, &cp_cap);
+ if (cp_cap)
+ fprintf(outf, "Intel(R) SST-CP (feature core-power) is supported\n");
+ else
+ fprintf(outf, "Intel(R) SST-CP (feature core-power) is not supported\n");
+}
+
static void isst_print_platform_information(void)
{
struct isst_if_platform_info platform_info;
const char *pathname = "/dev/isst_interface";
int fd;
+ if (is_clx_n_platform()) {
+ fprintf(stderr, "\nThis option in not supported on this platform\n");
+ exit(0);
+ }
+
fd = open(pathname, O_RDWR);
if (fd < 0)
err(-1, "%s open failed", pathname);
@@ -718,6 +966,7 @@ static void isst_print_platform_information(void)
platform_info.mbox_supported);
fprintf(outf, "Platform: mmio supported : %d\n",
platform_info.mmio_supported);
+ isst_print_extended_platform_info();
}
close(fd);
@@ -725,6 +974,7 @@ static void isst_print_platform_information(void)
exit(0);
}
+static char *local_str0, *local_str1;
static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3,
void *arg4)
{
@@ -734,13 +984,14 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3,
fn_ptr = arg1;
ret = fn_ptr(cpu, arg2);
if (ret)
- perror("get_tdp_*");
+ isst_display_error_info_message(1, "get_tdp_* failed", 0, 0);
else
isst_ctdp_display_core_info(cpu, outf, arg3,
- *(unsigned int *)arg4);
+ *(unsigned int *)arg4,
+ local_str0, local_str1);
}
-#define _get_tdp_level(desc, suffix, object, help) \
+#define _get_tdp_level(desc, suffix, object, help, str0, str1) \
static void get_tdp_##object(int arg) \
{ \
struct isst_pkg_ctdp ctdp; \
@@ -751,6 +1002,8 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3,
help); \
exit(0); \
} \
+ local_str0 = str0; \
+ local_str1 = str1; \
isst_ctdp_display_information_start(outf); \
if (max_target_cpus) \
for_each_online_target_cpu_in_set( \
@@ -764,12 +1017,12 @@ static void exec_on_get_ctdp_cpu(int cpu, void *arg1, void *arg2, void *arg3,
isst_ctdp_display_information_end(outf); \
}
-_get_tdp_level("get-config-levels", levels, levels, "TDP levels");
-_get_tdp_level("get-config-version", levels, version, "TDP version");
-_get_tdp_level("get-config-enabled", levels, enabled, "TDP enable status");
+_get_tdp_level("get-config-levels", levels, levels, "Max TDP level", NULL, NULL);
+_get_tdp_level("get-config-version", levels, version, "TDP version", NULL, NULL);
+_get_tdp_level("get-config-enabled", levels, enabled, "perf-profile enable status", "disabled", "enabled");
_get_tdp_level("get-config-current_level", levels, current_level,
- "Current TDP Level");
-_get_tdp_level("get-lock-status", levels, locked, "TDP lock status");
+ "Current TDP Level", NULL, NULL);
+_get_tdp_level("get-lock-status", levels, locked, "TDP lock status", "unlocked", "locked");
struct isst_pkg_ctdp clx_n_pkg_dev;
@@ -902,15 +1155,21 @@ static void dump_clx_n_config_for_cpu(int cpu, void *arg1, void *arg2,
{
int ret;
+ if (tdp_level != 0xff && tdp_level != 0) {
+ isst_display_error_info_message(1, "Invalid level", 1, tdp_level);
+ exit(0);
+ }
+
ret = clx_n_config(cpu);
if (ret) {
- perror("isst_get_process_ctdp");
+ debug_printf("clx_n_config failed");
} else {
struct isst_pkg_ctdp_level_info *ctdp_level;
struct isst_pbf_info *pbf_info;
ctdp_level = &clx_n_pkg_dev.ctdp_level[0];
pbf_info = &ctdp_level->pbf_info;
+ clx_n_pkg_dev.processed = 1;
isst_ctdp_display_information(cpu, outf, tdp_level, &clx_n_pkg_dev);
free_cpu_set(ctdp_level->core_cpumask);
free_cpu_set(pbf_info->core_cpumask);
@@ -926,7 +1185,9 @@ static void dump_isst_config_for_cpu(int cpu, void *arg1, void *arg2,
memset(&pkg_dev, 0, sizeof(pkg_dev));
ret = isst_get_process_ctdp(cpu, tdp_level, &pkg_dev);
if (ret) {
- perror("isst_get_process_ctdp");
+ isst_display_error_info_message(1, "Failed to get perf-profile info on cpu", 1, cpu);
+ isst_ctdp_display_information_end(outf);
+ exit(1);
} else {
isst_ctdp_display_information(cpu, outf, tdp_level, &pkg_dev);
isst_get_process_ctdp_complete(cpu, &pkg_dev);
@@ -969,9 +1230,11 @@ static void set_tdp_level_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
int ret;
ret = isst_set_tdp_level(cpu, tdp_level);
- if (ret)
- perror("set_tdp_level_for_cpu");
- else {
+ if (ret) {
+ isst_display_error_info_message(1, "Set TDP level failed", 0, 0);
+ isst_ctdp_display_information_end(outf);
+ exit(1);
+ } else {
isst_display_result(cpu, outf, "perf-profile", "set_tdp_level",
ret);
if (force_online_offline) {
@@ -1009,11 +1272,13 @@ static void set_tdp_level(int arg)
"\t Arguments: -l|--level : Specify tdp level\n");
fprintf(stderr,
"\t Optional Arguments: -o | online : online/offline for the tdp level\n");
+ fprintf(stderr,
+ "\t online/offline operation has limitations, refer to Linux hotplug documentation\n");
exit(0);
}
if (tdp_level == 0xff) {
- fprintf(outf, "Invalid command: specify tdp_level\n");
+ isst_display_error_info_message(1, "Invalid command: specify tdp_level", 0, 0);
exit(1);
}
isst_ctdp_display_information_start(outf);
@@ -1033,7 +1298,7 @@ static void clx_n_dump_pbf_config_for_cpu(int cpu, void *arg1, void *arg2,
ret = clx_n_config(cpu);
if (ret) {
- perror("isst_get_process_ctdp");
+ isst_display_error_info_message(1, "clx_n_config failed", 0, 0);
} else {
struct isst_pkg_ctdp_level_info *ctdp_level;
struct isst_pbf_info *pbf_info;
@@ -1054,7 +1319,9 @@ static void dump_pbf_config_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
ret = isst_get_pbf_info(cpu, tdp_level, &pbf_info);
if (ret) {
- perror("isst_get_pbf_info");
+ isst_display_error_info_message(1, "Failed to get base-freq info at this level", 1, tdp_level);
+ isst_ctdp_display_information_end(outf);
+ exit(1);
} else {
isst_pbf_display_information(cpu, outf, tdp_level, &pbf_info);
isst_get_pbf_info_complete(&pbf_info);
@@ -1074,7 +1341,7 @@ static void dump_pbf_config(int arg)
}
if (tdp_level == 0xff) {
- fprintf(outf, "Invalid command: specify tdp_level\n");
+ isst_display_error_info_message(1, "Invalid command: specify tdp_level", 0, 0);
exit(1);
}
@@ -1100,7 +1367,7 @@ static int set_clos_param(int cpu, int clos, int epp, int wt, int min, int max)
ret = isst_pm_get_clos(cpu, clos, &clos_config);
if (ret) {
- perror("isst_pm_get_clos");
+ isst_display_error_info_message(1, "isst_pm_get_clos failed", 0, 0);
return ret;
}
clos_config.clos_min = min;
@@ -1109,7 +1376,7 @@ static int set_clos_param(int cpu, int clos, int epp, int wt, int min, int max)
clos_config.clos_prop_prio = wt;
ret = isst_set_clos(cpu, clos, &clos_config);
if (ret) {
- perror("isst_pm_set_clos");
+ isst_display_error_info_message(1, "isst_set_clos failed", 0, 0);
return ret;
}
@@ -1153,7 +1420,7 @@ static int set_clx_pbf_cpufreq_scaling_min_max(int cpu)
ret = clx_n_config(cpu);
if (ret) {
- perror("set_clx_pbf_cpufreq_scaling_min_max");
+ debug_printf("cpufreq_scaling_min_max failed for CLX");
return ret;
}
@@ -1316,7 +1583,7 @@ static int set_core_priority_and_min(int cpu, int mask_size,
debug_printf("Associate cpu: %d clos: %d\n", i, clos);
ret = isst_clos_associate(i, clos);
if (ret) {
- perror("isst_clos_associate");
+ isst_display_error_info_message(1, "isst_clos_associate failed", 0, 0);
return ret;
}
}
@@ -1332,14 +1599,14 @@ static int set_pbf_core_power(int cpu)
ret = isst_get_ctdp_levels(cpu, &pkg_dev);
if (ret) {
- perror("isst_get_ctdp_levels");
+ debug_printf("isst_get_ctdp_levels failed");
return ret;
}
debug_printf("Current_level: %d\n", pkg_dev.current_level);
ret = isst_get_pbf_info(cpu, pkg_dev.current_level, &pbf_info);
if (ret) {
- perror("isst_get_pbf_info");
+ debug_printf("isst_get_pbf_info failed");
return ret;
}
debug_printf("p1_high: %d p1_low: %d\n", pbf_info.p1_high,
@@ -1349,13 +1616,13 @@ static int set_pbf_core_power(int cpu)
pbf_info.core_cpumask,
pbf_info.p1_high, pbf_info.p1_low);
if (ret) {
- perror("set_core_priority_and_min");
+ debug_printf("set_core_priority_and_min failed");
return ret;
}
ret = isst_pm_qos_config(cpu, 1, 1);
if (ret) {
- perror("isst_pm_qos_config");
+ debug_printf("isst_pm_qos_config failed");
return ret;
}
@@ -1365,25 +1632,41 @@ static int set_pbf_core_power(int cpu)
static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
void *arg4)
{
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ struct isst_pkg_ctdp pkg_dev;
int ret;
int status = *(int *)arg4;
if (is_clx_n_platform()) {
+ ret = 0;
if (status) {
- ret = 0;
- if (auto_mode)
- set_clx_pbf_cpufreq_scaling_min_max(cpu);
+ set_clx_pbf_cpufreq_scaling_min_max(cpu);
} else {
- ret = -1;
- if (auto_mode) {
- set_scaling_max_to_cpuinfo_max(cpu);
- set_scaling_min_to_cpuinfo_min(cpu);
- }
+ set_scaling_max_to_cpuinfo_max(cpu);
+ set_scaling_min_to_cpuinfo_min(cpu);
}
goto disp_result;
}
+ ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
+ goto disp_result;
+ }
+
+ ret = isst_get_ctdp_control(cpu, pkg_dev.current_level, &ctdp_level);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get current level", 0, 0);
+ goto disp_result;
+ }
+
+ if (!ctdp_level.pbf_support) {
+ isst_display_error_info_message(1, "base-freq feature is not present at this level", 1, pkg_dev.current_level);
+ ret = -1;
+ goto disp_result;
+ }
+
if (auto_mode && status) {
ret = set_pbf_core_power(cpu);
if (ret)
@@ -1392,7 +1675,7 @@ static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
ret = isst_set_pbf_fact_status(cpu, 1, status);
if (ret) {
- perror("isst_set_pbf");
+ debug_printf("isst_set_pbf_fact_status failed");
if (auto_mode)
isst_pm_qos_config(cpu, 0, 0);
} else {
@@ -1405,7 +1688,7 @@ static void set_pbf_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
}
if (auto_mode && !status)
- isst_pm_qos_config(cpu, 0, 0);
+ isst_pm_qos_config(cpu, 0, 1);
disp_result:
if (status)
@@ -1424,10 +1707,25 @@ static void set_pbf_enable(int arg)
if (enable) {
fprintf(stderr,
"Enable Intel Speed Select Technology base frequency feature\n");
+ if (is_clx_n_platform()) {
+ fprintf(stderr,
+ "\tOn this platform this command doesn't enable feature in the hardware.\n");
+ fprintf(stderr,
+ "\tIt updates the cpufreq scaling_min_freq to match cpufreq base_frequency.\n");
+ exit(0);
+
+ }
fprintf(stderr,
"\tOptional Arguments: -a|--auto : Use priority of cores to set core-power associations\n");
} else {
+ if (is_clx_n_platform()) {
+ fprintf(stderr,
+ "\tOn this platform this command doesn't disable feature in the hardware.\n");
+ fprintf(stderr,
+ "\tIt updates the cpufreq scaling_min_freq to match cpuinfo_min_freq\n");
+ exit(0);
+ }
fprintf(stderr,
"Disable Intel Speed Select Technology base frequency feature\n");
fprintf(stderr,
@@ -1452,12 +1750,15 @@ static void dump_fact_config_for_cpu(int cpu, void *arg1, void *arg2,
struct isst_fact_info fact_info;
int ret;
- ret = isst_get_fact_info(cpu, tdp_level, &fact_info);
- if (ret)
- perror("isst_get_fact_bucket_info");
- else
+ ret = isst_get_fact_info(cpu, tdp_level, fact_bucket, &fact_info);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get turbo-freq info at this level", 1, tdp_level);
+ isst_ctdp_display_information_end(outf);
+ exit(1);
+ } else {
isst_fact_display_information(cpu, outf, tdp_level, fact_bucket,
fact_avx, &fact_info);
+ }
}
static void dump_fact_config(int arg)
@@ -1475,7 +1776,7 @@ static void dump_fact_config(int arg)
}
if (tdp_level == 0xff) {
- fprintf(outf, "Invalid command: specify tdp_level\n");
+ isst_display_error_info_message(1, "Invalid command: specify tdp_level\n", 0, 0);
exit(1);
}
@@ -1492,10 +1793,30 @@ static void dump_fact_config(int arg)
static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
void *arg4)
{
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ struct isst_pkg_ctdp pkg_dev;
int ret;
int status = *(int *)arg4;
- if (auto_mode && status) {
+ ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
+ goto disp_results;
+ }
+
+ ret = isst_get_ctdp_control(cpu, pkg_dev.current_level, &ctdp_level);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get current level", 0, 0);
+ goto disp_results;
+ }
+
+ if (!ctdp_level.fact_support) {
+ isst_display_error_info_message(1, "turbo-freq feature is not present at this level", 1, pkg_dev.current_level);
+ ret = -1;
+ goto disp_results;
+ }
+
+ if (status) {
ret = isst_pm_qos_config(cpu, 1, 1);
if (ret)
goto disp_results;
@@ -1503,7 +1824,7 @@ static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
ret = isst_set_pbf_fact_status(cpu, 0, status);
if (ret) {
- perror("isst_set_fact");
+ debug_printf("isst_set_pbf_fact_status failed");
if (auto_mode)
isst_pm_qos_config(cpu, 0, 0);
@@ -1527,6 +1848,8 @@ static void set_fact_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
disp_results:
if (status) {
isst_display_result(cpu, outf, "turbo-freq", "enable", ret);
+ if (ret)
+ fact_enable_fail = ret;
} else {
/* Since we modified TRL during Fact enable, restore it */
isst_set_trl_from_current_tdp(cpu, fact_trl);
@@ -1568,7 +1891,7 @@ static void set_fact_enable(int arg)
NULL, &enable);
isst_ctdp_display_information_end(outf);
- if (enable && auto_mode) {
+ if (!fact_enable_fail && enable && auto_mode) {
/*
* When we adjust CLOS param, we have to set for siblings also.
* So for the each user specified CPU, also add the sibling
@@ -1652,9 +1975,12 @@ static void enable_clos_qos_config(int cpu, void *arg1, void *arg2, void *arg3,
int ret;
int status = *(int *)arg4;
+ if (is_skx_based_platform())
+ clos_priority_type = 1;
+
ret = isst_pm_qos_config(cpu, status, clos_priority_type);
if (ret)
- perror("isst_pm_qos_config");
+ isst_display_error_info_message(1, "isst_pm_qos_config failed", 0, 0);
if (status)
isst_display_result(cpu, outf, "core-power", "enable",
@@ -1672,9 +1998,11 @@ static void set_clos_enable(int arg)
if (enable) {
fprintf(stderr,
"Enable core-power for a package/die\n");
- fprintf(stderr,
- "\tClos Enable: Specify priority type with [--priority|-p]\n");
- fprintf(stderr, "\t\t 0: Proportional, 1: Ordered\n");
+ if (!is_skx_based_platform()) {
+ fprintf(stderr,
+ "\tClos Enable: Specify priority type with [--priority|-p]\n");
+ fprintf(stderr, "\t\t 0: Proportional, 1: Ordered\n");
+ }
} else {
fprintf(stderr,
"Disable core-power: [No command arguments are required]\n");
@@ -1705,7 +2033,7 @@ static void dump_clos_config_for_cpu(int cpu, void *arg1, void *arg2,
ret = isst_pm_get_clos(cpu, current_clos, &clos_config);
if (ret)
- perror("isst_pm_get_clos");
+ isst_display_error_info_message(1, "isst_pm_get_clos failed", 0, 0);
else
isst_clos_display_information(cpu, outf, current_clos,
&clos_config);
@@ -1721,7 +2049,8 @@ static void dump_clos_config(int arg)
exit(0);
}
if (current_clos < 0 || current_clos > 3) {
- fprintf(stderr, "Invalid clos id\n");
+ isst_display_error_info_message(1, "Invalid clos id\n", 0, 0);
+ isst_ctdp_display_information_end(outf);
exit(0);
}
@@ -1742,9 +2071,14 @@ static void get_clos_info_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
ret = isst_clos_get_clos_information(cpu, &enable, &prio_type);
if (ret)
- perror("isst_clos_get_info");
- else
- isst_clos_display_clos_information(cpu, outf, enable, prio_type);
+ isst_display_error_info_message(1, "isst_clos_get_info failed", 0, 0);
+ else {
+ int cp_state, cp_cap;
+
+ isst_read_pm_config(cpu, &cp_state, &cp_cap);
+ isst_clos_display_clos_information(cpu, outf, enable, prio_type,
+ cp_state, cp_cap);
+ }
}
static void dump_clos_info(int arg)
@@ -1752,19 +2086,17 @@ static void dump_clos_info(int arg)
if (cmd_help) {
fprintf(stderr,
"Print Intel Speed Select Technology core power information\n");
- fprintf(stderr, "\tSpecify targeted cpu id with [--cpu|-c]\n");
- exit(0);
- }
-
- if (!max_target_cpus) {
- fprintf(stderr,
- "Invalid target cpu. Specify with [-c|--cpu]\n");
+ fprintf(stderr, "\t Optionally specify targeted cpu id with [--cpu|-c]\n");
exit(0);
}
isst_ctdp_display_information_start(outf);
- for_each_online_target_cpu_in_set(get_clos_info_for_cpu, NULL,
- NULL, NULL, NULL);
+ if (max_target_cpus)
+ for_each_online_target_cpu_in_set(get_clos_info_for_cpu, NULL,
+ NULL, NULL, NULL);
+ else
+ for_each_online_package_in_set(get_clos_info_for_cpu, NULL,
+ NULL, NULL, NULL);
isst_ctdp_display_information_end(outf);
}
@@ -1785,7 +2117,7 @@ static void set_clos_config_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
clos_config.clos_desired = clos_desired;
ret = isst_set_clos(cpu, current_clos, &clos_config);
if (ret)
- perror("isst_set_clos");
+ isst_display_error_info_message(1, "isst_set_clos failed", 0, 0);
else
isst_display_result(cpu, outf, "core-power", "config", ret);
}
@@ -1797,26 +2129,27 @@ static void set_clos_config(int arg)
"Set core-power configuration for one of the four clos ids\n");
fprintf(stderr,
"\tSpecify targeted clos id with [--clos|-c]\n");
- fprintf(stderr, "\tSpecify clos EPP with [--epp|-e]\n");
- fprintf(stderr,
- "\tSpecify clos Proportional Priority [--weight|-w]\n");
+ if (!is_skx_based_platform()) {
+ fprintf(stderr, "\tSpecify clos EPP with [--epp|-e]\n");
+ fprintf(stderr,
+ "\tSpecify clos Proportional Priority [--weight|-w]\n");
+ }
fprintf(stderr, "\tSpecify clos min in MHz with [--min|-n]\n");
fprintf(stderr, "\tSpecify clos max in MHz with [--max|-m]\n");
- fprintf(stderr, "\tSpecify clos desired in MHz with [--desired|-d]\n");
exit(0);
}
if (current_clos < 0 || current_clos > 3) {
- fprintf(stderr, "Invalid clos id\n");
+ isst_display_error_info_message(1, "Invalid clos id\n", 0, 0);
exit(0);
}
- if (clos_epp < 0 || clos_epp > 0x0F) {
- fprintf(stderr, "clos epp is not specified, default: 0\n");
+ if (!is_skx_based_platform() && (clos_epp < 0 || clos_epp > 0x0F)) {
+ fprintf(stderr, "clos epp is not specified or invalid, default: 0\n");
clos_epp = 0;
}
- if (clos_prop_prio < 0 || clos_prop_prio > 0x0F) {
+ if (!is_skx_based_platform() && (clos_prop_prio < 0 || clos_prop_prio > 0x0F)) {
fprintf(stderr,
- "clos frequency weight is not specified, default: 0\n");
+ "clos frequency weight is not specified or invalid, default: 0\n");
clos_prop_prio = 0;
}
if (clos_min < 0) {
@@ -1824,11 +2157,11 @@ static void set_clos_config(int arg)
clos_min = 0;
}
if (clos_max < 0) {
- fprintf(stderr, "clos max is not specified, default: 25500 MHz\n");
+ fprintf(stderr, "clos max is not specified, default: Max frequency (ratio 0xff)\n");
clos_max = 0xff;
}
- if (clos_desired < 0) {
- fprintf(stderr, "clos desired is not specified, default: 0\n");
+ if (clos_desired) {
+ fprintf(stderr, "clos desired is not supported on this platform\n");
clos_desired = 0x00;
}
@@ -1849,7 +2182,7 @@ static void set_clos_assoc_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
ret = isst_clos_associate(cpu, current_clos);
if (ret)
- perror("isst_clos_associate");
+ debug_printf("isst_clos_associate failed");
else
isst_display_result(cpu, outf, "core-power", "assoc", ret);
}
@@ -1860,19 +2193,22 @@ static void set_clos_assoc(int arg)
fprintf(stderr, "Associate a clos id to a CPU\n");
fprintf(stderr,
"\tSpecify targeted clos id with [--clos|-c]\n");
+ fprintf(stderr,
+ "\tFor example to associate clos 1 to CPU 0: issue\n");
+ fprintf(stderr,
+ "\tintel-speed-select --cpu 0 core-power assoc --clos 1\n");
exit(0);
}
if (current_clos < 0 || current_clos > 3) {
- fprintf(stderr, "Invalid clos id\n");
+ isst_display_error_info_message(1, "Invalid clos id\n", 0, 0);
exit(0);
}
if (max_target_cpus)
for_each_online_target_cpu_in_set(set_clos_assoc_for_cpu, NULL,
NULL, NULL, NULL);
else {
- fprintf(stderr,
- "Invalid target cpu. Specify with [-c|--cpu]\n");
+ isst_display_error_info_message(1, "Invalid target cpu. Specify with [-c|--cpu]", 0, 0);
}
}
@@ -1883,7 +2219,7 @@ static void get_clos_assoc_for_cpu(int cpu, void *arg1, void *arg2, void *arg3,
ret = isst_clos_get_assoc_status(cpu, &clos);
if (ret)
- perror("isst_clos_get_assoc_status");
+ isst_display_error_info_message(1, "isst_clos_get_assoc_status failed", 0, 0);
else
isst_clos_display_assoc_information(cpu, outf, clos);
}
@@ -1897,8 +2233,7 @@ static void get_clos_assoc(int arg)
}
if (!max_target_cpus) {
- fprintf(stderr,
- "Invalid target cpu. Specify with [-c|--cpu]\n");
+ isst_display_error_info_message(1, "Invalid target cpu. Specify with [-c|--cpu]", 0, 0);
exit(0);
}
@@ -2035,7 +2370,7 @@ static void parse_cmd_args(int argc, int start, char **argv)
option_index = start;
optind = start + 1;
- while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:hoa",
+ while ((opt = getopt_long(argc, argv, "b:l:t:c:d:e:n:m:p:w:r:hoa",
long_options, &option_index)) != -1) {
switch (opt) {
case 'a':
@@ -2061,7 +2396,7 @@ static void parse_cmd_args(int argc, int start, char **argv)
fact_avx = 0x01;
} else if (!strncmp(optarg, "avx2", 4)) {
fact_avx = 0x02;
- } else if (!strncmp(optarg, "avx512", 4)) {
+ } else if (!strncmp(optarg, "avx512", 6)) {
fact_avx = 0x04;
} else {
fprintf(outf, "Invalid sse,avx options\n");
@@ -2078,6 +2413,10 @@ static void parse_cmd_args(int argc, int start, char **argv)
break;
case 'e':
clos_epp = atoi(optarg);
+ if (is_skx_based_platform()) {
+ isst_display_error_info_message(1, "epp can't be specified on this platform", 0, 0);
+ exit(0);
+ }
break;
case 'n':
clos_min = atoi(optarg);
@@ -2089,14 +2428,25 @@ static void parse_cmd_args(int argc, int start, char **argv)
break;
case 'p':
clos_priority_type = atoi(optarg);
+ if (is_skx_based_platform() && !clos_priority_type) {
+ isst_display_error_info_message(1, "Invalid clos priority type: proportional for this platform", 0, 0);
+ exit(0);
+ }
break;
case 'w':
clos_prop_prio = atoi(optarg);
+ if (is_skx_based_platform()) {
+ isst_display_error_info_message(1, "weight can't be specified on this platform", 0, 0);
+ exit(0);
+ }
break;
default:
- printf("no match\n");
+ printf("Unknown option: ignore\n");
}
}
+
+ if (argv[optind])
+ printf("Garbage at the end of command: ignore\n");
}
static void isst_help(void)
@@ -2214,11 +2564,18 @@ void process_command(int argc, char **argv,
static void usage(void)
{
- printf("Intel(R) Speed Select Technology\n");
+ if (is_clx_n_platform()) {
+ fprintf(stderr, "\nThere is limited support of Intel Speed Select features on this platform.\n");
+ fprintf(stderr, "Everything is pre-configured using BIOS options, this tool can't enable any feature in the hardware.\n\n");
+ }
+
printf("\nUsage:\n");
printf("intel-speed-select [OPTIONS] FEATURE COMMAND COMMAND_ARGUMENTS\n");
- printf("\nUse this tool to enumerate and control the Intel Speed Select Technology features,\n");
- printf("\nFEATURE : [perf-profile|base-freq|turbo-freq|core-power]\n");
+ printf("\nUse this tool to enumerate and control the Intel Speed Select Technology features:\n");
+ if (is_clx_n_platform())
+ printf("\nFEATURE : [perf-profile|base-freq]\n");
+ else
+ printf("\nFEATURE : [perf-profile|base-freq|turbo-freq|core-power]\n");
printf("\nFor help on each feature, use -h|--help\n");
printf("\tFor example: intel-speed-select perf-profile -h\n");
@@ -2231,17 +2588,29 @@ static void usage(void)
printf("\t\tDefault: Die scoped for all dies in the system with multiple dies/package\n");
printf("\t\t\t Or Package scoped for all Packages when each package contains one die\n");
printf("\t[-d|--debug] : Debug mode\n");
+ printf("\t[-f|--format] : output format [json|text]. Default: text\n");
printf("\t[-h|--help] : Print help\n");
printf("\t[-i|--info] : Print platform information\n");
printf("\t[-o|--out] : Output file\n");
printf("\t\t\tDefault : stderr\n");
- printf("\t[-f|--format] : output format [json|text]. Default: text\n");
printf("\t[-v|--version] : Print version\n");
printf("\nResult format\n");
printf("\tResult display uses a common format for each command:\n");
printf("\tResults are formatted in text/JSON with\n");
printf("\t\tPackage, Die, CPU, and command specific results.\n");
+
+ printf("\nExamples\n");
+ printf("\tTo get platform information:\n");
+ printf("\t\tintel-speed-select --info\n");
+ printf("\tTo get full perf-profile information dump:\n");
+ printf("\t\tintel-speed-select perf-profile info\n");
+ printf("\tTo get full base-freq information dump:\n");
+ printf("\t\tintel-speed-select base-freq info -l 0\n");
+ if (!is_clx_n_platform()) {
+ printf("\tTo get full turbo-freq information dump:\n");
+ printf("\t\tintel-speed-select turbo-freq info -l 0\n");
+ }
exit(1);
}
@@ -2254,6 +2623,8 @@ static void print_version(void)
static void cmdline(int argc, char **argv)
{
+ const char *pathname = "/dev/isst_interface";
+ FILE *fp;
int opt;
int option_index = 0;
int ret;
@@ -2269,6 +2640,28 @@ static void cmdline(int argc, char **argv)
{ 0, 0, 0, 0 }
};
+ if (geteuid() != 0) {
+ fprintf(stderr, "Must run as root\n");
+ exit(0);
+ }
+
+ ret = update_cpu_model();
+ if (ret)
+ err(-1, "Invalid CPU model (%d)\n", cpu_model);
+ printf("Intel(R) Speed Select Technology\n");
+ printf("Executing on CPU model:%d[0x%x]\n", cpu_model, cpu_model);
+
+ if (!is_clx_n_platform()) {
+ fp = fopen(pathname, "rb");
+ if (!fp) {
+ fprintf(stderr, "Intel speed select drivers are not loaded on this system.\n");
+ fprintf(stderr, "Verify that kernel config includes CONFIG_INTEL_SPEED_SELECT_INTERFACE.\n");
+ fprintf(stderr, "If the config is included then this is not a supported platform.\n");
+ exit(0);
+ }
+ fclose(fp);
+ }
+
progname = argv[0];
while ((opt = getopt_long_only(argc, argv, "+c:df:hio:v", long_options,
&option_index)) != -1) {
@@ -2303,21 +2696,12 @@ static void cmdline(int argc, char **argv)
}
}
- if (geteuid() != 0) {
- fprintf(stderr, "Must run as root\n");
- exit(0);
- }
-
if (optind > (argc - 2)) {
- fprintf(stderr, "Feature name and|or command not specified\n");
+ usage();
exit(0);
}
- ret = update_cpu_model();
- if (ret)
- err(-1, "Invalid CPU model (%d)\n", cpu_model);
- printf("Intel(R) Speed Select Technology\n");
- printf("Executing on CPU model:%d[0x%x]\n", cpu_model, cpu_model);
set_max_cpu_num();
+ store_cpu_topology();
set_cpu_present_cpu_mask();
set_cpu_target_cpu_mask();
diff --git a/tools/power/x86/intel-speed-select/isst-core.c b/tools/power/x86/intel-speed-select/isst-core.c
index 81a119f688a3..a7f4337c5777 100644
--- a/tools/power/x86/intel-speed-select/isst-core.c
+++ b/tools/power/x86/intel-speed-select/isst-core.c
@@ -114,8 +114,10 @@ int isst_get_tdp_info(int cpu, int config_index,
ret = isst_send_mbox_command(cpu, CONFIG_TDP, CONFIG_TDP_GET_TDP_INFO,
0, config_index, &resp);
- if (ret)
+ if (ret) {
+ isst_display_error_info_message(1, "Invalid level, Can't get TDP information at level", 1, config_index);
return ret;
+ }
ctdp_level->pkg_tdp = resp & GENMASK(14, 0);
ctdp_level->tdp_ratio = (resp & GENMASK(23, 16)) >> 16;
@@ -352,7 +354,7 @@ int isst_set_tdp_level_msr(int cpu, int tdp_level)
debug_printf("cpu: tdp_level via MSR %d\n", cpu, tdp_level);
if (isst_get_config_tdp_lock_status(cpu)) {
- debug_printf("cpu: tdp_locked %d\n", cpu);
+ isst_display_error_info_message(1, "tdp_locked", 0, 0);
return -1;
}
@@ -373,19 +375,50 @@ int isst_set_tdp_level(int cpu, int tdp_level)
unsigned int resp;
int ret;
+
+ if (isst_get_config_tdp_lock_status(cpu)) {
+ isst_display_error_info_message(1, "TDP is locked", 0, 0);
+ return -1;
+
+ }
+
ret = isst_send_mbox_command(cpu, CONFIG_TDP, CONFIG_TDP_SET_LEVEL, 0,
tdp_level, &resp);
- if (ret)
- return isst_set_tdp_level_msr(cpu, tdp_level);
+ if (ret) {
+ isst_display_error_info_message(1, "Set TDP level failed for level", 1, tdp_level);
+ return ret;
+ }
return 0;
}
int isst_get_pbf_info(int cpu, int level, struct isst_pbf_info *pbf_info)
{
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ struct isst_pkg_ctdp pkg_dev;
int i, ret, core_cnt, max;
unsigned int req, resp;
+ ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
+ return ret;
+ }
+
+ if (level > pkg_dev.levels) {
+ isst_display_error_info_message(1, "Invalid level", 1, level);
+ return -1;
+ }
+
+ ret = isst_get_ctdp_control(cpu, level, &ctdp_level);
+ if (ret)
+ return ret;
+
+ if (!ctdp_level.pbf_support) {
+ isst_display_error_info_message(1, "base-freq feature is not present at this level", 1, level);
+ return -1;
+ }
+
pbf_info->core_cpumask_size = alloc_cpu_set(&pbf_info->core_cpumask);
core_cnt = get_core_count(get_physical_package_id(cpu), get_physical_die_id(cpu));
@@ -481,6 +514,10 @@ int isst_set_pbf_fact_status(int cpu, int pbf, int enable)
else
req &= ~BIT(17);
} else {
+
+ if (enable && !ctdp_level.sst_cp_enabled)
+ isst_display_error_info_message(0, "Make sure to execute before: core-power enable", 0, 0);
+
if (ctdp_level.pbf_enabled)
req = BIT(17);
@@ -566,10 +603,32 @@ int isst_get_fact_bucket_info(int cpu, int level,
return 0;
}
-int isst_get_fact_info(int cpu, int level, struct isst_fact_info *fact_info)
+int isst_get_fact_info(int cpu, int level, int fact_bucket, struct isst_fact_info *fact_info)
{
+ struct isst_pkg_ctdp_level_info ctdp_level;
+ struct isst_pkg_ctdp pkg_dev;
unsigned int resp;
- int ret;
+ int j, ret, print;
+
+ ret = isst_get_ctdp_levels(cpu, &pkg_dev);
+ if (ret) {
+ isst_display_error_info_message(1, "Failed to get number of levels", 0, 0);
+ return ret;
+ }
+
+ if (level > pkg_dev.levels) {
+ isst_display_error_info_message(1, "Invalid level", 1, level);
+ return -1;
+ }
+
+ ret = isst_get_ctdp_control(cpu, level, &ctdp_level);
+ if (ret)
+ return ret;
+
+ if (!ctdp_level.fact_support) {
+ isst_display_error_info_message(1, "turbo-freq feature is not present at this level", 1, level);
+ return -1;
+ }
ret = isst_send_mbox_command(cpu, CONFIG_TDP,
CONFIG_TDP_GET_FACT_LP_CLIPPING_RATIO, 0,
@@ -585,8 +644,25 @@ int isst_get_fact_info(int cpu, int level, struct isst_fact_info *fact_info)
fact_info->lp_clipping_ratio_license_avx512 = (resp >> 16) & 0xff;
ret = isst_get_fact_bucket_info(cpu, level, fact_info->bucket_info);
+ if (ret)
+ return ret;
- return ret;
+ print = 0;
+ for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) {
+ if (fact_bucket != 0xff && fact_bucket != j)
+ continue;
+
+ if (!fact_info->bucket_info[j].high_priority_cores_count)
+ break;
+
+ print = 1;
+ }
+ if (!print) {
+ isst_display_error_info_message(1, "Invalid bucket", 0, 0);
+ return -1;
+ }
+
+ return 0;
}
int isst_set_trl(int cpu, unsigned long long trl)
@@ -671,7 +747,7 @@ void isst_get_process_ctdp_complete(int cpu, struct isst_pkg_ctdp *pkg_dev)
int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
{
- int i, ret;
+ int i, ret, valid = 0;
if (pkg_dev->processed)
return 0;
@@ -684,6 +760,14 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
cpu, pkg_dev->enabled, pkg_dev->current_level,
pkg_dev->levels);
+ if (tdp_level != 0xff && tdp_level > pkg_dev->levels) {
+ isst_display_error_info_message(1, "Invalid level", 0, 0);
+ return -1;
+ }
+
+ if (!pkg_dev->enabled)
+ isst_display_error_info_message(0, "perf-profile feature is not supported, just base-config level 0 is valid", 0, 0);
+
for (i = 0; i <= pkg_dev->levels; ++i) {
struct isst_pkg_ctdp_level_info *ctdp_level;
@@ -703,6 +787,7 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
if (ret)
continue;
+ valid = 1;
pkg_dev->processed = 1;
ctdp_level->processed = 1;
@@ -713,7 +798,7 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
}
if (ctdp_level->fact_support) {
- ret = isst_get_fact_info(cpu, i,
+ ret = isst_get_fact_info(cpu, i, 0xff,
&ctdp_level->fact_info);
if (ret)
return ret;
@@ -775,6 +860,9 @@ int isst_get_process_ctdp(int cpu, int tdp_level, struct isst_pkg_ctdp *pkg_dev)
isst_get_uncore_mem_freq(cpu, i, ctdp_level);
}
+ if (!valid)
+ isst_display_error_info_message(0, "Invalid level, Can't get TDP control information at specified levels on cpu", 1, cpu);
+
return 0;
}
@@ -824,22 +912,24 @@ int isst_pm_qos_config(int cpu, int enable_clos, int priority_type)
return ret;
if (ctdp_level.fact_enabled) {
- debug_printf("Turbo-freq feature must be disabled first\n");
+ isst_display_error_info_message(1, "Ignoring request, turbo-freq feature is still enabled", 0, 0);
return -EINVAL;
}
ret = isst_write_pm_config(cpu, 0);
if (ret)
- perror("isst_write_pm_config\n");
+ isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error", 0, 0);
} else {
ret = isst_write_pm_config(cpu, 1);
if (ret)
- perror("isst_write_pm_config\n");
+ isst_display_error_info_message(0, "WRITE_PM_CONFIG command failed, ignoring error", 0, 0);
}
ret = isst_send_mbox_command(cpu, CONFIG_CLOS, CLOS_PM_QOS_CONFIG, 0, 0,
&resp);
- if (ret)
+ if (ret) {
+ isst_display_error_info_message(1, "CLOS_PM_QOS_CONFIG command failed", 0, 0);
return ret;
+ }
debug_printf("cpu:%d CLOS_PM_QOS_CONFIG resp:%x\n", cpu, resp);
@@ -850,6 +940,9 @@ int isst_pm_qos_config(int cpu, int enable_clos, int priority_type)
else
req = req & ~BIT(1);
+ if (priority_type > 1)
+ isst_display_error_info_message(1, "Invalid priority type: Changing type to ordered", 0, 0);
+
if (priority_type)
req = req | BIT(2);
else
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 4fb0c1d49d64..e105fece47b6 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -158,10 +158,17 @@ static void format_and_print(FILE *outf, int level, char *header, char *value)
last_level = level;
}
-static void print_package_info(int cpu, FILE *outf)
+static int print_package_info(int cpu, FILE *outf)
{
char header[256];
+ if (out_format_is_json()) {
+ snprintf(header, sizeof(header), "package-%d:die-%d:cpu-%d",
+ get_physical_package_id(cpu), get_physical_die_id(cpu),
+ cpu);
+ format_and_print(outf, 1, header, NULL);
+ return 1;
+ }
snprintf(header, sizeof(header), "package-%d",
get_physical_package_id(cpu));
format_and_print(outf, 1, header, NULL);
@@ -169,6 +176,8 @@ static void print_package_info(int cpu, FILE *outf)
format_and_print(outf, 2, header, NULL);
snprintf(header, sizeof(header), "cpu-%d", cpu);
format_and_print(outf, 3, header, NULL);
+
+ return 3;
}
static void _isst_pbf_display_information(int cpu, FILE *outf, int level,
@@ -178,7 +187,7 @@ static void _isst_pbf_display_information(int cpu, FILE *outf, int level,
char header[256];
char value[256];
- snprintf(header, sizeof(header), "speed-select-base-freq");
+ snprintf(header, sizeof(header), "speed-select-base-freq-properties");
format_and_print(outf, disp_level, header, NULL);
snprintf(header, sizeof(header), "high-priority-base-frequency(MHz)");
@@ -222,9 +231,23 @@ static void _isst_fact_display_information(int cpu, FILE *outf, int level,
struct isst_fact_bucket_info *bucket_info = fact_info->bucket_info;
char header[256];
char value[256];
- int j;
+ int print = 0, j;
+
+ for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) {
+ if (fact_bucket != 0xff && fact_bucket != j)
+ continue;
+
+ if (!bucket_info[j].high_priority_cores_count)
+ break;
+
+ print = 1;
+ }
+ if (!print) {
+ fprintf(stderr, "Invalid bucket\n");
+ return;
+ }
- snprintf(header, sizeof(header), "speed-select-turbo-freq");
+ snprintf(header, sizeof(header), "speed-select-turbo-freq-properties");
format_and_print(outf, base_level, header, NULL);
for (j = 0; j < ISST_FACT_MAX_BUCKETS; ++j) {
if (fact_bucket != 0xff && fact_bucket != j)
@@ -289,21 +312,35 @@ static void _isst_fact_display_information(int cpu, FILE *outf, int level,
}
void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
- unsigned int val)
+ unsigned int val, char *str0, char *str1)
{
char header[256];
char value[256];
+ int level = 1;
- snprintf(header, sizeof(header), "package-%d",
- get_physical_package_id(cpu));
- format_and_print(outf, 1, header, NULL);
- snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
- format_and_print(outf, 2, header, NULL);
- snprintf(header, sizeof(header), "cpu-%d", cpu);
- format_and_print(outf, 3, header, NULL);
+ if (out_format_is_json()) {
+ snprintf(header, sizeof(header), "package-%d:die-%d:cpu-%d",
+ get_physical_package_id(cpu), get_physical_die_id(cpu),
+ cpu);
+ format_and_print(outf, level++, header, NULL);
+ } else {
+ snprintf(header, sizeof(header), "package-%d",
+ get_physical_package_id(cpu));
+ format_and_print(outf, level++, header, NULL);
+ snprintf(header, sizeof(header), "die-%d",
+ get_physical_die_id(cpu));
+ format_and_print(outf, level++, header, NULL);
+ snprintf(header, sizeof(header), "cpu-%d", cpu);
+ format_and_print(outf, level++, header, NULL);
+ }
- snprintf(value, sizeof(value), "%u", val);
- format_and_print(outf, 4, prefix, value);
+ if (str0 && !val)
+ snprintf(value, sizeof(value), "%s", str0);
+ else if (str1 && val)
+ snprintf(value, sizeof(value), "%s", str1);
+ else
+ snprintf(value, sizeof(value), "%u", val);
+ format_and_print(outf, level, prefix, value);
format_and_print(outf, 1, NULL, NULL);
}
@@ -313,10 +350,11 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
{
char header[256];
char value[256];
- int i, base_level = 1;
+ static int level;
+ int i;
if (pkg_dev->processed)
- print_package_info(cpu, outf);
+ level = print_package_info(cpu, outf);
for (i = 0; i <= pkg_dev->levels; ++i) {
struct isst_pkg_ctdp_level_info *ctdp_level;
@@ -328,72 +366,80 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
snprintf(header, sizeof(header), "perf-profile-level-%d",
ctdp_level->level);
- format_and_print(outf, base_level + 3, header, NULL);
+ format_and_print(outf, level + 1, header, NULL);
snprintf(header, sizeof(header), "cpu-count");
j = get_cpu_count(get_physical_die_id(cpu),
get_physical_die_id(cpu));
snprintf(value, sizeof(value), "%d", j);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
+
+ j = CPU_COUNT_S(ctdp_level->core_cpumask_size,
+ ctdp_level->core_cpumask);
+ if (j) {
+ snprintf(header, sizeof(header), "enable-cpu-count");
+ snprintf(value, sizeof(value), "%d", j);
+ format_and_print(outf, level + 2, header, value);
+ }
if (ctdp_level->core_cpumask_size) {
snprintf(header, sizeof(header), "enable-cpu-mask");
printcpumask(sizeof(value), value,
ctdp_level->core_cpumask_size,
ctdp_level->core_cpumask);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
snprintf(header, sizeof(header), "enable-cpu-list");
printcpulist(sizeof(value), value,
ctdp_level->core_cpumask_size,
ctdp_level->core_cpumask);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
}
snprintf(header, sizeof(header), "thermal-design-power-ratio");
snprintf(value, sizeof(value), "%d", ctdp_level->tdp_ratio);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
snprintf(header, sizeof(header), "base-frequency(MHz)");
if (!ctdp_level->sse_p1)
ctdp_level->sse_p1 = ctdp_level->tdp_ratio;
snprintf(value, sizeof(value), "%d",
ctdp_level->sse_p1 * DISP_FREQ_MULTIPLIER);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
if (ctdp_level->avx2_p1) {
snprintf(header, sizeof(header), "base-frequency-avx2(MHz)");
snprintf(value, sizeof(value), "%d",
ctdp_level->avx2_p1 * DISP_FREQ_MULTIPLIER);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
}
if (ctdp_level->avx512_p1) {
snprintf(header, sizeof(header), "base-frequency-avx512(MHz)");
snprintf(value, sizeof(value), "%d",
ctdp_level->avx512_p1 * DISP_FREQ_MULTIPLIER);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
}
if (ctdp_level->uncore_p1) {
snprintf(header, sizeof(header), "uncore-frequency-min(MHz)");
snprintf(value, sizeof(value), "%d",
ctdp_level->uncore_p1 * DISP_FREQ_MULTIPLIER);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
}
if (ctdp_level->uncore_p0) {
snprintf(header, sizeof(header), "uncore-frequency-max(MHz)");
snprintf(value, sizeof(value), "%d",
ctdp_level->uncore_p0 * DISP_FREQ_MULTIPLIER);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
}
if (ctdp_level->mem_freq) {
snprintf(header, sizeof(header), "mem-frequency(MHz)");
snprintf(value, sizeof(value), "%d",
ctdp_level->mem_freq * DISP_FREQ_MULTIPLIER);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
}
snprintf(header, sizeof(header),
@@ -405,7 +451,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
snprintf(value, sizeof(value), "disabled");
} else
snprintf(value, sizeof(value), "unsupported");
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
snprintf(header, sizeof(header),
"speed-select-base-freq");
@@ -416,7 +462,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
snprintf(value, sizeof(value), "disabled");
} else
snprintf(value, sizeof(value), "unsupported");
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
snprintf(header, sizeof(header),
"speed-select-core-power");
@@ -427,110 +473,115 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
snprintf(value, sizeof(value), "disabled");
} else
snprintf(value, sizeof(value), "unsupported");
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
if (is_clx_n_platform()) {
if (ctdp_level->pbf_support)
_isst_pbf_display_information(cpu, outf,
tdp_level,
&ctdp_level->pbf_info,
- base_level + 4);
+ level + 2);
continue;
}
if (ctdp_level->pkg_tdp) {
snprintf(header, sizeof(header), "thermal-design-power(W)");
snprintf(value, sizeof(value), "%d", ctdp_level->pkg_tdp);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
}
if (ctdp_level->t_proc_hot) {
snprintf(header, sizeof(header), "tjunction-max(C)");
snprintf(value, sizeof(value), "%d", ctdp_level->t_proc_hot);
- format_and_print(outf, base_level + 4, header, value);
+ format_and_print(outf, level + 2, header, value);
}
snprintf(header, sizeof(header), "turbo-ratio-limits-sse");
- format_and_print(outf, base_level + 4, header, NULL);
+ format_and_print(outf, level + 2, header, NULL);
for (j = 0; j < 8; ++j) {
snprintf(header, sizeof(header), "bucket-%d", j);
- format_and_print(outf, base_level + 5, header, NULL);
+ format_and_print(outf, level + 3, header, NULL);
snprintf(header, sizeof(header), "core-count");
snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff);
- format_and_print(outf, base_level + 6, header, value);
+ format_and_print(outf, level + 4, header, value);
snprintf(header, sizeof(header),
"max-turbo-frequency(MHz)");
snprintf(value, sizeof(value), "%d",
ctdp_level->trl_sse_active_cores[j] *
DISP_FREQ_MULTIPLIER);
- format_and_print(outf, base_level + 6, header, value);
+ format_and_print(outf, level + 4, header, value);
}
if (ctdp_level->trl_avx_active_cores[0]) {
snprintf(header, sizeof(header), "turbo-ratio-limits-avx2");
- format_and_print(outf, base_level + 4, header, NULL);
+ format_and_print(outf, level + 2, header, NULL);
for (j = 0; j < 8; ++j) {
snprintf(header, sizeof(header), "bucket-%d", j);
- format_and_print(outf, base_level + 5, header, NULL);
+ format_and_print(outf, level + 3, header, NULL);
snprintf(header, sizeof(header), "core-count");
snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff);
- format_and_print(outf, base_level + 6, header, value);
+ format_and_print(outf, level + 4, header, value);
snprintf(header, sizeof(header), "max-turbo-frequency(MHz)");
snprintf(value, sizeof(value), "%d", ctdp_level->trl_avx_active_cores[j] * DISP_FREQ_MULTIPLIER);
- format_and_print(outf, base_level + 6, header, value);
+ format_and_print(outf, level + 4, header, value);
}
}
if (ctdp_level->trl_avx_512_active_cores[0]) {
snprintf(header, sizeof(header), "turbo-ratio-limits-avx512");
- format_and_print(outf, base_level + 4, header, NULL);
+ format_and_print(outf, level + 2, header, NULL);
for (j = 0; j < 8; ++j) {
snprintf(header, sizeof(header), "bucket-%d", j);
- format_and_print(outf, base_level + 5, header, NULL);
+ format_and_print(outf, level + 3, header, NULL);
snprintf(header, sizeof(header), "core-count");
snprintf(value, sizeof(value), "%llu", (ctdp_level->buckets_info >> (j * 8)) & 0xff);
- format_and_print(outf, base_level + 6, header, value);
+ format_and_print(outf, level + 4, header, value);
snprintf(header, sizeof(header), "max-turbo-frequency(MHz)");
snprintf(value, sizeof(value), "%d", ctdp_level->trl_avx_512_active_cores[j] * DISP_FREQ_MULTIPLIER);
- format_and_print(outf, base_level + 6, header, value);
+ format_and_print(outf, level + 4, header, value);
}
}
if (ctdp_level->pbf_support)
_isst_pbf_display_information(cpu, outf, i,
&ctdp_level->pbf_info,
- base_level + 4);
+ level + 2);
if (ctdp_level->fact_support)
_isst_fact_display_information(cpu, outf, i, 0xff, 0xff,
&ctdp_level->fact_info,
- base_level + 4);
+ level + 2);
}
format_and_print(outf, 1, NULL, NULL);
}
+static int start;
void isst_ctdp_display_information_start(FILE *outf)
{
last_level = 0;
format_and_print(outf, 0, "start", NULL);
+ start = 1;
}
void isst_ctdp_display_information_end(FILE *outf)
{
format_and_print(outf, 0, NULL, NULL);
+ start = 0;
}
void isst_pbf_display_information(int cpu, FILE *outf, int level,
struct isst_pbf_info *pbf_info)
{
- print_package_info(cpu, outf);
- _isst_pbf_display_information(cpu, outf, level, pbf_info, 4);
+ int _level;
+
+ _level = print_package_info(cpu, outf);
+ _isst_pbf_display_information(cpu, outf, level, pbf_info, _level + 1);
format_and_print(outf, 1, NULL, NULL);
}
@@ -538,9 +589,11 @@ void isst_fact_display_information(int cpu, FILE *outf, int level,
int fact_bucket, int fact_avx,
struct isst_fact_info *fact_info)
{
- print_package_info(cpu, outf);
+ int _level;
+
+ _level = print_package_info(cpu, outf);
_isst_fact_display_information(cpu, outf, level, fact_bucket, fact_avx,
- fact_info, 4);
+ fact_info, _level + 1);
format_and_print(outf, 1, NULL, NULL);
}
@@ -549,94 +602,103 @@ void isst_clos_display_information(int cpu, FILE *outf, int clos,
{
char header[256];
char value[256];
+ int level;
- snprintf(header, sizeof(header), "package-%d",
- get_physical_package_id(cpu));
- format_and_print(outf, 1, header, NULL);
- snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
- format_and_print(outf, 2, header, NULL);
- snprintf(header, sizeof(header), "cpu-%d", cpu);
- format_and_print(outf, 3, header, NULL);
+ level = print_package_info(cpu, outf);
snprintf(header, sizeof(header), "core-power");
- format_and_print(outf, 4, header, NULL);
+ format_and_print(outf, level + 1, header, NULL);
snprintf(header, sizeof(header), "clos");
snprintf(value, sizeof(value), "%d", clos);
- format_and_print(outf, 5, header, value);
+ format_and_print(outf, level + 2, header, value);
snprintf(header, sizeof(header), "epp");
snprintf(value, sizeof(value), "%d", clos_config->epp);
- format_and_print(outf, 5, header, value);
+ format_and_print(outf, level + 2, header, value);
snprintf(header, sizeof(header), "clos-proportional-priority");
snprintf(value, sizeof(value), "%d", clos_config->clos_prop_prio);
- format_and_print(outf, 5, header, value);
+ format_and_print(outf, level + 2, header, value);
snprintf(header, sizeof(header), "clos-min");
snprintf(value, sizeof(value), "%d MHz", clos_config->clos_min * DISP_FREQ_MULTIPLIER);
- format_and_print(outf, 5, header, value);
+ format_and_print(outf, level + 2, header, value);
snprintf(header, sizeof(header), "clos-max");
- snprintf(value, sizeof(value), "%d MHz", clos_config->clos_max * DISP_FREQ_MULTIPLIER);
- format_and_print(outf, 5, header, value);
+ if (clos_config->clos_max == 0xff)
+ snprintf(value, sizeof(value), "Max Turbo frequency");
+ else
+ snprintf(value, sizeof(value), "%d MHz", clos_config->clos_max * DISP_FREQ_MULTIPLIER);
+ format_and_print(outf, level + 2, header, value);
snprintf(header, sizeof(header), "clos-desired");
snprintf(value, sizeof(value), "%d MHz", clos_config->clos_desired * DISP_FREQ_MULTIPLIER);
- format_and_print(outf, 5, header, value);
+ format_and_print(outf, level + 2, header, value);
- format_and_print(outf, 1, NULL, NULL);
+ format_and_print(outf, level, NULL, NULL);
}
void isst_clos_display_clos_information(int cpu, FILE *outf,
- int clos_enable, int type)
+ int clos_enable, int type,
+ int state, int cap)
{
char header[256];
char value[256];
+ int level;
- snprintf(header, sizeof(header), "package-%d",
- get_physical_package_id(cpu));
- format_and_print(outf, 1, header, NULL);
- snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
- format_and_print(outf, 2, header, NULL);
- snprintf(header, sizeof(header), "cpu-%d", cpu);
- format_and_print(outf, 3, header, NULL);
+ level = print_package_info(cpu, outf);
snprintf(header, sizeof(header), "core-power");
- format_and_print(outf, 4, header, NULL);
+ format_and_print(outf, level + 1, header, NULL);
+
+ snprintf(header, sizeof(header), "support-status");
+ if (cap)
+ snprintf(value, sizeof(value), "supported");
+ else
+ snprintf(value, sizeof(value), "unsupported");
+ format_and_print(outf, level + 2, header, value);
snprintf(header, sizeof(header), "enable-status");
- snprintf(value, sizeof(value), "%d", clos_enable);
- format_and_print(outf, 5, header, value);
+ if (state)
+ snprintf(value, sizeof(value), "enabled");
+ else
+ snprintf(value, sizeof(value), "disabled");
+ format_and_print(outf, level + 2, header, value);
+
+ snprintf(header, sizeof(header), "clos-enable-status");
+ if (clos_enable)
+ snprintf(value, sizeof(value), "enabled");
+ else
+ snprintf(value, sizeof(value), "disabled");
+ format_and_print(outf, level + 2, header, value);
snprintf(header, sizeof(header), "priority-type");
- snprintf(value, sizeof(value), "%d", type);
- format_and_print(outf, 5, header, value);
+ if (type)
+ snprintf(value, sizeof(value), "ordered");
+ else
+ snprintf(value, sizeof(value), "proportional");
+ format_and_print(outf, level + 2, header, value);
- format_and_print(outf, 1, NULL, NULL);
+ format_and_print(outf, level, NULL, NULL);
}
void isst_clos_display_assoc_information(int cpu, FILE *outf, int clos)
{
char header[256];
char value[256];
+ int level;
- snprintf(header, sizeof(header), "package-%d",
- get_physical_package_id(cpu));
- format_and_print(outf, 1, header, NULL);
- snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
- format_and_print(outf, 2, header, NULL);
- snprintf(header, sizeof(header), "cpu-%d", cpu);
- format_and_print(outf, 3, header, NULL);
+ level = print_package_info(cpu, outf);
snprintf(header, sizeof(header), "get-assoc");
- format_and_print(outf, 4, header, NULL);
+ format_and_print(outf, level + 1, header, NULL);
snprintf(header, sizeof(header), "clos");
snprintf(value, sizeof(value), "%d", clos);
- format_and_print(outf, 5, header, value);
+ format_and_print(outf, level + 2, header, value);
- format_and_print(outf, 1, NULL, NULL);
+ format_and_print(outf, level, NULL, NULL);
}
void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
@@ -644,24 +706,60 @@ void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
{
char header[256];
char value[256];
+ int level = 3;
+
+ if (cpu >= 0)
+ level = print_package_info(cpu, outf);
- if (cpu >= 0) {
- snprintf(header, sizeof(header), "package-%d",
- get_physical_package_id(cpu));
- format_and_print(outf, 1, header, NULL);
- snprintf(header, sizeof(header), "die-%d", get_physical_die_id(cpu));
- format_and_print(outf, 2, header, NULL);
- snprintf(header, sizeof(header), "cpu-%d", cpu);
- format_and_print(outf, 3, header, NULL);
- }
snprintf(header, sizeof(header), "%s", feature);
- format_and_print(outf, 4, header, NULL);
+ format_and_print(outf, level + 1, header, NULL);
snprintf(header, sizeof(header), "%s", cmd);
if (!result)
snprintf(value, sizeof(value), "success");
else
snprintf(value, sizeof(value), "failed(error %d)", result);
- format_and_print(outf, 5, header, value);
+ format_and_print(outf, level + 2, header, value);
+
+ format_and_print(outf, level, NULL, NULL);
+}
+
+void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg)
+{
+ FILE *outf = get_output_file();
+ static int error_index;
+ char header[256];
+ char value[256];
+
+ if (!out_format_is_json()) {
+ if (arg_valid)
+ snprintf(value, sizeof(value), "%s %d", msg, arg);
+ else
+ snprintf(value, sizeof(value), "%s", msg);
+
+ if (error)
+ fprintf(outf, "Error: %s\n", value);
+ else
+ fprintf(outf, "Information: %s\n", value);
+ return;
+ }
+
+ if (!start)
+ format_and_print(outf, 0, "start", NULL);
+
+ if (error)
+ snprintf(header, sizeof(header), "Error%d", error_index++);
+ else
+ snprintf(header, sizeof(header), "Information:%d", error_index++);
+ format_and_print(outf, 1, header, NULL);
+
+ snprintf(header, sizeof(header), "message");
+ if (arg_valid)
+ snprintf(value, sizeof(value), "%s %d", msg, arg);
+ else
+ snprintf(value, sizeof(value), "%s", msg);
+ format_and_print(outf, 2, header, value);
format_and_print(outf, 1, NULL, NULL);
+ if (!start)
+ format_and_print(outf, 0, NULL, NULL);
}
diff --git a/tools/power/x86/intel-speed-select/isst.h b/tools/power/x86/intel-speed-select/isst.h
index ad5aa6341d0f..094ba4589a9c 100644
--- a/tools/power/x86/intel-speed-select/isst.h
+++ b/tools/power/x86/intel-speed-select/isst.h
@@ -29,6 +29,7 @@
#include <sys/ioctl.h>
#define BIT(x) (1 << (x))
+#define BIT_ULL(nr) (1ULL << (nr))
#define GENMASK(h, l) (((~0UL) << (l)) & (~0UL >> (sizeof(long) * 8 - 1 - (h))))
#define GENMASK_ULL(h, l) \
(((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))
@@ -172,6 +173,7 @@ extern int get_cpu_count(int pkg_id, int die_id);
extern int get_core_count(int pkg_id, int die_id);
/* Common interfaces */
+FILE *get_output_file(void);
extern void debug_printf(const char *format, ...);
extern int out_format_is_json(void);
extern int get_physical_package_id(int cpu);
@@ -196,6 +198,8 @@ extern int isst_send_msr_command(unsigned int cpu, unsigned int command,
int write, unsigned long long *req_resp);
extern int isst_get_ctdp_levels(int cpu, struct isst_pkg_ctdp *pkg_dev);
+extern int isst_get_ctdp_control(int cpu, int config_index,
+ struct isst_pkg_ctdp_level_info *ctdp_level);
extern int isst_get_coremask_info(int cpu, int config_index,
struct isst_pkg_ctdp_level_info *ctdp_level);
extern int isst_get_process_ctdp(int cpu, int tdp_level,
@@ -205,7 +209,7 @@ extern void isst_get_process_ctdp_complete(int cpu,
extern void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
struct isst_pkg_ctdp *pkg_dev);
extern void isst_ctdp_display_core_info(int cpu, FILE *outf, char *prefix,
- unsigned int val);
+ unsigned int val, char *str0, char *str1);
extern void isst_ctdp_display_information_start(FILE *outf);
extern void isst_ctdp_display_information_end(FILE *outf);
extern void isst_pbf_display_information(int cpu, FILE *outf, int level,
@@ -216,7 +220,7 @@ extern int isst_set_pbf_fact_status(int cpu, int pbf, int enable);
extern int isst_get_pbf_info(int cpu, int level,
struct isst_pbf_info *pbf_info);
extern void isst_get_pbf_info_complete(struct isst_pbf_info *pbf_info);
-extern int isst_get_fact_info(int cpu, int level,
+extern int isst_get_fact_info(int cpu, int level, int fact_bucket,
struct isst_fact_info *fact_info);
extern int isst_get_fact_bucket_info(int cpu, int level,
struct isst_fact_bucket_info *bucket_info);
@@ -245,7 +249,10 @@ extern void isst_display_result(int cpu, FILE *outf, char *feature, char *cmd,
extern int isst_clos_get_clos_information(int cpu, int *enable, int *type);
extern void isst_clos_display_clos_information(int cpu, FILE *outf,
- int clos_enable, int type);
+ int clos_enable, int type,
+ int state, int cap);
extern int is_clx_n_platform(void);
extern int get_cpufreq_base_freq(int cpu);
+extern int isst_read_pm_config(int cpu, int *cp_state, int *cp_cap);
+extern void isst_display_error_info_message(int error, char *msg, int arg_valid, int arg);
#endif
diff --git a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
index 256199c7a182..3c47865bb247 100755
--- a/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+++ b/tools/power/x86/intel_pstate_tracer/intel_pstate_tracer.py
@@ -235,7 +235,6 @@ def plot_duration_cpu():
output_png = 'all_cpu_durations.png'
g_plot = common_all_gnuplot_settings(output_png)
# autoscale this one, no set y range
- g_plot('set ytics 0, 500')
g_plot('set ylabel "Timer Duration (MilliSeconds)"')
g_plot('set title "{} : cpu durations : {:%F %H:%M}"'.format(testname, datetime.now()))
diff --git a/tools/power/x86/turbostat/.gitignore b/tools/power/x86/turbostat/.gitignore
index 7521370d3568..e13109b43cd1 100644
--- a/tools/power/x86/turbostat/.gitignore
+++ b/tools/power/x86/turbostat/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
turbostat
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index 6d2f3a1b2249..a7974638561c 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -106,6 +106,7 @@ ifneq ($(silent),1)
ifneq ($(V),1)
QUIET_CC = @echo ' CC '$@;
QUIET_CC_FPIC = @echo ' CC FPIC '$@;
+ QUIET_CLANG = @echo ' CLANG '$@;
QUIET_AR = @echo ' AR '$@;
QUIET_LINK = @echo ' LINK '$@;
QUIET_MKDIR = @echo ' MKDIR '$@;
diff --git a/tools/spi/.gitignore b/tools/spi/.gitignore
index 4280576397e8..14ddba3d2195 100644
--- a/tools/spi/.gitignore
+++ b/tools/spi/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
spidev_fdx
spidev_test
diff --git a/tools/spi/Makefile b/tools/spi/Makefile
index 5c342e655e55..ada881afb489 100644
--- a/tools/spi/Makefile
+++ b/tools/spi/Makefile
@@ -51,8 +51,10 @@ $(OUTPUT)spidev_fdx: $(SPIDEV_FDX_IN)
clean:
rm -f $(ALL_PROGRAMS)
- rm -f $(OUTPUT)include/linux/spi/spidev.h
- find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
+ rm -rf $(OUTPUT)include/
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '\.*.o.d' -delete
+ find $(if $(OUTPUT),$(OUTPUT),.) -name '\.*.o.cmd' -delete
install: $(ALL_PROGRAMS)
install -d -m 755 $(DESTDIR)$(bindir); \
diff --git a/tools/spi/spidev_test.c b/tools/spi/spidev_test.c
index 3559e7646256..eec23fa693bd 100644
--- a/tools/spi/spidev_test.c
+++ b/tools/spi/spidev_test.c
@@ -13,6 +13,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <errno.h>
#include <getopt.h>
#include <fcntl.h>
#include <time.h>
@@ -26,7 +27,11 @@
static void pabort(const char *s)
{
- perror(s);
+ if (errno != 0)
+ perror(s);
+ else
+ printf("%s\n", s);
+
abort();
}
@@ -123,18 +128,22 @@ static void transfer(int fd, uint8_t const *tx, uint8_t const *rx, size_t len)
.bits_per_word = bits,
};
- if (mode & SPI_TX_QUAD)
+ if (mode & SPI_TX_OCTAL)
+ tr.tx_nbits = 8;
+ else if (mode & SPI_TX_QUAD)
tr.tx_nbits = 4;
else if (mode & SPI_TX_DUAL)
tr.tx_nbits = 2;
- if (mode & SPI_RX_QUAD)
+ if (mode & SPI_RX_OCTAL)
+ tr.rx_nbits = 8;
+ else if (mode & SPI_RX_QUAD)
tr.rx_nbits = 4;
else if (mode & SPI_RX_DUAL)
tr.rx_nbits = 2;
if (!(mode & SPI_LOOP)) {
- if (mode & (SPI_TX_QUAD | SPI_TX_DUAL))
+ if (mode & (SPI_TX_OCTAL | SPI_TX_QUAD | SPI_TX_DUAL))
tr.rx_buf = 0;
- else if (mode & (SPI_RX_QUAD | SPI_RX_DUAL))
+ else if (mode & (SPI_RX_OCTAL | SPI_RX_QUAD | SPI_RX_DUAL))
tr.tx_buf = 0;
}
@@ -182,6 +191,7 @@ static void print_usage(const char *prog)
" -R --ready slave pulls low to pause\n"
" -2 --dual dual transfer\n"
" -4 --quad quad transfer\n"
+ " -8 --octal octal transfer\n"
" -S --size transfer size\n"
" -I --iter iterations\n");
exit(1);
@@ -208,13 +218,14 @@ static void parse_opts(int argc, char *argv[])
{ "dual", 0, 0, '2' },
{ "verbose", 0, 0, 'v' },
{ "quad", 0, 0, '4' },
+ { "octal", 0, 0, '8' },
{ "size", 1, 0, 'S' },
{ "iter", 1, 0, 'I' },
{ NULL, 0, 0, 0 },
};
int c;
- c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR24p:vS:I:",
+ c = getopt_long(argc, argv, "D:s:d:b:i:o:lHOLC3NR248p:vS:I:",
lopts, NULL);
if (c == -1)
@@ -275,6 +286,9 @@ static void parse_opts(int argc, char *argv[])
case '4':
mode |= SPI_TX_QUAD;
break;
+ case '8':
+ mode |= SPI_TX_OCTAL;
+ break;
case 'S':
transfer_size = atoi(optarg);
break;
@@ -283,7 +297,6 @@ static void parse_opts(int argc, char *argv[])
break;
default:
print_usage(argv[0]);
- break;
}
}
if (mode & SPI_LOOP) {
@@ -291,6 +304,8 @@ static void parse_opts(int argc, char *argv[])
mode |= SPI_RX_DUAL;
if (mode & SPI_TX_QUAD)
mode |= SPI_RX_QUAD;
+ if (mode & SPI_TX_OCTAL)
+ mode |= SPI_RX_OCTAL;
}
}
@@ -405,6 +420,9 @@ int main(int argc, char *argv[])
parse_opts(argc, argv);
+ if (input_tx && input_file)
+ pabort("only one of -p and --input may be selected");
+
fd = open(device, O_RDWR);
if (fd < 0)
pabort("can't open device");
@@ -446,9 +464,6 @@ int main(int argc, char *argv[])
printf("bits per word: %d\n", bits);
printf("max speed: %d Hz (%d KHz)\n", speed, speed/1000);
- if (input_tx && input_file)
- pabort("only one of -p and --input may be selected");
-
if (input_tx)
transfer_escaped_string(fd, input_tx);
else if (input_file)
diff --git a/tools/testing/kunit/.gitattributes b/tools/testing/kunit/.gitattributes
new file mode 100644
index 000000000000..5b7da1fc3b8f
--- /dev/null
+++ b/tools/testing/kunit/.gitattributes
@@ -0,0 +1 @@
+test_data/* binary
diff --git a/tools/testing/kunit/.gitignore b/tools/testing/kunit/.gitignore
index c791ff59a37a..1c63e31f7edf 100644
--- a/tools/testing/kunit/.gitignore
+++ b/tools/testing/kunit/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod] \ No newline at end of file
diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config
new file mode 100644
index 000000000000..239b9f03da2c
--- /dev/null
+++ b/tools/testing/kunit/configs/broken_on_uml.config
@@ -0,0 +1,41 @@
+# These are currently broken on UML and prevent allyesconfig from building
+# CONFIG_STATIC_LINK is not set
+# CONFIG_UML_NET_VECTOR is not set
+# CONFIG_UML_NET_VDE is not set
+# CONFIG_UML_NET_PCAP is not set
+# CONFIG_NET_PTP_CLASSIFY is not set
+# CONFIG_IP_VS is not set
+# CONFIG_BRIDGE_EBT_BROUTE is not set
+# CONFIG_BRIDGE_EBT_T_FILTER is not set
+# CONFIG_BRIDGE_EBT_T_NAT is not set
+# CONFIG_MTD_NAND_CADENCE is not set
+# CONFIG_MTD_NAND_NANDSIM is not set
+# CONFIG_BLK_DEV_NULL_BLK is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_SCSI_DEBUG is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+# CONFIG_NULL_TTY is not set
+# CONFIG_PTP_1588_CLOCK is not set
+# CONFIG_PINCTRL_EQUILIBRIUM is not set
+# CONFIG_DMABUF_SELFTESTS is not set
+# CONFIG_COMEDI is not set
+# CONFIG_XIL_AXIS_FIFO is not set
+# CONFIG_EXFAT_FS is not set
+# CONFIG_STM_DUMMY is not set
+# CONFIG_FSI_MASTER_ASPEED is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_UBIFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_CRYPTO_DEV_SAFEXCEL is not set
+# CONFIG_CRYPTO_DEV_AMLOGIC_GXL is not set
+# CONFIG_KCOV is not set
+# CONFIG_LKDTM is not set
+# CONFIG_REED_SOLOMON_TEST is not set
+# CONFIG_TEST_RHASHTABLE is not set
+# CONFIG_TEST_MEMINIT is not set
+# CONFIG_NETWORK_PHY_TIMESTAMPING is not set
+# CONFIG_DEBUG_INFO_BTF is not set
+# CONFIG_PTP_1588_CLOCK_INES is not set
+# CONFIG_QCOM_CPR is not set
+# CONFIG_RESET_BRCMSTB_RESCAL is not set
+# CONFIG_RESET_INTEL_GW is not set
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 180ad1e1b04f..787b6d4ad716 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -20,9 +20,20 @@ import kunit_config
import kunit_kernel
import kunit_parser
-KunitResult = namedtuple('KunitResult', ['status','result'])
+KunitResult = namedtuple('KunitResult', ['status','result','elapsed_time'])
-KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', 'build_dir', 'defconfig'])
+KunitConfigRequest = namedtuple('KunitConfigRequest',
+ ['build_dir', 'make_options'])
+KunitBuildRequest = namedtuple('KunitBuildRequest',
+ ['jobs', 'build_dir', 'alltests',
+ 'make_options'])
+KunitExecRequest = namedtuple('KunitExecRequest',
+ ['timeout', 'build_dir', 'alltests'])
+KunitParseRequest = namedtuple('KunitParseRequest',
+ ['raw_output', 'input_data'])
+KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs',
+ 'build_dir', 'alltests',
+ 'make_options'])
KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
@@ -44,88 +55,231 @@ def get_kernel_root_path():
sys.exit(1)
return parts[0]
-def run_tests(linux: kunit_kernel.LinuxSourceTree,
- request: KunitRequest) -> KunitResult:
+def config_tests(linux: kunit_kernel.LinuxSourceTree,
+ request: KunitConfigRequest) -> KunitResult:
+ kunit_parser.print_with_timestamp('Configuring KUnit Kernel ...')
+
config_start = time.time()
- success = linux.build_reconfig(request.build_dir)
+ create_default_kunitconfig()
+ success = linux.build_reconfig(request.build_dir, request.make_options)
config_end = time.time()
if not success:
- return KunitResult(KunitStatus.CONFIG_FAILURE, 'could not configure kernel')
+ return KunitResult(KunitStatus.CONFIG_FAILURE,
+ 'could not configure kernel',
+ config_end - config_start)
+ return KunitResult(KunitStatus.SUCCESS,
+ 'configured kernel successfully',
+ config_end - config_start)
+def build_tests(linux: kunit_kernel.LinuxSourceTree,
+ request: KunitBuildRequest) -> KunitResult:
kunit_parser.print_with_timestamp('Building KUnit Kernel ...')
build_start = time.time()
- success = linux.build_um_kernel(request.jobs, request.build_dir)
+ success = linux.build_um_kernel(request.alltests,
+ request.jobs,
+ request.build_dir,
+ request.make_options)
build_end = time.time()
if not success:
return KunitResult(KunitStatus.BUILD_FAILURE, 'could not build kernel')
+ if not success:
+ return KunitResult(KunitStatus.BUILD_FAILURE,
+ 'could not build kernel',
+ build_end - build_start)
+ return KunitResult(KunitStatus.SUCCESS,
+ 'built kernel successfully',
+ build_end - build_start)
+def exec_tests(linux: kunit_kernel.LinuxSourceTree,
+ request: KunitExecRequest) -> KunitResult:
kunit_parser.print_with_timestamp('Starting KUnit Kernel ...')
test_start = time.time()
+ result = linux.run_kernel(
+ timeout=None if request.alltests else request.timeout,
+ build_dir=request.build_dir)
+
+ test_end = time.time()
+
+ return KunitResult(KunitStatus.SUCCESS,
+ result,
+ test_end - test_start)
+
+def parse_tests(request: KunitParseRequest) -> KunitResult:
+ parse_start = time.time()
test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS,
[],
'Tests not Parsed.')
if request.raw_output:
- kunit_parser.raw_output(
- linux.run_kernel(timeout=request.timeout,
- build_dir=request.build_dir))
+ kunit_parser.raw_output(request.input_data)
else:
- kunit_output = linux.run_kernel(timeout=request.timeout,
- build_dir=request.build_dir)
- test_result = kunit_parser.parse_run_tests(kunit_output)
- test_end = time.time()
+ test_result = kunit_parser.parse_run_tests(request.input_data)
+ parse_end = time.time()
+
+ if test_result.status != kunit_parser.TestStatus.SUCCESS:
+ return KunitResult(KunitStatus.TEST_FAILURE, test_result,
+ parse_end - parse_start)
+
+ return KunitResult(KunitStatus.SUCCESS, test_result,
+ parse_end - parse_start)
+
+
+def run_tests(linux: kunit_kernel.LinuxSourceTree,
+ request: KunitRequest) -> KunitResult:
+ run_start = time.time()
+
+ config_request = KunitConfigRequest(request.build_dir,
+ request.make_options)
+ config_result = config_tests(linux, config_request)
+ if config_result.status != KunitStatus.SUCCESS:
+ return config_result
+
+ build_request = KunitBuildRequest(request.jobs, request.build_dir,
+ request.alltests,
+ request.make_options)
+ build_result = build_tests(linux, build_request)
+ if build_result.status != KunitStatus.SUCCESS:
+ return build_result
+
+ exec_request = KunitExecRequest(request.timeout, request.build_dir,
+ request.alltests)
+ exec_result = exec_tests(linux, exec_request)
+ if exec_result.status != KunitStatus.SUCCESS:
+ return exec_result
+
+ parse_request = KunitParseRequest(request.raw_output,
+ exec_result.result)
+ parse_result = parse_tests(parse_request)
+
+ run_end = time.time()
kunit_parser.print_with_timestamp((
'Elapsed time: %.3fs total, %.3fs configuring, %.3fs ' +
'building, %.3fs running\n') % (
- test_end - config_start,
- config_end - config_start,
- build_end - build_start,
- test_end - test_start))
+ run_end - run_start,
+ config_result.elapsed_time,
+ build_result.elapsed_time,
+ exec_result.elapsed_time))
+ return parse_result
+
+def add_common_opts(parser):
+ parser.add_argument('--build_dir',
+ help='As in the make command, it specifies the build '
+ 'directory.',
+ type=str, default='.kunit', metavar='build_dir')
+ parser.add_argument('--make_options',
+ help='X=Y make option, can be repeated.',
+ action='append')
+ parser.add_argument('--alltests',
+ help='Run all KUnit tests through allyesconfig',
+ action='store_true')
+
+def add_build_opts(parser):
+ parser.add_argument('--jobs',
+ help='As in the make command, "Specifies the number of '
+ 'jobs (commands) to run simultaneously."',
+ type=int, default=8, metavar='jobs')
+
+def add_exec_opts(parser):
+ parser.add_argument('--timeout',
+ help='maximum number of seconds to allow for all tests '
+ 'to run. This does not include time taken to build the '
+ 'tests.',
+ type=int,
+ default=300,
+ metavar='timeout')
+
+def add_parse_opts(parser):
+ parser.add_argument('--raw_output', help='don\'t format output from kernel',
+ action='store_true')
- if test_result.status != kunit_parser.TestStatus.SUCCESS:
- return KunitResult(KunitStatus.TEST_FAILURE, test_result)
- else:
- return KunitResult(KunitStatus.SUCCESS, test_result)
def main(argv, linux=None):
parser = argparse.ArgumentParser(
description='Helps writing and running KUnit tests.')
subparser = parser.add_subparsers(dest='subcommand')
+ # The 'run' command will config, build, exec, and parse in one go.
run_parser = subparser.add_parser('run', help='Runs KUnit tests.')
- run_parser.add_argument('--raw_output', help='don\'t format output from kernel',
- action='store_true')
-
- run_parser.add_argument('--timeout',
- help='maximum number of seconds to allow for all tests '
- 'to run. This does not include time taken to build the '
- 'tests.',
- type=int,
- default=300,
- metavar='timeout')
-
- run_parser.add_argument('--jobs',
- help='As in the make command, "Specifies the number of '
- 'jobs (commands) to run simultaneously."',
- type=int, default=8, metavar='jobs')
-
- run_parser.add_argument('--build_dir',
- help='As in the make command, it specifies the build '
- 'directory.',
- type=str, default='', metavar='build_dir')
-
- run_parser.add_argument('--defconfig',
- help='Uses a default .kunitconfig.',
- action='store_true')
+ add_common_opts(run_parser)
+ add_build_opts(run_parser)
+ add_exec_opts(run_parser)
+ add_parse_opts(run_parser)
+
+ config_parser = subparser.add_parser('config',
+ help='Ensures that .config contains all of '
+ 'the options in .kunitconfig')
+ add_common_opts(config_parser)
+
+ build_parser = subparser.add_parser('build', help='Builds a kernel with KUnit tests')
+ add_common_opts(build_parser)
+ add_build_opts(build_parser)
+
+ exec_parser = subparser.add_parser('exec', help='Run a kernel with KUnit tests')
+ add_common_opts(exec_parser)
+ add_exec_opts(exec_parser)
+ add_parse_opts(exec_parser)
+
+ # The 'parse' option is special, as it doesn't need the kernel source
+ # (therefore there is no need for a build_dir, hence no add_common_opts)
+ # and the '--file' argument is not relevant to 'run', so isn't in
+ # add_parse_opts()
+ parse_parser = subparser.add_parser('parse',
+ help='Parses KUnit results from a file, '
+ 'and parses formatted results.')
+ add_parse_opts(parse_parser)
+ parse_parser.add_argument('file',
+ help='Specifies the file to read results from.',
+ type=str, nargs='?', metavar='input_file')
cli_args = parser.parse_args(argv)
if cli_args.subcommand == 'run':
- if get_kernel_root_path():
- os.chdir(get_kernel_root_path())
+ if not os.path.exists(cli_args.build_dir):
+ os.mkdir(cli_args.build_dir)
+ kunit_kernel.kunitconfig_path = os.path.join(
+ cli_args.build_dir,
+ kunit_kernel.kunitconfig_path)
+
+ if not os.path.exists(kunit_kernel.kunitconfig_path):
+ create_default_kunitconfig()
+
+ if not linux:
+ linux = kunit_kernel.LinuxSourceTree()
+
+ request = KunitRequest(cli_args.raw_output,
+ cli_args.timeout,
+ cli_args.jobs,
+ cli_args.build_dir,
+ cli_args.alltests,
+ cli_args.make_options)
+ result = run_tests(linux, request)
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+ elif cli_args.subcommand == 'config':
+ if cli_args.build_dir:
+ if not os.path.exists(cli_args.build_dir):
+ os.mkdir(cli_args.build_dir)
+ kunit_kernel.kunitconfig_path = os.path.join(
+ cli_args.build_dir,
+ kunit_kernel.kunitconfig_path)
+
+ if not os.path.exists(kunit_kernel.kunitconfig_path):
+ create_default_kunitconfig()
+
+ if not linux:
+ linux = kunit_kernel.LinuxSourceTree()
+ request = KunitConfigRequest(cli_args.build_dir,
+ cli_args.make_options)
+ result = config_tests(linux, request)
+ kunit_parser.print_with_timestamp((
+ 'Elapsed time: %.3fs\n') % (
+ result.elapsed_time))
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+ elif cli_args.subcommand == 'build':
if cli_args.build_dir:
if not os.path.exists(cli_args.build_dir):
os.mkdir(cli_args.build_dir)
@@ -133,18 +287,57 @@ def main(argv, linux=None):
cli_args.build_dir,
kunit_kernel.kunitconfig_path)
- if cli_args.defconfig:
+ if not os.path.exists(kunit_kernel.kunitconfig_path):
create_default_kunitconfig()
if not linux:
linux = kunit_kernel.LinuxSourceTree()
- request = KunitRequest(cli_args.raw_output,
- cli_args.timeout,
- cli_args.jobs,
- cli_args.build_dir,
- cli_args.defconfig)
- result = run_tests(linux, request)
+ request = KunitBuildRequest(cli_args.jobs,
+ cli_args.build_dir,
+ cli_args.alltests,
+ cli_args.make_options)
+ result = build_tests(linux, request)
+ kunit_parser.print_with_timestamp((
+ 'Elapsed time: %.3fs\n') % (
+ result.elapsed_time))
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+ elif cli_args.subcommand == 'exec':
+ if cli_args.build_dir:
+ if not os.path.exists(cli_args.build_dir):
+ os.mkdir(cli_args.build_dir)
+ kunit_kernel.kunitconfig_path = os.path.join(
+ cli_args.build_dir,
+ kunit_kernel.kunitconfig_path)
+
+ if not os.path.exists(kunit_kernel.kunitconfig_path):
+ create_default_kunitconfig()
+
+ if not linux:
+ linux = kunit_kernel.LinuxSourceTree()
+
+ exec_request = KunitExecRequest(cli_args.timeout,
+ cli_args.build_dir,
+ cli_args.alltests)
+ exec_result = exec_tests(linux, exec_request)
+ parse_request = KunitParseRequest(cli_args.raw_output,
+ exec_result.result)
+ result = parse_tests(parse_request)
+ kunit_parser.print_with_timestamp((
+ 'Elapsed time: %.3fs\n') % (
+ exec_result.elapsed_time))
+ if result.status != KunitStatus.SUCCESS:
+ sys.exit(1)
+ elif cli_args.subcommand == 'parse':
+ if cli_args.file == None:
+ kunit_output = sys.stdin
+ else:
+ with open(cli_args.file, 'r') as f:
+ kunit_output = f.read().splitlines()
+ request = KunitParseRequest(cli_args.raw_output,
+ kunit_output)
+ result = parse_tests(request)
if result.status != KunitStatus.SUCCESS:
sys.exit(1)
else:
diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
index ebf3942b23f5..e75063d603b5 100644
--- a/tools/testing/kunit/kunit_config.py
+++ b/tools/testing/kunit/kunit_config.py
@@ -9,16 +9,18 @@
import collections
import re
-CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_\w+ is not set$'
-CONFIG_PATTERN = r'^CONFIG_\w+=\S+$'
-
-KconfigEntryBase = collections.namedtuple('KconfigEntry', ['raw_entry'])
+CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$'
+CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+)$'
+KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value'])
class KconfigEntry(KconfigEntryBase):
def __str__(self) -> str:
- return self.raw_entry
+ if self.value == 'n':
+ return r'# CONFIG_%s is not set' % (self.name)
+ else:
+ return r'CONFIG_%s=%s' % (self.name, self.value)
class KconfigParseError(Exception):
@@ -38,7 +40,17 @@ class Kconfig(object):
self._entries.append(entry)
def is_subset_of(self, other: 'Kconfig') -> bool:
- return self.entries().issubset(other.entries())
+ for a in self.entries():
+ found = False
+ for b in other.entries():
+ if a.name != b.name:
+ continue
+ if a.value != b.value:
+ return False
+ found = True
+ if a.value != 'n' and found == False:
+ return False
+ return True
def write_to_file(self, path: str) -> None:
with open(path, 'w') as f:
@@ -54,9 +66,20 @@ class Kconfig(object):
line = line.strip()
if not line:
continue
- elif config_matcher.match(line) or is_not_set_matcher.match(line):
- self._entries.append(KconfigEntry(line))
- elif line[0] == '#':
+
+ match = config_matcher.match(line)
+ if match:
+ entry = KconfigEntry(match.group(1), match.group(2))
+ self.add_entry(entry)
+ continue
+
+ empty_match = is_not_set_matcher.match(line)
+ if empty_match:
+ entry = KconfigEntry(empty_match.group(1), 'n')
+ self.add_entry(entry)
+ continue
+
+ if line[0] == '#':
continue
else:
raise KconfigParseError('Failed to parse: ' + line)
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index d99ae75ef72f..63dbda2d029f 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -10,11 +10,16 @@
import logging
import subprocess
import os
+import signal
+
+from contextlib import ExitStack
import kunit_config
+import kunit_parser
KCONFIG_PATH = '.config'
kunitconfig_path = '.kunitconfig'
+BROKEN_ALLCONFIG_PATH = 'tools/testing/kunit/configs/broken_on_uml.config'
class ConfigError(Exception):
"""Represents an error trying to configure the Linux kernel."""
@@ -35,19 +40,40 @@ class LinuxSourceTreeOperations(object):
except subprocess.CalledProcessError as e:
raise ConfigError(e.output)
- def make_olddefconfig(self, build_dir):
+ def make_olddefconfig(self, build_dir, make_options):
command = ['make', 'ARCH=um', 'olddefconfig']
+ if make_options:
+ command.extend(make_options)
if build_dir:
command += ['O=' + build_dir]
try:
- subprocess.check_output(command)
+ subprocess.check_output(command, stderr=subprocess.PIPE)
except OSError as e:
raise ConfigError('Could not call make command: ' + e)
except subprocess.CalledProcessError as e:
raise ConfigError(e.output)
- def make(self, jobs, build_dir):
+ def make_allyesconfig(self):
+ kunit_parser.print_with_timestamp(
+ 'Enabling all CONFIGs for UML...')
+ process = subprocess.Popen(
+ ['make', 'ARCH=um', 'allyesconfig'],
+ stdout=subprocess.DEVNULL,
+ stderr=subprocess.STDOUT)
+ process.wait()
+ kunit_parser.print_with_timestamp(
+ 'Disabling broken configs to run KUnit tests...')
+ with ExitStack() as es:
+ config = open(KCONFIG_PATH, 'a')
+ disable = open(BROKEN_ALLCONFIG_PATH, 'r').read()
+ config.write(disable)
+ kunit_parser.print_with_timestamp(
+ 'Starting Kernel with all configs takes a few minutes...')
+
+ def make(self, jobs, build_dir, make_options):
command = ['make', 'ARCH=um', '--jobs=' + str(jobs)]
+ if make_options:
+ command.extend(make_options)
if build_dir:
command += ['O=' + build_dir]
try:
@@ -57,18 +83,16 @@ class LinuxSourceTreeOperations(object):
except subprocess.CalledProcessError as e:
raise BuildError(e.output)
- def linux_bin(self, params, timeout, build_dir):
+ def linux_bin(self, params, timeout, build_dir, outfile):
"""Runs the Linux UML binary. Must be named 'linux'."""
linux_bin = './linux'
if build_dir:
linux_bin = os.path.join(build_dir, 'linux')
- process = subprocess.Popen(
- [linux_bin] + params,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
- process.wait(timeout=timeout)
- return process
+ with open(outfile, 'w') as output:
+ process = subprocess.Popen([linux_bin] + params,
+ stdout=output,
+ stderr=subprocess.STDOUT)
+ process.wait(timeout)
def get_kconfig_path(build_dir):
@@ -84,6 +108,7 @@ class LinuxSourceTree(object):
self._kconfig = kunit_config.Kconfig()
self._kconfig.read_from_file(kunitconfig_path)
self._ops = LinuxSourceTreeOperations()
+ signal.signal(signal.SIGINT, self.signal_handler)
def clean(self):
try:
@@ -107,19 +132,19 @@ class LinuxSourceTree(object):
return False
return True
- def build_config(self, build_dir):
+ def build_config(self, build_dir, make_options):
kconfig_path = get_kconfig_path(build_dir)
if build_dir and not os.path.exists(build_dir):
os.mkdir(build_dir)
self._kconfig.write_to_file(kconfig_path)
try:
- self._ops.make_olddefconfig(build_dir)
+ self._ops.make_olddefconfig(build_dir, make_options)
except ConfigError as e:
logging.error(e)
return False
return self.validate_config(build_dir)
- def build_reconfig(self, build_dir):
+ def build_reconfig(self, build_dir, make_options):
"""Creates a new .config if it is not a subset of the .kunitconfig."""
kconfig_path = get_kconfig_path(build_dir)
if os.path.exists(kconfig_path):
@@ -128,26 +153,33 @@ class LinuxSourceTree(object):
if not self._kconfig.is_subset_of(existing_kconfig):
print('Regenerating .config ...')
os.remove(kconfig_path)
- return self.build_config(build_dir)
+ return self.build_config(build_dir, make_options)
else:
return True
else:
print('Generating .config ...')
- return self.build_config(build_dir)
+ return self.build_config(build_dir, make_options)
- def build_um_kernel(self, jobs, build_dir):
+ def build_um_kernel(self, alltests, jobs, build_dir, make_options):
+ if alltests:
+ self._ops.make_allyesconfig()
try:
- self._ops.make_olddefconfig(build_dir)
- self._ops.make(jobs, build_dir)
+ self._ops.make_olddefconfig(build_dir, make_options)
+ self._ops.make(jobs, build_dir, make_options)
except (ConfigError, BuildError) as e:
logging.error(e)
return False
return self.validate_config(build_dir)
- def run_kernel(self, args=[], timeout=None, build_dir=''):
- args.extend(['mem=256M'])
- process = self._ops.linux_bin(args, timeout, build_dir)
- with open(os.path.join(build_dir, 'test.log'), 'w') as f:
- for line in process.stdout:
- f.write(line.rstrip().decode('ascii') + '\n')
- yield line.rstrip().decode('ascii')
+ def run_kernel(self, args=[], build_dir='', timeout=None):
+ args.extend(['mem=1G'])
+ outfile = 'test.log'
+ self._ops.linux_bin(args, timeout, build_dir, outfile)
+ subprocess.call(['stty', 'sane'])
+ with open(outfile, 'r') as file:
+ for line in file:
+ yield line
+
+ def signal_handler(self, sig, frame):
+ logging.error('Build interruption occurred. Cleaning console.')
+ subprocess.call(['stty', 'sane'])
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index 4ffbae0f6732..64aac9dcd431 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -46,23 +46,26 @@ class TestStatus(Enum):
TEST_CRASHED = auto()
NO_TESTS = auto()
-kunit_start_re = re.compile(r'^TAP version [0-9]+$')
-kunit_end_re = re.compile('List of all partitions:')
+kunit_start_re = re.compile(r'TAP version [0-9]+$')
+kunit_end_re = re.compile('(List of all partitions:|'
+ 'Kernel panic - not syncing: VFS:|reboot: System halted)')
def isolate_kunit_output(kernel_output):
started = False
for line in kernel_output:
- if kunit_start_re.match(line):
+ if kunit_start_re.search(line):
+ prefix_len = len(line.split('TAP version')[0])
started = True
- yield line
- elif kunit_end_re.match(line):
+ yield line[prefix_len:] if prefix_len > 0 else line
+ elif kunit_end_re.search(line):
break
elif started:
- yield line
+ yield line[prefix_len:] if prefix_len > 0 else line
def raw_output(kernel_output):
for line in kernel_output:
print(line)
+ yield line
DIVIDER = '=' * 60
@@ -91,7 +94,7 @@ def print_log(log):
for m in log:
print_with_timestamp(m)
-TAP_ENTRIES = re.compile(r'^(TAP|\t?ok|\t?not ok|\t?[0-9]+\.\.[0-9]+|\t?#).*$')
+TAP_ENTRIES = re.compile(r'^(TAP|[\s]*ok|[\s]*not ok|[\s]*[0-9]+\.\.[0-9]+|[\s]*#).*$')
def consume_non_diagnositic(lines: List[str]) -> None:
while lines and not TAP_ENTRIES.match(lines[0]):
@@ -104,22 +107,20 @@ def save_non_diagnositic(lines: List[str], test_case: TestCase) -> None:
OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text'])
-OK_NOT_OK_SUBTEST = re.compile(r'^\t(ok|not ok) [0-9]+ - (.*)$')
+OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$')
OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$')
-def parse_ok_not_ok_test_case(lines: List[str],
- test_case: TestCase,
- expecting_test_case: bool) -> bool:
+def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool:
save_non_diagnositic(lines, test_case)
if not lines:
- if expecting_test_case:
- test_case.status = TestStatus.TEST_CRASHED
- return True
- else:
- return False
+ test_case.status = TestStatus.TEST_CRASHED
+ return True
line = lines[0]
match = OK_NOT_OK_SUBTEST.match(line)
+ while not match and lines:
+ line = lines.pop(0)
+ match = OK_NOT_OK_SUBTEST.match(line)
if match:
test_case.log.append(lines.pop(0))
test_case.name = match.group(2)
@@ -133,7 +134,7 @@ def parse_ok_not_ok_test_case(lines: List[str],
else:
return False
-SUBTEST_DIAGNOSTIC = re.compile(r'^\t# .*?: (.*)$')
+SUBTEST_DIAGNOSTIC = re.compile(r'^[\s]+# .*?: (.*)$')
DIAGNOSTIC_CRASH_MESSAGE = 'kunit test case crashed!'
def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
@@ -150,17 +151,17 @@ def parse_diagnostic(lines: List[str], test_case: TestCase) -> bool:
else:
return False
-def parse_test_case(lines: List[str], expecting_test_case: bool) -> TestCase:
+def parse_test_case(lines: List[str]) -> TestCase:
test_case = TestCase()
save_non_diagnositic(lines, test_case)
while parse_diagnostic(lines, test_case):
pass
- if parse_ok_not_ok_test_case(lines, test_case, expecting_test_case):
+ if parse_ok_not_ok_test_case(lines, test_case):
return test_case
else:
return None
-SUBTEST_HEADER = re.compile(r'^\t# Subtest: (.*)$')
+SUBTEST_HEADER = re.compile(r'^[\s]+# Subtest: (.*)$')
def parse_subtest_header(lines: List[str]) -> str:
consume_non_diagnositic(lines)
@@ -173,7 +174,7 @@ def parse_subtest_header(lines: List[str]) -> str:
else:
return None
-SUBTEST_PLAN = re.compile(r'\t[0-9]+\.\.([0-9]+)')
+SUBTEST_PLAN = re.compile(r'[\s]+[0-9]+\.\.([0-9]+)')
def parse_subtest_plan(lines: List[str]) -> int:
consume_non_diagnositic(lines)
@@ -234,11 +235,11 @@ def parse_test_suite(lines: List[str]) -> TestSuite:
expected_test_case_num = parse_subtest_plan(lines)
if not expected_test_case_num:
return None
- test_case = parse_test_case(lines, expected_test_case_num > 0)
- expected_test_case_num -= 1
- while test_case:
+ while expected_test_case_num > 0:
+ test_case = parse_test_case(lines)
+ if not test_case:
+ break
test_suite.cases.append(test_case)
- test_case = parse_test_case(lines, expected_test_case_num > 0)
expected_test_case_num -= 1
if parse_ok_not_ok_test_suite(lines, test_suite):
test_suite.status = bubble_up_test_case_errors(test_suite)
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index cba97756ac4a..5bb7b118ebd9 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -37,7 +37,7 @@ class KconfigTest(unittest.TestCase):
self.assertTrue(kconfig0.is_subset_of(kconfig0))
kconfig1 = kunit_config.Kconfig()
- kconfig1.add_entry(kunit_config.KconfigEntry('CONFIG_TEST=y'))
+ kconfig1.add_entry(kunit_config.KconfigEntry('TEST', 'y'))
self.assertTrue(kconfig1.is_subset_of(kconfig1))
self.assertTrue(kconfig0.is_subset_of(kconfig1))
self.assertFalse(kconfig1.is_subset_of(kconfig0))
@@ -51,15 +51,15 @@ class KconfigTest(unittest.TestCase):
expected_kconfig = kunit_config.Kconfig()
expected_kconfig.add_entry(
- kunit_config.KconfigEntry('CONFIG_UML=y'))
+ kunit_config.KconfigEntry('UML', 'y'))
expected_kconfig.add_entry(
- kunit_config.KconfigEntry('CONFIG_MMU=y'))
+ kunit_config.KconfigEntry('MMU', 'y'))
expected_kconfig.add_entry(
- kunit_config.KconfigEntry('CONFIG_TEST=y'))
+ kunit_config.KconfigEntry('TEST', 'y'))
expected_kconfig.add_entry(
- kunit_config.KconfigEntry('CONFIG_EXAMPLE_TEST=y'))
+ kunit_config.KconfigEntry('EXAMPLE_TEST', 'y'))
expected_kconfig.add_entry(
- kunit_config.KconfigEntry('# CONFIG_MK8 is not set'))
+ kunit_config.KconfigEntry('MK8', 'n'))
self.assertEqual(kconfig.entries(), expected_kconfig.entries())
@@ -68,15 +68,15 @@ class KconfigTest(unittest.TestCase):
expected_kconfig = kunit_config.Kconfig()
expected_kconfig.add_entry(
- kunit_config.KconfigEntry('CONFIG_UML=y'))
+ kunit_config.KconfigEntry('UML', 'y'))
expected_kconfig.add_entry(
- kunit_config.KconfigEntry('CONFIG_MMU=y'))
+ kunit_config.KconfigEntry('MMU', 'y'))
expected_kconfig.add_entry(
- kunit_config.KconfigEntry('CONFIG_TEST=y'))
+ kunit_config.KconfigEntry('TEST', 'y'))
expected_kconfig.add_entry(
- kunit_config.KconfigEntry('CONFIG_EXAMPLE_TEST=y'))
+ kunit_config.KconfigEntry('EXAMPLE_TEST', 'y'))
expected_kconfig.add_entry(
- kunit_config.KconfigEntry('# CONFIG_MK8 is not set'))
+ kunit_config.KconfigEntry('MK8', 'n'))
expected_kconfig.write_to_file(kconfig_path)
@@ -108,6 +108,36 @@ class KUnitParserTest(unittest.TestCase):
self.assertContains('ok 1 - example', result)
file.close()
+ def test_output_with_prefix_isolated_correctly(self):
+ log_path = get_absolute_path(
+ 'test_data/test_pound_sign.log')
+ with open(log_path) as file:
+ result = kunit_parser.isolate_kunit_output(file.readlines())
+ self.assertContains('TAP version 14\n', result)
+ self.assertContains(' # Subtest: kunit-resource-test', result)
+ self.assertContains(' 1..5', result)
+ self.assertContains(' ok 1 - kunit_resource_test_init_resources', result)
+ self.assertContains(' ok 2 - kunit_resource_test_alloc_resource', result)
+ self.assertContains(' ok 3 - kunit_resource_test_destroy_resource', result)
+ self.assertContains(' foo bar #', result)
+ self.assertContains(' ok 4 - kunit_resource_test_cleanup_resources', result)
+ self.assertContains(' ok 5 - kunit_resource_test_proper_free_ordering', result)
+ self.assertContains('ok 1 - kunit-resource-test', result)
+ self.assertContains(' foo bar # non-kunit output', result)
+ self.assertContains(' # Subtest: kunit-try-catch-test', result)
+ self.assertContains(' 1..2', result)
+ self.assertContains(' ok 1 - kunit_test_try_catch_successful_try_no_catch',
+ result)
+ self.assertContains(' ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch',
+ result)
+ self.assertContains('ok 2 - kunit-try-catch-test', result)
+ self.assertContains(' # Subtest: string-stream-test', result)
+ self.assertContains(' 1..3', result)
+ self.assertContains(' ok 1 - string_stream_test_empty_on_creation', result)
+ self.assertContains(' ok 2 - string_stream_test_not_empty_after_add', result)
+ self.assertContains(' ok 3 - string_stream_test_get_string', result)
+ self.assertContains('ok 3 - string-stream-test', result)
+
def test_parse_successful_test_log(self):
all_passed_log = get_absolute_path(
'test_data/test_is_test_passed-all_passed.log')
@@ -150,6 +180,45 @@ class KUnitParserTest(unittest.TestCase):
result.status)
file.close()
+ def test_ignores_prefix_printk_time(self):
+ prefix_log = get_absolute_path(
+ 'test_data/test_config_printk_time.log')
+ with open(prefix_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines())
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
+
+ def test_ignores_multiple_prefixes(self):
+ prefix_log = get_absolute_path(
+ 'test_data/test_multiple_prefixes.log')
+ with open(prefix_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines())
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
+
+ def test_prefix_mixed_kernel_output(self):
+ mixed_prefix_log = get_absolute_path(
+ 'test_data/test_interrupted_tap_output.log')
+ with open(mixed_prefix_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines())
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
+
+ def test_prefix_poundsign(self):
+ pound_log = get_absolute_path('test_data/test_pound_sign.log')
+ with open(pound_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines())
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
+
+ def test_kernel_panic_end(self):
+ panic_log = get_absolute_path('test_data/test_kernel_panic_interrupt.log')
+ with open(panic_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines())
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
+
+ def test_pound_no_prefix(self):
+ pound_log = get_absolute_path('test_data/test_pound_no_prefix.log')
+ with open(pound_log) as file:
+ result = kunit_parser.parse_run_tests(file.readlines())
+ self.assertEqual('kunit-resource-test', result.suites[0].name)
+
class StrContains(str):
def __eq__(self, other):
return self in other
@@ -170,13 +239,39 @@ class KUnitMainTest(unittest.TestCase):
self.print_patch.stop()
pass
+ def test_config_passes_args_pass(self):
+ kunit.main(['config'], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 1
+ assert self.linux_source_mock.run_kernel.call_count == 0
+
+ def test_build_passes_args_pass(self):
+ kunit.main(['build'], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 0
+ self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, '', None)
+ assert self.linux_source_mock.run_kernel.call_count == 0
+
+ def test_exec_passes_args_pass(self):
+ kunit.main(['exec'], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 0
+ assert self.linux_source_mock.run_kernel.call_count == 1
+ self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=300)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
def test_run_passes_args_pass(self):
kunit.main(['run'], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
assert self.linux_source_mock.run_kernel.call_count == 1
- self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=300)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ build_dir='', timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
+ def test_exec_passes_args_fail(self):
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ with self.assertRaises(SystemExit) as e:
+ kunit.main(['exec'], self.linux_source_mock)
+ assert type(e.exception) == SystemExit
+ assert e.exception.code == 1
+
def test_run_passes_args_fail(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
with self.assertRaises(SystemExit) as e:
@@ -187,6 +282,14 @@ class KUnitMainTest(unittest.TestCase):
assert self.linux_source_mock.run_kernel.call_count == 1
self.print_mock.assert_any_call(StrContains(' 0 tests run'))
+ def test_exec_raw_output(self):
+ self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
+ kunit.main(['exec', '--raw_output'], self.linux_source_mock)
+ assert self.linux_source_mock.run_kernel.call_count == 1
+ for kall in self.print_mock.call_args_list:
+ assert kall != mock.call(StrContains('Testing complete.'))
+ assert kall != mock.call(StrContains(' 0 tests run'))
+
def test_run_raw_output(self):
self.linux_source_mock.run_kernel = mock.Mock(return_value=[])
kunit.main(['run', '--raw_output'], self.linux_source_mock)
@@ -196,17 +299,41 @@ class KUnitMainTest(unittest.TestCase):
assert kall != mock.call(StrContains('Testing complete.'))
assert kall != mock.call(StrContains(' 0 tests run'))
+ def test_exec_timeout(self):
+ timeout = 3453
+ kunit.main(['exec', '--timeout', str(timeout)], self.linux_source_mock)
+ self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=timeout)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
def test_run_timeout(self):
timeout = 3453
kunit.main(['run', '--timeout', str(timeout)], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
- self.linux_source_mock.run_kernel.assert_called_once_with(build_dir='', timeout=timeout)
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ build_dir='', timeout=timeout)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
def test_run_builddir(self):
build_dir = '.kunit'
kunit.main(['run', '--build_dir', build_dir], self.linux_source_mock)
assert self.linux_source_mock.build_reconfig.call_count == 1
+ self.linux_source_mock.run_kernel.assert_called_once_with(
+ build_dir=build_dir, timeout=300)
+ self.print_mock.assert_any_call(StrContains('Testing complete.'))
+
+ def test_config_builddir(self):
+ build_dir = '.kunit'
+ kunit.main(['config', '--build_dir', build_dir], self.linux_source_mock)
+ assert self.linux_source_mock.build_reconfig.call_count == 1
+
+ def test_build_builddir(self):
+ build_dir = '.kunit'
+ kunit.main(['build', '--build_dir', build_dir], self.linux_source_mock)
+ self.linux_source_mock.build_um_kernel.assert_called_once_with(False, 8, build_dir, None)
+
+ def test_exec_builddir(self):
+ build_dir = '.kunit'
+ kunit.main(['exec', '--build_dir', build_dir], self.linux_source_mock)
self.linux_source_mock.run_kernel.assert_called_once_with(build_dir=build_dir, timeout=300)
self.print_mock.assert_any_call(StrContains('Testing complete.'))
diff --git a/tools/testing/kunit/test_data/test_config_printk_time.log b/tools/testing/kunit/test_data/test_config_printk_time.log
new file mode 100644
index 000000000000..c02ca773946d
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_config_printk_time.log
@@ -0,0 +1,31 @@
+[ 0.060000] printk: console [mc-1] enabled
+[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[ 0.060000] TAP version 14
+[ 0.060000] # Subtest: kunit-resource-test
+[ 0.060000] 1..5
+[ 0.060000] ok 1 - kunit_resource_test_init_resources
+[ 0.060000] ok 2 - kunit_resource_test_alloc_resource
+[ 0.060000] ok 3 - kunit_resource_test_destroy_resource
+[ 0.060000] ok 4 - kunit_resource_test_cleanup_resources
+[ 0.060000] ok 5 - kunit_resource_test_proper_free_ordering
+[ 0.060000] ok 1 - kunit-resource-test
+[ 0.060000] # Subtest: kunit-try-catch-test
+[ 0.060000] 1..2
+[ 0.060000] ok 1 - kunit_test_try_catch_successful_try_no_catch
+[ 0.060000] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[ 0.060000] ok 2 - kunit-try-catch-test
+[ 0.060000] # Subtest: string-stream-test
+[ 0.060000] 1..3
+[ 0.060000] ok 1 - string_stream_test_empty_on_creation
+[ 0.060000] ok 2 - string_stream_test_not_empty_after_add
+[ 0.060000] ok 3 - string_stream_test_get_string
+[ 0.060000] ok 3 - string-stream-test
+[ 0.060000] List of all partitions:
+[ 0.060000] No filesystem could mount root, tried:
+[ 0.060000]
+[ 0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[ 0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[ 0.060000] Stack:
+[ 0.060000] 602086f8 601bc260 705c0000 705c0000
+[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab
+[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_interrupted_tap_output.log b/tools/testing/kunit/test_data/test_interrupted_tap_output.log
new file mode 100644
index 000000000000..5c73fb3a1c6f
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_interrupted_tap_output.log
@@ -0,0 +1,37 @@
+[ 0.060000] printk: console [mc-1] enabled
+[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[ 0.060000] TAP version 14
+[ 0.060000] # Subtest: kunit-resource-test
+[ 0.060000] 1..5
+[ 0.060000] ok 1 - kunit_resource_test_init_resources
+[ 0.060000] ok 2 - kunit_resource_test_alloc_resource
+[ 0.060000] ok 3 - kunit_resource_test_destroy_resource
+[ 0.060000] kAFS: Red Hat AFS client v0.1 registering.
+[ 0.060000] FS-Cache: Netfs 'afs' registered for caching
+[ 0.060000] *** VALIDATE kAFS ***
+[ 0.060000] Btrfs loaded, crc32c=crc32c-generic, debug=on, assert=on, integrity-checker=on, ref-verify=on
+[ 0.060000] BTRFS: selftest: sectorsize: 4096 nodesize: 4096
+[ 0.060000] BTRFS: selftest: running btrfs free space cache tests
+[ 0.060000] ok 4 - kunit_resource_test_cleanup_resources
+[ 0.060000] ok 5 - kunit_resource_test_proper_free_ordering
+[ 0.060000] ok 1 - kunit-resource-test
+[ 0.060000] # Subtest: kunit-try-catch-test
+[ 0.060000] 1..2
+[ 0.060000] ok 1 - kunit_test_try_catch_successful_try_no_catch
+[ 0.060000] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[ 0.060000] ok 2 - kunit-try-catch-test
+[ 0.060000] # Subtest: string-stream-test
+[ 0.060000] 1..3
+[ 0.060000] ok 1 - string_stream_test_empty_on_creation
+[ 0.060000] ok 2 - string_stream_test_not_empty_after_add
+[ 0.060000] ok 3 - string_stream_test_get_string
+[ 0.060000] ok 3 - string-stream-test
+[ 0.060000] List of all partitions:
+[ 0.060000] No filesystem could mount root, tried:
+[ 0.060000]
+[ 0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[ 0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[ 0.060000] Stack:
+[ 0.060000] 602086f8 601bc260 705c0000 705c0000
+[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab
+[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log
new file mode 100644
index 000000000000..c045eee75f27
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_kernel_panic_interrupt.log
@@ -0,0 +1,25 @@
+[ 0.060000] printk: console [mc-1] enabled
+[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[ 0.060000] TAP version 14
+[ 0.060000] # Subtest: kunit-resource-test
+[ 0.060000] 1..5
+[ 0.060000] ok 1 - kunit_resource_test_init_resources
+[ 0.060000] ok 2 - kunit_resource_test_alloc_resource
+[ 0.060000] ok 3 - kunit_resource_test_destroy_resource
+[ 0.060000] ok 4 - kunit_resource_test_cleanup_resources
+[ 0.060000] ok 5 - kunit_resource_test_proper_free_ordering
+[ 0.060000] ok 1 - kunit-resource-test
+[ 0.060000] # Subtest: kunit-try-catch-test
+[ 0.060000] 1..2
+[ 0.060000] ok 1 - kunit_test_try_catch_successful_try_no_catch
+[ 0.060000] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[ 0.060000] ok 2 - kunit-try-catch-test
+[ 0.060000] # Subtest: string-stream-test
+[ 0.060000] 1..3
+[ 0.060000] ok 1 - string_stream_test_empty_on_creation
+[ 0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[ 0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[ 0.060000] Stack:
+[ 0.060000] 602086f8 601bc260 705c0000 705c0000
+[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab
+[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_multiple_prefixes.log b/tools/testing/kunit/test_data/test_multiple_prefixes.log
new file mode 100644
index 000000000000..bc48407dcc36
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_multiple_prefixes.log
@@ -0,0 +1,31 @@
+[ 0.060000][ T1] printk: console [mc-1] enabled
+[ 0.060000][ T1] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[ 0.060000][ T1] TAP version 14
+[ 0.060000][ T1] # Subtest: kunit-resource-test
+[ 0.060000][ T1] 1..5
+[ 0.060000][ T1] ok 1 - kunit_resource_test_init_resources
+[ 0.060000][ T1] ok 2 - kunit_resource_test_alloc_resource
+[ 0.060000][ T1] ok 3 - kunit_resource_test_destroy_resource
+[ 0.060000][ T1] ok 4 - kunit_resource_test_cleanup_resources
+[ 0.060000][ T1] ok 5 - kunit_resource_test_proper_free_ordering
+[ 0.060000][ T1] ok 1 - kunit-resource-test
+[ 0.060000][ T1] # Subtest: kunit-try-catch-test
+[ 0.060000][ T1] 1..2
+[ 0.060000][ T1] ok 1 - kunit_test_try_catch_successful_try_no_catch
+[ 0.060000][ T1] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[ 0.060000][ T1] ok 2 - kunit-try-catch-test
+[ 0.060000][ T1] # Subtest: string-stream-test
+[ 0.060000][ T1] 1..3
+[ 0.060000][ T1] ok 1 - string_stream_test_empty_on_creation
+[ 0.060000][ T1] ok 2 - string_stream_test_not_empty_after_add
+[ 0.060000][ T1] ok 3 - string_stream_test_get_string
+[ 0.060000][ T1] ok 3 - string-stream-test
+[ 0.060000][ T1] List of all partitions:
+[ 0.060000][ T1] No filesystem could mount root, tried:
+[ 0.060000][ T1]
+[ 0.060000][ T1] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[ 0.060000][ T1] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[ 0.060000][ T1] Stack:
+[ 0.060000][ T1] 602086f8 601bc260 705c0000 705c0000
+[ 0.060000][ T1] 602086f8 6005fcec 705c0000 6002c6ab
+[ 0.060000][ T1] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_output_with_prefix_isolated_correctly.log b/tools/testing/kunit/test_data/test_output_with_prefix_isolated_correctly.log
new file mode 100644
index 000000000000..0f87cdabebb0
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_output_with_prefix_isolated_correctly.log
@@ -0,0 +1,33 @@
+[ 0.060000] printk: console [mc-1] enabled
+[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[ 0.060000] TAP version 14
+[ 0.060000] # Subtest: kunit-resource-test
+[ 0.060000] 1..5
+[ 0.060000] ok 1 - kunit_resource_test_init_resources
+[ 0.060000] ok 2 - kunit_resource_test_alloc_resource
+[ 0.060000] ok 3 - kunit_resource_test_destroy_resource
+[ 0.060000] foo bar #
+[ 0.060000] ok 4 - kunit_resource_test_cleanup_resources
+[ 0.060000] ok 5 - kunit_resource_test_proper_free_ordering
+[ 0.060000] ok 1 - kunit-resource-test
+[ 0.060000] foo bar # non-kunit output
+[ 0.060000] # Subtest: kunit-try-catch-test
+[ 0.060000] 1..2
+[ 0.060000] ok 1 - kunit_test_try_catch_successful_try_no_catch
+[ 0.060000] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[ 0.060000] ok 2 - kunit-try-catch-test
+[ 0.060000] # Subtest: string-stream-test
+[ 0.060000] 1..3
+[ 0.060000] ok 1 - string_stream_test_empty_on_creation
+[ 0.060000] ok 2 - string_stream_test_not_empty_after_add
+[ 0.060000] ok 3 - string_stream_test_get_string
+[ 0.060000] ok 3 - string-stream-test
+[ 0.060000] List of all partitions:
+[ 0.060000] No filesystem could mount root, tried:
+[ 0.060000]
+[ 0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[ 0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[ 0.060000] Stack:
+[ 0.060000] 602086f8 601bc260 705c0000 705c0000
+[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab
+[ 0.060000] 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_pound_no_prefix.log b/tools/testing/kunit/test_data/test_pound_no_prefix.log
new file mode 100644
index 000000000000..2ceb360be7d5
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_pound_no_prefix.log
@@ -0,0 +1,33 @@
+ printk: console [mc-1] enabled
+ random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+ TAP version 14
+ # Subtest: kunit-resource-test
+ 1..5
+ ok 1 - kunit_resource_test_init_resources
+ ok 2 - kunit_resource_test_alloc_resource
+ ok 3 - kunit_resource_test_destroy_resource
+ foo bar #
+ ok 4 - kunit_resource_test_cleanup_resources
+ ok 5 - kunit_resource_test_proper_free_ordering
+ ok 1 - kunit-resource-test
+ foo bar # non-kunit output
+ # Subtest: kunit-try-catch-test
+ 1..2
+ ok 1 - kunit_test_try_catch_successful_try_no_catch
+ ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+ ok 2 - kunit-try-catch-test
+ # Subtest: string-stream-test
+ 1..3
+ ok 1 - string_stream_test_empty_on_creation
+ ok 2 - string_stream_test_not_empty_after_add
+ ok 3 - string_stream_test_get_string
+ ok 3 - string-stream-test
+ List of all partitions:
+ No filesystem could mount root, tried:
+
+ Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+ CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+ Stack:
+ 602086f8 601bc260 705c0000 705c0000
+ 602086f8 6005fcec 705c0000 6002c6ab
+ 6005fcec 601bc260 705c0000 3000000010 \ No newline at end of file
diff --git a/tools/testing/kunit/test_data/test_pound_sign.log b/tools/testing/kunit/test_data/test_pound_sign.log
new file mode 100644
index 000000000000..28ffa5ba03bf
--- /dev/null
+++ b/tools/testing/kunit/test_data/test_pound_sign.log
@@ -0,0 +1,33 @@
+[ 0.060000] printk: console [mc-1] enabled
+[ 0.060000] random: get_random_bytes called from init_oops_id+0x35/0x40 with crng_init=0
+[ 0.060000] TAP version 14
+[ 0.060000] # Subtest: kunit-resource-test
+[ 0.060000] 1..5
+[ 0.060000] ok 1 - kunit_resource_test_init_resources
+[ 0.060000] ok 2 - kunit_resource_test_alloc_resource
+[ 0.060000] ok 3 - kunit_resource_test_destroy_resource
+[ 0.060000] foo bar #
+[ 0.060000] ok 4 - kunit_resource_test_cleanup_resources
+[ 0.060000] ok 5 - kunit_resource_test_proper_free_ordering
+[ 0.060000] ok 1 - kunit-resource-test
+[ 0.060000] foo bar # non-kunit output
+[ 0.060000] # Subtest: kunit-try-catch-test
+[ 0.060000] 1..2
+[ 0.060000] ok 1 - kunit_test_try_catch_successful_try_no_catch
+[ 0.060000] ok 2 - kunit_test_try_catch_unsuccessful_try_does_catch
+[ 0.060000] ok 2 - kunit-try-catch-test
+[ 0.060000] # Subtest: string-stream-test
+[ 0.060000] 1..3
+[ 0.060000] ok 1 - string_stream_test_empty_on_creation
+[ 0.060000] ok 2 - string_stream_test_not_empty_after_add
+[ 0.060000] ok 3 - string_stream_test_get_string
+[ 0.060000] ok 3 - string-stream-test
+[ 0.060000] List of all partitions:
+[ 0.060000] No filesystem could mount root, tried:
+[ 0.060000]
+[ 0.060000] Kernel panic - not syncing: VFS: Unable to mount root fs on unknown-block(98,0)
+[ 0.060000] CPU: 0 PID: 1 Comm: swapper Not tainted 5.4.0-rc1-gea2dd7c0875e-dirty #2
+[ 0.060000] Stack:
+[ 0.060000] 602086f8 601bc260 705c0000 705c0000
+[ 0.060000] 602086f8 6005fcec 705c0000 6002c6ab
+[ 0.060000] 6005fcec 601bc260 705c0000 3000000010
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index dbebf05f5931..47f9cc9dcd94 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -21,8 +21,8 @@ DRIVERS := ../../../drivers
NVDIMM_SRC := $(DRIVERS)/nvdimm
ACPI_SRC := $(DRIVERS)/acpi/nfit
DAX_SRC := $(DRIVERS)/dax
-ccflags-y := -I$(src)/$(NVDIMM_SRC)/
-ccflags-y += -I$(src)/$(ACPI_SRC)/
+ccflags-y := -I$(srctree)/drivers/nvdimm/
+ccflags-y += -I$(srctree)/drivers/acpi/nfit/
obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild
index fb3c3d7cdb9b..75baebf8f4ba 100644
--- a/tools/testing/nvdimm/test/Kbuild
+++ b/tools/testing/nvdimm/test/Kbuild
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-ccflags-y := -I$(src)/../../../../drivers/nvdimm/
-ccflags-y += -I$(src)/../../../../drivers/acpi/nfit/
+ccflags-y := -I$(srctree)/drivers/nvdimm/
+ccflags-y += -I$(srctree)/drivers/acpi/nfit/
obj-m += nfit_test.o
obj-m += nfit_test_iomap.o
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index bf6422a6af7f..a8ee5c4d41eb 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -3164,7 +3164,9 @@ static __init int nfit_test_init(void)
mcsafe_test();
dax_pmem_test();
dax_pmem_core_test();
+#ifdef CONFIG_DEV_DAX_PMEM_COMPAT
dax_pmem_compat_test();
+#endif
nfit_test_setup(nfit_test_lookup, nfit_test_evaluate_dsm);
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
index 3834899b6693..d971516401e6 100644
--- a/tools/testing/radix-tree/.gitignore
+++ b/tools/testing/radix-tree/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
generated/map-shift.h
idr.c
idr-test
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 397d6b612502..aa6abfe0749c 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -7,8 +7,8 @@ LDLIBS+= -lpthread -lurcu
TARGETS = main idr-test multiorder xarray
CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o
OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
- regression4.o \
- tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
+ regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \
+ iteration_check_2.o benchmark.o
ifndef SHIFT
SHIFT=3
diff --git a/tools/testing/radix-tree/iteration_check_2.c b/tools/testing/radix-tree/iteration_check_2.c
new file mode 100644
index 000000000000..aac5c50a3674
--- /dev/null
+++ b/tools/testing/radix-tree/iteration_check_2.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * iteration_check_2.c: Check that deleting a tagged entry doesn't cause
+ * an RCU walker to finish early.
+ * Copyright (c) 2020 Oracle
+ * Author: Matthew Wilcox <willy@infradead.org>
+ */
+#include <pthread.h>
+#include "test.h"
+
+static volatile bool test_complete;
+
+static void *iterator(void *arg)
+{
+ XA_STATE(xas, arg, 0);
+ void *entry;
+
+ rcu_register_thread();
+
+ while (!test_complete) {
+ xas_set(&xas, 0);
+ rcu_read_lock();
+ xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
+ ;
+ rcu_read_unlock();
+ assert(xas.xa_index >= 100);
+ }
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
+static void *throbber(void *arg)
+{
+ struct xarray *xa = arg;
+
+ rcu_register_thread();
+
+ while (!test_complete) {
+ int i;
+
+ for (i = 0; i < 100; i++) {
+ xa_store(xa, i, xa_mk_value(i), GFP_KERNEL);
+ xa_set_mark(xa, i, XA_MARK_0);
+ }
+ for (i = 0; i < 100; i++)
+ xa_erase(xa, i);
+ }
+
+ rcu_unregister_thread();
+ return NULL;
+}
+
+void iteration_test2(unsigned test_duration)
+{
+ pthread_t threads[2];
+ DEFINE_XARRAY(array);
+ int i;
+
+ printv(1, "Running iteration test 2 for %d seconds\n", test_duration);
+
+ test_complete = false;
+
+ xa_store(&array, 100, xa_mk_value(100), GFP_KERNEL);
+ xa_set_mark(&array, 100, XA_MARK_0);
+
+ if (pthread_create(&threads[0], NULL, iterator, &array)) {
+ perror("create iterator thread");
+ exit(1);
+ }
+ if (pthread_create(&threads[1], NULL, throbber, &array)) {
+ perror("create throbber thread");
+ exit(1);
+ }
+
+ sleep(test_duration);
+ test_complete = true;
+
+ for (i = 0; i < 2; i++) {
+ if (pthread_join(threads[i], NULL)) {
+ perror("pthread_join");
+ exit(1);
+ }
+ }
+
+ xa_destroy(&array);
+}
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
index 44a0d1ad4408..2d9c59df60de 100644
--- a/tools/testing/radix-tree/linux.c
+++ b/tools/testing/radix-tree/linux.c
@@ -19,37 +19,44 @@ int test_verbose;
struct kmem_cache {
pthread_mutex_t lock;
- int size;
+ unsigned int size;
+ unsigned int align;
int nr_objs;
void *objs;
void (*ctor)(void *);
};
-void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
+void *kmem_cache_alloc(struct kmem_cache *cachep, int gfp)
{
- struct radix_tree_node *node;
+ void *p;
- if (!(flags & __GFP_DIRECT_RECLAIM))
+ if (!(gfp & __GFP_DIRECT_RECLAIM))
return NULL;
pthread_mutex_lock(&cachep->lock);
if (cachep->nr_objs) {
+ struct radix_tree_node *node = cachep->objs;
cachep->nr_objs--;
- node = cachep->objs;
cachep->objs = node->parent;
pthread_mutex_unlock(&cachep->lock);
node->parent = NULL;
+ p = node;
} else {
pthread_mutex_unlock(&cachep->lock);
- node = malloc(cachep->size);
+ if (cachep->align)
+ posix_memalign(&p, cachep->align, cachep->size);
+ else
+ p = malloc(cachep->size);
if (cachep->ctor)
- cachep->ctor(node);
+ cachep->ctor(p);
+ else if (gfp & __GFP_ZERO)
+ memset(p, 0, cachep->size);
}
uatomic_inc(&nr_allocated);
if (kmalloc_verbose)
- printf("Allocating %p from slab\n", node);
- return node;
+ printf("Allocating %p from slab\n", p);
+ return p;
}
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
@@ -59,7 +66,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
if (kmalloc_verbose)
printf("Freeing %p to slab\n", objp);
pthread_mutex_lock(&cachep->lock);
- if (cachep->nr_objs > 10) {
+ if (cachep->nr_objs > 10 || cachep->align) {
memset(objp, POISON_FREE, cachep->size);
free(objp);
} else {
@@ -98,13 +105,14 @@ void kfree(void *p)
}
struct kmem_cache *
-kmem_cache_create(const char *name, size_t size, size_t offset,
- unsigned long flags, void (*ctor)(void *))
+kmem_cache_create(const char *name, unsigned int size, unsigned int align,
+ unsigned int flags, void (*ctor)(void *))
{
struct kmem_cache *ret = malloc(sizeof(*ret));
pthread_mutex_init(&ret->lock, NULL);
ret->size = size;
+ ret->align = align;
ret->nr_objs = 0;
ret->objs = NULL;
ret->ctor = ctor;
diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h
index a037def0dec6..2958830ce4d7 100644
--- a/tools/testing/radix-tree/linux/slab.h
+++ b/tools/testing/radix-tree/linux/slab.h
@@ -20,8 +20,8 @@ static inline void *kzalloc(size_t size, gfp_t gfp)
void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
void kmem_cache_free(struct kmem_cache *cachep, void *objp);
-struct kmem_cache *
-kmem_cache_create(const char *name, size_t size, size_t offset,
- unsigned long flags, void (*ctor)(void *));
+struct kmem_cache *kmem_cache_create(const char *name, unsigned int size,
+ unsigned int align, unsigned int flags,
+ void (*ctor)(void *));
#endif /* SLAB_H */
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index 7a22d6e3732e..f2cbc8e5b97c 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -311,6 +311,7 @@ int main(int argc, char **argv)
regression4_test();
iteration_test(0, 10 + 90 * long_run);
iteration_test(7, 10 + 90 * long_run);
+ iteration_test2(10 + 90 * long_run);
single_thread_tests(long_run);
/* Free any remaining preallocated nodes */
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 1ee4b2c0ad10..34dab4d18744 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -34,6 +34,7 @@ void xarray_tests(void);
void tag_check(void);
void multiorder_checks(void);
void iteration_test(unsigned order, unsigned duration);
+void iteration_test2(unsigned duration);
void benchmark(void);
void idr_checks(void);
void ida_tests(void);
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
index 61df01cdf0b2..055a5019b13c 100644
--- a/tools/testing/selftests/.gitignore
+++ b/tools/testing/selftests/.gitignore
@@ -1,6 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
gpiogpio-event-mon
gpiogpio-hammer
gpioinclude/
gpiolsgpio
tpm2/SpaceTest.log
-tpm2/*.pyc
+
+# Python bytecode and cache
+__pycache__/
+*.py[cod]
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index b93fa645ee54..1195bd85af38 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -36,9 +36,9 @@ TARGETS += net
TARGETS += net/forwarding
TARGETS += net/mptcp
TARGETS += netfilter
-TARGETS += networking/timestamping
TARGETS += nsfs
TARGETS += pidfd
+TARGETS += pid_namespace
TARGETS += powerpc
TARGETS += proc
TARGETS += pstore
@@ -92,7 +92,7 @@ override LDFLAGS =
override MAKEFLAGS =
endif
-# Append kselftest to KBUILD_OUTPUT to avoid cluttering
+# Append kselftest to KBUILD_OUTPUT and O to avoid cluttering
# KBUILD_OUTPUT with selftest objects and headers installed
# by selftests Makefile or lib.mk.
ifdef building_out_of_srctree
@@ -100,7 +100,7 @@ override LDFLAGS =
endif
ifneq ($(O),)
- BUILD := $(O)
+ BUILD := $(O)/kselftest
else
ifneq ($(KBUILD_OUTPUT),)
BUILD := $(KBUILD_OUTPUT)/kselftest
@@ -249,10 +249,17 @@ else
$(error Error: set INSTALL_PATH to use install)
endif
+FORMAT ?= .gz
+TAR_PATH = $(abspath ${INSTALL_PATH}/kselftest-packages/kselftest.tar${FORMAT})
+gen_tar: install
+ @mkdir -p ${INSTALL_PATH}/kselftest-packages/
+ @tar caf ${TAR_PATH} --exclude=kselftest-packages -C ${INSTALL_PATH} .
+ @echo "Created ${TAR_PATH}"
+
clean:
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
done;
-.PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean
+.PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean gen_tar
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
index 7c462714b418..9258306cafe9 100644
--- a/tools/testing/selftests/android/Makefile
+++ b/tools/testing/selftests/android/Makefile
@@ -21,7 +21,7 @@ all:
override define INSTALL_RULE
mkdir -p $(INSTALL_PATH)
- install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
@for SUBDIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \
diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/android/ion/.gitignore
index 95e8f4561474..78eae9972bb1 100644
--- a/tools/testing/selftests/android/ion/.gitignore
+++ b/tools/testing/selftests/android/ion/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
ionapp_export
ionapp_import
ionmap_test
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
index 0eb7ab626e1c..42b71f005332 100644
--- a/tools/testing/selftests/android/ion/Makefile
+++ b/tools/testing/selftests/android/ion/Makefile
@@ -17,4 +17,4 @@ include ../../lib.mk
$(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
$(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
-$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c
+$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c ipcsocket.c
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 3c5b4e8ff894..78c902045ca7 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
mangle_*
fake_sigreturn_*
!*.[ch]
diff --git a/tools/testing/selftests/arm64/tags/.gitignore b/tools/testing/selftests/arm64/tags/.gitignore
index e8fae8d61ed6..f4f6c5112463 100644
--- a/tools/testing/selftests/arm64/tags/.gitignore
+++ b/tools/testing/selftests/arm64/tags/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
tags_test
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index ec464859c6b6..1bb204cee853 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
test_verifier
test_maps
test_lru_map
@@ -29,12 +30,12 @@ test_tcpnotify_user
test_libbpf
test_tcp_check_syncookie_user
test_sysctl
-test_hashmap
-test_btf_dump
+test_current_pid_tgid_new_ns
xdping
test_cpp
*.skel.h
/no_alu32
/bpf_gcc
/tools
-
+/runqslower
+/bench
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 257a1aaaa37d..22aaec74ea0a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -2,6 +2,8 @@
include ../../../../scripts/Kbuild.include
include ../../../scripts/Makefile.arch
+CXX ?= $(CROSS_COMPILE)g++
+
CURDIR := $(abspath .)
TOOLSDIR := $(abspath ../../..)
LIBDIR := $(TOOLSDIR)/lib
@@ -20,8 +22,10 @@ CLANG ?= clang
LLC ?= llc
LLVM_OBJCOPY ?= llvm-objcopy
BPF_GCC ?= $(shell command -v bpf-gcc;)
-CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(CURDIR) -I$(APIDIR) \
- -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR) \
+SAN_CFLAGS ?=
+CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS) \
+ -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
+ -I$(TOOLSINCDIR) -I$(APIDIR) \
-Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
LDLIBS += -lcap -lelf -lz -lrt -lpthread
@@ -31,8 +35,9 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
test_cgroup_storage \
- test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
- test_progs-no_alu32
+ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
+ test_progs-no_alu32 \
+ test_current_pid_tgid_new_ns
# Also test bpf-gcc, if present
ifneq ($(BPF_GCC),)
@@ -62,7 +67,8 @@ TEST_PROGS := test_kmod.sh \
test_tc_tunnel.sh \
test_tc_edt.sh \
test_xdping.sh \
- test_bpftool_build.sh
+ test_bpftool_build.sh \
+ test_bpftool.sh
TEST_PROGS_EXTENDED := with_addr.sh \
with_tunnels.sh \
@@ -73,7 +79,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
- test_lirc_mode2_user xdping test_cpp runqslower
+ test_lirc_mode2_user xdping test_cpp runqslower bench
TEST_CUSTOM_PROGS = urandom_read
@@ -128,14 +134,18 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
$(call msg,CC,,$@)
$(CC) -c $(CFLAGS) -o $@ $<
-VMLINUX_BTF_PATHS := $(abspath ../../../../vmlinux) \
- /sys/kernel/btf/vmlinux \
- /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF:= $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))
+VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../../vmlinux \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+
$(OUTPUT)/runqslower: $(BPFOBJ)
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \
- BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR)
+ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \
+ cp $(SCRATCH_DIR)/runqslower $@
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
@@ -171,6 +181,10 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
$(call msg,MKDIR,,$@)
mkdir -p $@
+$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+ $(call msg,GEN,,$@)
+ $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+
# Get Clang's default includes on this system, as opposed to those seen by
# '-target bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
@@ -189,8 +203,8 @@ MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
- -I$(INCLUDE_DIR) -I$(CURDIR) -I$(CURDIR)/include/uapi \
- -I$(APIDIR) -I$(abspath $(OUTPUT)/../usr/include)
+ -I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
+ -I$(abspath $(OUTPUT)/../usr/include)
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
-Wno-compare-distinct-pointer-types
@@ -209,7 +223,7 @@ define CLANG_BPF_BUILD_RULE
$(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -target bpf -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
- $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2
+ $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
@@ -223,7 +237,7 @@ define CLANG_NATIVE_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
($(CLANG) $3 -O2 -emit-llvm \
-c $1 -o - || echo "BPF obj compilation failed") | \
- $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2
+ $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
@@ -231,7 +245,7 @@ define GCC_BPF_BUILD_RULE
$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
endef
-SKEL_BLACKLIST := btf__% test_pinning_invalid.c
+SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
@@ -253,6 +267,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(filter-out $(SKEL_BLACKLIST), \
$$(TRUNNER_BPF_SRCS)))
+TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
$$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2))
@@ -279,6 +294,7 @@ $(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y
$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
+ $$(INCLUDE_DIR)/vmlinux.h \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS), \
@@ -312,7 +328,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_BPF_SKELS) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
- cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+ cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
@@ -341,6 +357,7 @@ endef
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
+ network_helpers.c testing_helpers.c \
flow_dissector_load.h
TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(wildcard progs/btf_dump_test_case_*.c)
@@ -392,6 +409,24 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+# Benchmark runner
+$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
+ $(call msg,CC,,$@)
+ $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+$(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
+$(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
+$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
+ $(OUTPUT)/perfbuf_bench.skel.h
+$(OUTPUT)/bench.o: bench.h testing_helpers.h
+$(OUTPUT)/bench: LDLIBS += -lm
+$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
+ $(OUTPUT)/bench_count.o \
+ $(OUTPUT)/bench_rename.o \
+ $(OUTPUT)/bench_trigger.o \
+ $(OUTPUT)/bench_ringbufs.o
+ $(call msg,BINARY,,$@)
+ $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
+
EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature \
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
new file mode 100644
index 000000000000..e885d351595f
--- /dev/null
+++ b/tools/testing/selftests/bpf/README.rst
@@ -0,0 +1,45 @@
+==================
+BPF Selftest Notes
+==================
+General instructions on running selftests can be found in
+`Documentation/bpf/bpf_devel_QA.rst`_.
+
+Additional information about selftest failures are
+documented here.
+
+bpf_iter test failures with clang/llvm 10.0.0
+=============================================
+
+With clang/llvm 10.0.0, the following two bpf_iter tests failed:
+ * ``bpf_iter/ipv6_route``
+ * ``bpf_iter/netlink``
+
+The symptom for ``bpf_iter/ipv6_route`` looks like
+
+.. code-block:: c
+
+ 2: (79) r8 = *(u64 *)(r1 +8)
+ ...
+ 14: (bf) r2 = r8
+ 15: (0f) r2 += r1
+ ; BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
+ 16: (7b) *(u64 *)(r8 +64) = r2
+ only read is supported
+
+The symptom for ``bpf_iter/netlink`` looks like
+
+.. code-block:: c
+
+ ; struct netlink_sock *nlk = ctx->sk;
+ 2: (79) r7 = *(u64 *)(r1 +8)
+ ...
+ 15: (bf) r2 = r7
+ 16: (0f) r2 += r1
+ ; BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol);
+ 17: (7b) *(u64 *)(r7 +0) = r2
+ only read is supported
+
+This is due to a llvm BPF backend bug. The fix
+ https://reviews.llvm.org/D78466
+has been pushed to llvm 10.x release branch and will be
+available in 10.0.1. The fix is available in llvm 11.0.0 trunk.
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
new file mode 100644
index 000000000000..944ad4721c83
--- /dev/null
+++ b/tools/testing/selftests/bpf/bench.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <argp.h>
+#include <linux/compiler.h>
+#include <sys/time.h>
+#include <sched.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/sysinfo.h>
+#include <sys/resource.h>
+#include <signal.h>
+#include "bench.h"
+#include "testing_helpers.h"
+
+struct env env = {
+ .warmup_sec = 1,
+ .duration_sec = 5,
+ .affinity = false,
+ .consumer_cnt = 1,
+ .producer_cnt = 1,
+};
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level == LIBBPF_DEBUG && !env.verbose)
+ return 0;
+ return vfprintf(stderr, format, args);
+}
+
+static int bump_memlock_rlimit(void)
+{
+ struct rlimit rlim_new = {
+ .rlim_cur = RLIM_INFINITY,
+ .rlim_max = RLIM_INFINITY,
+ };
+
+ return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+}
+
+void setup_libbpf()
+{
+ int err;
+
+ libbpf_set_print(libbpf_print_fn);
+
+ err = bump_memlock_rlimit();
+ if (err)
+ fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err);
+}
+
+void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double hits_per_sec, drops_per_sec;
+ double hits_per_prod;
+
+ hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+ drops_per_sec = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
+
+ printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n",
+ hits_per_sec, hits_per_prod, drops_per_sec);
+}
+
+void hits_drops_report_final(struct bench_res res[], int res_cnt)
+{
+ int i;
+ double hits_mean = 0.0, drops_mean = 0.0;
+ double hits_stddev = 0.0, drops_stddev = 0.0;
+
+ for (i = 0; i < res_cnt; i++) {
+ hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+ (hits_mean - res[i].hits / 1000000.0) /
+ (res_cnt - 1.0);
+ drops_stddev += (drops_mean - res[i].drops / 1000000.0) *
+ (drops_mean - res[i].drops / 1000000.0) /
+ (res_cnt - 1.0);
+ }
+ hits_stddev = sqrt(hits_stddev);
+ drops_stddev = sqrt(drops_stddev);
+ }
+ printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
+ hits_mean, hits_stddev, hits_mean / env.producer_cnt);
+ printf("drops %8.3lf \u00B1 %5.3lfM/s\n",
+ drops_mean, drops_stddev);
+}
+
+const char *argp_program_version = "benchmark";
+const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
+const char argp_program_doc[] =
+"benchmark Generic benchmarking framework.\n"
+"\n"
+"This tool runs benchmarks.\n"
+"\n"
+"USAGE: benchmark <bench-name>\n"
+"\n"
+"EXAMPLES:\n"
+" # run 'count-local' benchmark with 1 producer and 1 consumer\n"
+" benchmark count-local\n"
+" # run 'count-local' with 16 producer and 8 consumer thread, pinned to CPUs\n"
+" benchmark -p16 -c8 -a count-local\n";
+
+enum {
+ ARG_PROD_AFFINITY_SET = 1000,
+ ARG_CONS_AFFINITY_SET = 1001,
+};
+
+static const struct argp_option opts[] = {
+ { "list", 'l', NULL, 0, "List available benchmarks"},
+ { "duration", 'd', "SEC", 0, "Duration of benchmark, seconds"},
+ { "warmup", 'w', "SEC", 0, "Warm-up period, seconds"},
+ { "producers", 'p', "NUM", 0, "Number of producer threads"},
+ { "consumers", 'c', "NUM", 0, "Number of consumer threads"},
+ { "verbose", 'v', NULL, 0, "Verbose debug output"},
+ { "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
+ { "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
+ "Set of CPUs for producer threads; implies --affinity"},
+ { "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
+ "Set of CPUs for consumer threads; implies --affinity"},
+ {},
+};
+
+extern struct argp bench_ringbufs_argp;
+
+static const struct argp_child bench_parsers[] = {
+ { &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ static int pos_args;
+
+ switch (key) {
+ case 'v':
+ env.verbose = true;
+ break;
+ case 'l':
+ env.list = true;
+ break;
+ case 'd':
+ env.duration_sec = strtol(arg, NULL, 10);
+ if (env.duration_sec <= 0) {
+ fprintf(stderr, "Invalid duration: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'w':
+ env.warmup_sec = strtol(arg, NULL, 10);
+ if (env.warmup_sec <= 0) {
+ fprintf(stderr, "Invalid warm-up duration: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'p':
+ env.producer_cnt = strtol(arg, NULL, 10);
+ if (env.producer_cnt <= 0) {
+ fprintf(stderr, "Invalid producer count: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'c':
+ env.consumer_cnt = strtol(arg, NULL, 10);
+ if (env.consumer_cnt <= 0) {
+ fprintf(stderr, "Invalid consumer count: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 'a':
+ env.affinity = true;
+ break;
+ case ARG_PROD_AFFINITY_SET:
+ env.affinity = true;
+ if (parse_num_list(arg, &env.prod_cpus.cpus,
+ &env.prod_cpus.cpus_len)) {
+ fprintf(stderr, "Invalid format of CPU set for producers.");
+ argp_usage(state);
+ }
+ break;
+ case ARG_CONS_AFFINITY_SET:
+ env.affinity = true;
+ if (parse_num_list(arg, &env.cons_cpus.cpus,
+ &env.cons_cpus.cpus_len)) {
+ fprintf(stderr, "Invalid format of CPU set for consumers.");
+ argp_usage(state);
+ }
+ break;
+ case ARGP_KEY_ARG:
+ if (pos_args++) {
+ fprintf(stderr,
+ "Unrecognized positional argument: %s\n", arg);
+ argp_usage(state);
+ }
+ env.bench_name = strdup(arg);
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+static void parse_cmdline_args(int argc, char **argv)
+{
+ static const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ .doc = argp_program_doc,
+ .children = bench_parsers,
+ };
+ if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+ exit(1);
+ if (!env.list && !env.bench_name) {
+ argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
+ exit(1);
+ }
+}
+
+static void collect_measurements(long delta_ns);
+
+static __u64 last_time_ns;
+static void sigalarm_handler(int signo)
+{
+ long new_time_ns = get_time_ns();
+ long delta_ns = new_time_ns - last_time_ns;
+
+ collect_measurements(delta_ns);
+
+ last_time_ns = new_time_ns;
+}
+
+/* set up periodic 1-second timer */
+static void setup_timer()
+{
+ static struct sigaction sigalarm_action = {
+ .sa_handler = sigalarm_handler,
+ };
+ struct itimerval timer_settings = {};
+ int err;
+
+ last_time_ns = get_time_ns();
+ err = sigaction(SIGALRM, &sigalarm_action, NULL);
+ if (err < 0) {
+ fprintf(stderr, "failed to install SIGALRM handler: %d\n", -errno);
+ exit(1);
+ }
+ timer_settings.it_interval.tv_sec = 1;
+ timer_settings.it_value.tv_sec = 1;
+ err = setitimer(ITIMER_REAL, &timer_settings, NULL);
+ if (err < 0) {
+ fprintf(stderr, "failed to arm interval timer: %d\n", -errno);
+ exit(1);
+ }
+}
+
+static void set_thread_affinity(pthread_t thread, int cpu)
+{
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(cpu, &cpuset);
+ if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) {
+ fprintf(stderr, "setting affinity to CPU #%d failed: %d\n",
+ cpu, errno);
+ exit(1);
+ }
+}
+
+static int next_cpu(struct cpu_set *cpu_set)
+{
+ if (cpu_set->cpus) {
+ int i;
+
+ /* find next available CPU */
+ for (i = cpu_set->next_cpu; i < cpu_set->cpus_len; i++) {
+ if (cpu_set->cpus[i]) {
+ cpu_set->next_cpu = i + 1;
+ return i;
+ }
+ }
+ fprintf(stderr, "Not enough CPUs specified, need CPU #%d or higher.\n", i);
+ exit(1);
+ }
+
+ return cpu_set->next_cpu++;
+}
+
+static struct bench_state {
+ int res_cnt;
+ struct bench_res *results;
+ pthread_t *consumers;
+ pthread_t *producers;
+} state;
+
+const struct bench *bench = NULL;
+
+extern const struct bench bench_count_global;
+extern const struct bench bench_count_local;
+extern const struct bench bench_rename_base;
+extern const struct bench bench_rename_kprobe;
+extern const struct bench bench_rename_kretprobe;
+extern const struct bench bench_rename_rawtp;
+extern const struct bench bench_rename_fentry;
+extern const struct bench bench_rename_fexit;
+extern const struct bench bench_rename_fmodret;
+extern const struct bench bench_trig_base;
+extern const struct bench bench_trig_tp;
+extern const struct bench bench_trig_rawtp;
+extern const struct bench bench_trig_kprobe;
+extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_fmodret;
+extern const struct bench bench_rb_libbpf;
+extern const struct bench bench_rb_custom;
+extern const struct bench bench_pb_libbpf;
+extern const struct bench bench_pb_custom;
+
+static const struct bench *benchs[] = {
+ &bench_count_global,
+ &bench_count_local,
+ &bench_rename_base,
+ &bench_rename_kprobe,
+ &bench_rename_kretprobe,
+ &bench_rename_rawtp,
+ &bench_rename_fentry,
+ &bench_rename_fexit,
+ &bench_rename_fmodret,
+ &bench_trig_base,
+ &bench_trig_tp,
+ &bench_trig_rawtp,
+ &bench_trig_kprobe,
+ &bench_trig_fentry,
+ &bench_trig_fmodret,
+ &bench_rb_libbpf,
+ &bench_rb_custom,
+ &bench_pb_libbpf,
+ &bench_pb_custom,
+};
+
+static void setup_benchmark()
+{
+ int i, err;
+
+ if (!env.bench_name) {
+ fprintf(stderr, "benchmark name is not specified\n");
+ exit(1);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(benchs); i++) {
+ if (strcmp(benchs[i]->name, env.bench_name) == 0) {
+ bench = benchs[i];
+ break;
+ }
+ }
+ if (!bench) {
+ fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
+ exit(1);
+ }
+
+ printf("Setting up benchmark '%s'...\n", bench->name);
+
+ state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
+ state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers));
+ state.results = calloc(env.duration_sec + env.warmup_sec + 2,
+ sizeof(*state.results));
+ if (!state.producers || !state.consumers || !state.results)
+ exit(1);
+
+ if (bench->validate)
+ bench->validate();
+ if (bench->setup)
+ bench->setup();
+
+ for (i = 0; i < env.consumer_cnt; i++) {
+ err = pthread_create(&state.consumers[i], NULL,
+ bench->consumer_thread, (void *)(long)i);
+ if (err) {
+ fprintf(stderr, "failed to create consumer thread #%d: %d\n",
+ i, -errno);
+ exit(1);
+ }
+ if (env.affinity)
+ set_thread_affinity(state.consumers[i],
+ next_cpu(&env.cons_cpus));
+ }
+
+ /* unless explicit producer CPU list is specified, continue after
+ * last consumer CPU
+ */
+ if (!env.prod_cpus.cpus)
+ env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;
+
+ for (i = 0; i < env.producer_cnt; i++) {
+ err = pthread_create(&state.producers[i], NULL,
+ bench->producer_thread, (void *)(long)i);
+ if (err) {
+ fprintf(stderr, "failed to create producer thread #%d: %d\n",
+ i, -errno);
+ exit(1);
+ }
+ if (env.affinity)
+ set_thread_affinity(state.producers[i],
+ next_cpu(&env.prod_cpus));
+ }
+
+ printf("Benchmark '%s' started.\n", bench->name);
+}
+
+static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t bench_done = PTHREAD_COND_INITIALIZER;
+
+static void collect_measurements(long delta_ns) {
+ int iter = state.res_cnt++;
+ struct bench_res *res = &state.results[iter];
+
+ bench->measure(res);
+
+ if (bench->report_progress)
+ bench->report_progress(iter, res, delta_ns);
+
+ if (iter == env.duration_sec + env.warmup_sec) {
+ pthread_mutex_lock(&bench_done_mtx);
+ pthread_cond_signal(&bench_done);
+ pthread_mutex_unlock(&bench_done_mtx);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_cmdline_args(argc, argv);
+
+ if (env.list) {
+ int i;
+
+ printf("Available benchmarks:\n");
+ for (i = 0; i < ARRAY_SIZE(benchs); i++) {
+ printf("- %s\n", benchs[i]->name);
+ }
+ return 0;
+ }
+
+ setup_benchmark();
+
+ setup_timer();
+
+ pthread_mutex_lock(&bench_done_mtx);
+ pthread_cond_wait(&bench_done, &bench_done_mtx);
+ pthread_mutex_unlock(&bench_done_mtx);
+
+ if (bench->report_final)
+ /* skip first sample */
+ bench->report_final(state.results + env.warmup_sec,
+ state.res_cnt - env.warmup_sec);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
new file mode 100644
index 000000000000..c1f48a473b02
--- /dev/null
+++ b/tools/testing/selftests/bpf/bench.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#pragma once
+#include <stdlib.h>
+#include <stdbool.h>
+#include <linux/err.h>
+#include <errno.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <math.h>
+#include <time.h>
+#include <sys/syscall.h>
+
+struct cpu_set {
+ bool *cpus;
+ int cpus_len;
+ int next_cpu;
+};
+
+struct env {
+ char *bench_name;
+ int duration_sec;
+ int warmup_sec;
+ bool verbose;
+ bool list;
+ bool affinity;
+ int consumer_cnt;
+ int producer_cnt;
+ struct cpu_set prod_cpus;
+ struct cpu_set cons_cpus;
+};
+
+struct bench_res {
+ long hits;
+ long drops;
+};
+
+struct bench {
+ const char *name;
+ void (*validate)();
+ void (*setup)();
+ void *(*producer_thread)(void *ctx);
+ void *(*consumer_thread)(void *ctx);
+ void (*measure)(struct bench_res* res);
+ void (*report_progress)(int iter, struct bench_res* res, long delta_ns);
+ void (*report_final)(struct bench_res res[], int res_cnt);
+};
+
+struct counter {
+ long value;
+} __attribute__((aligned(128)));
+
+extern struct env env;
+extern const struct bench *bench;
+
+void setup_libbpf();
+void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
+void hits_drops_report_final(struct bench_res res[], int res_cnt);
+
+static inline __u64 get_time_ns() {
+ struct timespec t;
+
+ clock_gettime(CLOCK_MONOTONIC, &t);
+
+ return (u64)t.tv_sec * 1000000000 + t.tv_nsec;
+}
+
+static inline void atomic_inc(long *value)
+{
+ (void)__atomic_add_fetch(value, 1, __ATOMIC_RELAXED);
+}
+
+static inline void atomic_add(long *value, long n)
+{
+ (void)__atomic_add_fetch(value, n, __ATOMIC_RELAXED);
+}
+
+static inline long atomic_swap(long *value, long n)
+{
+ return __atomic_exchange_n(value, n, __ATOMIC_RELAXED);
+}
diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c
new file mode 100644
index 000000000000..befba7a82643
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_count.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bench.h"
+
+/* COUNT-GLOBAL benchmark */
+
+static struct count_global_ctx {
+ struct counter hits;
+} count_global_ctx;
+
+static void *count_global_producer(void *input)
+{
+ struct count_global_ctx *ctx = &count_global_ctx;
+
+ while (true) {
+ atomic_inc(&ctx->hits.value);
+ }
+ return NULL;
+}
+
+static void *count_global_consumer(void *input)
+{
+ return NULL;
+}
+
+static void count_global_measure(struct bench_res *res)
+{
+ struct count_global_ctx *ctx = &count_global_ctx;
+
+ res->hits = atomic_swap(&ctx->hits.value, 0);
+}
+
+/* COUNT-local benchmark */
+
+static struct count_local_ctx {
+ struct counter *hits;
+} count_local_ctx;
+
+static void count_local_setup()
+{
+ struct count_local_ctx *ctx = &count_local_ctx;
+
+ ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits));
+ if (!ctx->hits)
+ exit(1);
+}
+
+static void *count_local_producer(void *input)
+{
+ struct count_local_ctx *ctx = &count_local_ctx;
+ int idx = (long)input;
+
+ while (true) {
+ atomic_inc(&ctx->hits[idx].value);
+ }
+ return NULL;
+}
+
+static void *count_local_consumer(void *input)
+{
+ return NULL;
+}
+
+static void count_local_measure(struct bench_res *res)
+{
+ struct count_local_ctx *ctx = &count_local_ctx;
+ int i;
+
+ for (i = 0; i < env.producer_cnt; i++) {
+ res->hits += atomic_swap(&ctx->hits[i].value, 0);
+ }
+}
+
+const struct bench bench_count_global = {
+ .name = "count-global",
+ .producer_thread = count_global_producer,
+ .consumer_thread = count_global_consumer,
+ .measure = count_global_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_count_local = {
+ .name = "count-local",
+ .setup = count_local_setup,
+ .producer_thread = count_local_producer,
+ .consumer_thread = count_local_consumer,
+ .measure = count_local_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
new file mode 100644
index 000000000000..e74cff40f4fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <fcntl.h>
+#include "bench.h"
+#include "test_overhead.skel.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+ struct test_overhead *skel;
+ struct counter hits;
+ int fd;
+} ctx;
+
+static void validate()
+{
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+ exit(1);
+ }
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+}
+
+static void *producer(void *input)
+{
+ char buf[] = "test_overhead";
+ int err;
+
+ while (true) {
+ err = write(ctx.fd, buf, sizeof(buf));
+ if (err < 0) {
+ fprintf(stderr, "write failed\n");
+ exit(1);
+ }
+ atomic_inc(&ctx.hits.value);
+ }
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.hits.value, 0);
+}
+
+static void setup_ctx()
+{
+ setup_libbpf();
+
+ ctx.skel = test_overhead__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+ if (ctx.fd < 0) {
+ fprintf(stderr, "failed to open /proc/self/comm: %d\n", -errno);
+ exit(1);
+ }
+}
+
+static void attach_bpf(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(prog);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void setup_base()
+{
+ setup_ctx();
+}
+
+static void setup_kprobe()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.prog1);
+}
+
+static void setup_kretprobe()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.prog2);
+}
+
+static void setup_rawtp()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.prog3);
+}
+
+static void setup_fentry()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.prog4);
+}
+
+static void setup_fexit()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.prog5);
+}
+
+static void setup_fmodret()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.prog6);
+}
+
+static void *consumer(void *input)
+{
+ return NULL;
+}
+
+const struct bench bench_rename_base = {
+ .name = "rename-base",
+ .validate = validate,
+ .setup = setup_base,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_kprobe = {
+ .name = "rename-kprobe",
+ .validate = validate,
+ .setup = setup_kprobe,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_kretprobe = {
+ .name = "rename-kretprobe",
+ .validate = validate,
+ .setup = setup_kretprobe,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_rawtp = {
+ .name = "rename-rawtp",
+ .validate = validate,
+ .setup = setup_rawtp,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fentry = {
+ .name = "rename-fentry",
+ .validate = validate,
+ .setup = setup_fentry,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fexit = {
+ .name = "rename-fexit",
+ .validate = validate,
+ .setup = setup_fexit,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fmodret = {
+ .name = "rename-fmodret",
+ .validate = validate,
+ .setup = setup_fmodret,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
new file mode 100644
index 000000000000..da87c7f31891
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <asm/barrier.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <argp.h>
+#include <stdlib.h>
+#include "bench.h"
+#include "ringbuf_bench.skel.h"
+#include "perfbuf_bench.skel.h"
+
+static struct {
+ bool back2back;
+ int batch_cnt;
+ bool sampled;
+ int sample_rate;
+ int ringbuf_sz; /* per-ringbuf, in bytes */
+ bool ringbuf_use_output; /* use slower output API */
+ int perfbuf_sz; /* per-CPU size, in pages */
+} args = {
+ .back2back = false,
+ .batch_cnt = 500,
+ .sampled = false,
+ .sample_rate = 500,
+ .ringbuf_sz = 512 * 1024,
+ .ringbuf_use_output = false,
+ .perfbuf_sz = 128,
+};
+
+enum {
+ ARG_RB_BACK2BACK = 2000,
+ ARG_RB_USE_OUTPUT = 2001,
+ ARG_RB_BATCH_CNT = 2002,
+ ARG_RB_SAMPLED = 2003,
+ ARG_RB_SAMPLE_RATE = 2004,
+};
+
+static const struct argp_option opts[] = {
+ { "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
+ { "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
+ { "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
+ { "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
+ { "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_RB_BACK2BACK:
+ args.back2back = true;
+ break;
+ case ARG_RB_USE_OUTPUT:
+ args.ringbuf_use_output = true;
+ break;
+ case ARG_RB_BATCH_CNT:
+ args.batch_cnt = strtol(arg, NULL, 10);
+ if (args.batch_cnt < 0) {
+ fprintf(stderr, "Invalid batch count.");
+ argp_usage(state);
+ }
+ break;
+ case ARG_RB_SAMPLED:
+ args.sampled = true;
+ break;
+ case ARG_RB_SAMPLE_RATE:
+ args.sample_rate = strtol(arg, NULL, 10);
+ if (args.sample_rate < 0) {
+ fprintf(stderr, "Invalid perfbuf sample rate.");
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_ringbufs_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* RINGBUF-LIBBPF benchmark */
+
+static struct counter buf_hits;
+
+static inline void bufs_trigger_batch()
+{
+ (void)syscall(__NR_getpgid);
+}
+
+static void bufs_validate()
+{
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+
+ if (args.back2back && env.producer_cnt > 1) {
+ fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
+ exit(1);
+ }
+}
+
+static void *bufs_sample_producer(void *input)
+{
+ if (args.back2back) {
+ /* initial batch to get everything started */
+ bufs_trigger_batch();
+ return NULL;
+ }
+
+ while (true)
+ bufs_trigger_batch();
+ return NULL;
+}
+
+static struct ringbuf_libbpf_ctx {
+ struct ringbuf_bench *skel;
+ struct ring_buffer *ringbuf;
+} ringbuf_libbpf_ctx;
+
+static void ringbuf_libbpf_measure(struct bench_res *res)
+{
+ struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+ res->hits = atomic_swap(&buf_hits.value, 0);
+ res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct ringbuf_bench *ringbuf_setup_skeleton()
+{
+ struct ringbuf_bench *skel;
+
+ setup_libbpf();
+
+ skel = ringbuf_bench__open();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ skel->rodata->batch_cnt = args.batch_cnt;
+ skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+
+ if (args.sampled)
+ /* record data + header take 16 bytes */
+ skel->rodata->wakeup_data_size = args.sample_rate * 16;
+
+ bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
+
+ if (ringbuf_bench__load(skel)) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
+ return skel;
+}
+
+static int buf_process_sample(void *ctx, void *data, size_t len)
+{
+ atomic_inc(&buf_hits.value);
+ return 0;
+}
+
+static void ringbuf_libbpf_setup()
+{
+ struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+ struct bpf_link *link;
+
+ ctx->skel = ringbuf_setup_skeleton();
+ ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
+ buf_process_sample, NULL, NULL);
+ if (!ctx->ringbuf) {
+ fprintf(stderr, "failed to create ringbuf\n");
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void *ringbuf_libbpf_consumer(void *input)
+{
+ struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+ while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
+ if (args.back2back)
+ bufs_trigger_batch();
+ }
+ fprintf(stderr, "ringbuf polling failed!\n");
+ return NULL;
+}
+
+/* RINGBUF-CUSTOM benchmark */
+struct ringbuf_custom {
+ __u64 *consumer_pos;
+ __u64 *producer_pos;
+ __u64 mask;
+ void *data;
+ int map_fd;
+};
+
+static struct ringbuf_custom_ctx {
+ struct ringbuf_bench *skel;
+ struct ringbuf_custom ringbuf;
+ int epoll_fd;
+ struct epoll_event event;
+} ringbuf_custom_ctx;
+
+static void ringbuf_custom_measure(struct bench_res *res)
+{
+ struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+
+ res->hits = atomic_swap(&buf_hits.value, 0);
+ res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static void ringbuf_custom_setup()
+{
+ struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+ const size_t page_size = getpagesize();
+ struct bpf_link *link;
+ struct ringbuf_custom *r;
+ void *tmp;
+ int err;
+
+ ctx->skel = ringbuf_setup_skeleton();
+
+ ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (ctx->epoll_fd < 0) {
+ fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
+ exit(1);
+ }
+
+ r = &ctx->ringbuf;
+ r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+ r->mask = args.ringbuf_sz - 1;
+
+ /* Map writable consumer page */
+ tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ r->map_fd, 0);
+ if (tmp == MAP_FAILED) {
+ fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
+ exit(1);
+ }
+ r->consumer_pos = tmp;
+
+ /* Map read-only producer page and data pages. */
+ tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
+ r->map_fd, page_size);
+ if (tmp == MAP_FAILED) {
+ fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
+ exit(1);
+ }
+ r->producer_pos = tmp;
+ r->data = tmp + page_size;
+
+ ctx->event.events = EPOLLIN;
+ err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
+ if (err < 0) {
+ fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program\n");
+ exit(1);
+ }
+}
+
+#define RINGBUF_BUSY_BIT (1 << 31)
+#define RINGBUF_DISCARD_BIT (1 << 30)
+#define RINGBUF_META_LEN 8
+
+static inline int roundup_len(__u32 len)
+{
+ /* clear out top 2 bits */
+ len <<= 2;
+ len >>= 2;
+ /* add length prefix */
+ len += RINGBUF_META_LEN;
+ /* round up to 8 byte alignment */
+ return (len + 7) / 8 * 8;
+}
+
+static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
+{
+ unsigned long cons_pos, prod_pos;
+ int *len_ptr, len;
+ bool got_new_data;
+
+ cons_pos = smp_load_acquire(r->consumer_pos);
+ while (true) {
+ got_new_data = false;
+ prod_pos = smp_load_acquire(r->producer_pos);
+ while (cons_pos < prod_pos) {
+ len_ptr = r->data + (cons_pos & r->mask);
+ len = smp_load_acquire(len_ptr);
+
+ /* sample not committed yet, bail out for now */
+ if (len & RINGBUF_BUSY_BIT)
+ return;
+
+ got_new_data = true;
+ cons_pos += roundup_len(len);
+
+ atomic_inc(&buf_hits.value);
+ }
+ if (got_new_data)
+ smp_store_release(r->consumer_pos, cons_pos);
+ else
+ break;
+ };
+}
+
+static void *ringbuf_custom_consumer(void *input)
+{
+ struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+ int cnt;
+
+ do {
+ if (args.back2back)
+ bufs_trigger_batch();
+ cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
+ if (cnt > 0)
+ ringbuf_custom_process_ring(&ctx->ringbuf);
+ } while (cnt >= 0);
+ fprintf(stderr, "ringbuf polling failed!\n");
+ return 0;
+}
+
+/* PERFBUF-LIBBPF benchmark */
+static struct perfbuf_libbpf_ctx {
+ struct perfbuf_bench *skel;
+ struct perf_buffer *perfbuf;
+} perfbuf_libbpf_ctx;
+
+static void perfbuf_measure(struct bench_res *res)
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+ res->hits = atomic_swap(&buf_hits.value, 0);
+ res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct perfbuf_bench *perfbuf_setup_skeleton()
+{
+ struct perfbuf_bench *skel;
+
+ setup_libbpf();
+
+ skel = perfbuf_bench__open();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ skel->rodata->batch_cnt = args.batch_cnt;
+
+ if (perfbuf_bench__load(skel)) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
+ return skel;
+}
+
+static enum bpf_perf_event_ret
+perfbuf_process_sample_raw(void *input_ctx, int cpu,
+ struct perf_event_header *e)
+{
+ switch (e->type) {
+ case PERF_RECORD_SAMPLE:
+ atomic_inc(&buf_hits.value);
+ break;
+ case PERF_RECORD_LOST:
+ break;
+ default:
+ return LIBBPF_PERF_EVENT_ERROR;
+ }
+ return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void perfbuf_libbpf_setup()
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+ struct perf_event_attr attr;
+ struct perf_buffer_raw_opts pb_opts = {
+ .event_cb = perfbuf_process_sample_raw,
+ .ctx = (void *)(long)0,
+ .attr = &attr,
+ };
+ struct bpf_link *link;
+
+ ctx->skel = perfbuf_setup_skeleton();
+
+ memset(&attr, 0, sizeof(attr));
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.sample_type = PERF_SAMPLE_RAW;
+ /* notify only every Nth sample */
+ if (args.sampled) {
+ attr.sample_period = args.sample_rate;
+ attr.wakeup_events = args.sample_rate;
+ } else {
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ }
+
+ if (args.sample_rate > args.batch_cnt) {
+ fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
+ args.sample_rate, args.batch_cnt);
+ exit(1);
+ }
+
+ ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
+ args.perfbuf_sz, &pb_opts);
+ if (!ctx->perfbuf) {
+ fprintf(stderr, "failed to create perfbuf\n");
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program\n");
+ exit(1);
+ }
+}
+
+static void *perfbuf_libbpf_consumer(void *input)
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+ while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
+ if (args.back2back)
+ bufs_trigger_batch();
+ }
+ fprintf(stderr, "perfbuf polling failed!\n");
+ return NULL;
+}
+
+/* PERFBUF-CUSTOM benchmark */
+
+/* copies of internal libbpf definitions */
+struct perf_cpu_buf {
+ struct perf_buffer *pb;
+ void *base; /* mmap()'ed memory */
+ void *buf; /* for reconstructing segmented data */
+ size_t buf_size;
+ int fd;
+ int cpu;
+ int map_key;
+};
+
+struct perf_buffer {
+ perf_buffer_event_fn event_cb;
+ perf_buffer_sample_fn sample_cb;
+ perf_buffer_lost_fn lost_cb;
+ void *ctx; /* passed into callbacks */
+
+ size_t page_size;
+ size_t mmap_size;
+ struct perf_cpu_buf **cpu_bufs;
+ struct epoll_event *events;
+ int cpu_cnt; /* number of allocated CPU buffers */
+ int epoll_fd; /* perf event FD */
+ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+static void *perfbuf_custom_consumer(void *input)
+{
+ struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+ struct perf_buffer *pb = ctx->perfbuf;
+ struct perf_cpu_buf *cpu_buf;
+ struct perf_event_mmap_page *header;
+ size_t mmap_mask = pb->mmap_size - 1;
+ struct perf_event_header *ehdr;
+ __u64 data_head, data_tail;
+ size_t ehdr_size;
+ void *base;
+ int i, cnt;
+
+ while (true) {
+ if (args.back2back)
+ bufs_trigger_batch();
+ cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
+ if (cnt <= 0) {
+ fprintf(stderr, "perf epoll failed: %d\n", -errno);
+ exit(1);
+ }
+
+ for (i = 0; i < cnt; ++i) {
+ cpu_buf = pb->events[i].data.ptr;
+ header = cpu_buf->base;
+ base = ((void *)header) + pb->page_size;
+
+ data_head = ring_buffer_read_head(header);
+ data_tail = header->data_tail;
+ while (data_head != data_tail) {
+ ehdr = base + (data_tail & mmap_mask);
+ ehdr_size = ehdr->size;
+
+ if (ehdr->type == PERF_RECORD_SAMPLE)
+ atomic_inc(&buf_hits.value);
+
+ data_tail += ehdr_size;
+ }
+ ring_buffer_write_tail(header, data_tail);
+ }
+ }
+ return NULL;
+}
+
+const struct bench bench_rb_libbpf = {
+ .name = "rb-libbpf",
+ .validate = bufs_validate,
+ .setup = ringbuf_libbpf_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = ringbuf_libbpf_consumer,
+ .measure = ringbuf_libbpf_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rb_custom = {
+ .name = "rb-custom",
+ .validate = bufs_validate,
+ .setup = ringbuf_custom_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = ringbuf_custom_consumer,
+ .measure = ringbuf_custom_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_libbpf = {
+ .name = "pb-libbpf",
+ .validate = bufs_validate,
+ .setup = perfbuf_libbpf_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = perfbuf_libbpf_consumer,
+ .measure = perfbuf_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_custom = {
+ .name = "pb-custom",
+ .validate = bufs_validate,
+ .setup = perfbuf_libbpf_setup,
+ .producer_thread = bufs_sample_producer,
+ .consumer_thread = perfbuf_custom_consumer,
+ .measure = perfbuf_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
new file mode 100644
index 000000000000..49c22832f216
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bench.h"
+#include "trigger_bench.skel.h"
+
+/* BPF triggering benchmarks */
+static struct trigger_ctx {
+ struct trigger_bench *skel;
+} ctx;
+
+static struct counter base_hits;
+
+static void trigger_validate()
+{
+ if (env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ exit(1);
+ }
+}
+
+static void *trigger_base_producer(void *input)
+{
+ while (true) {
+ (void)syscall(__NR_getpgid);
+ atomic_inc(&base_hits.value);
+ }
+ return NULL;
+}
+
+static void trigger_base_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&base_hits.value, 0);
+}
+
+static void *trigger_producer(void *input)
+{
+ while (true)
+ (void)syscall(__NR_getpgid);
+ return NULL;
+}
+
+static void trigger_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void setup_ctx()
+{
+ setup_libbpf();
+
+ ctx.skel = trigger_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+}
+
+static void attach_bpf(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(prog);
+ if (IS_ERR(link)) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void trigger_tp_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_tp);
+}
+
+static void trigger_rawtp_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
+}
+
+static void trigger_kprobe_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
+}
+
+static void trigger_fentry_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fentry);
+}
+
+static void trigger_fmodret_setup()
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
+}
+
+static void *trigger_consumer(void *input)
+{
+ return NULL;
+}
+
+const struct bench bench_trig_base = {
+ .name = "trig-base",
+ .validate = trigger_validate,
+ .producer_thread = trigger_base_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_base_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_tp = {
+ .name = "trig-tp",
+ .validate = trigger_validate,
+ .setup = trigger_tp_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_rawtp = {
+ .name = "trig-rawtp",
+ .validate = trigger_validate,
+ .setup = trigger_rawtp_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kprobe = {
+ .name = "trig-kprobe",
+ .validate = trigger_validate,
+ .setup = trigger_kprobe_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fentry = {
+ .name = "trig-fentry",
+ .validate = trigger_validate,
+ .setup = trigger_fentry_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fmodret = {
+ .name = "trig-fmodret",
+ .validate = trigger_validate,
+ .setup = trigger_fmodret_setup,
+ .producer_thread = trigger_producer,
+ .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
new file mode 100755
index 000000000000..16f774b1cdbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base kprobe kretprobe rawtp fentry fexit fmodret
+do
+ summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-10s: %s\n" $i "$summary"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
new file mode 100755
index 000000000000..af4aa04caba6
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+RUN_BENCH="sudo ./bench -w3 -d10 -a"
+
+function hits()
+{
+ echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function drops()
+{
+ echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function header()
+{
+ local len=${#1}
+
+ printf "\n%s\n" "$1"
+ for i in $(seq 1 $len); do printf '='; done
+ printf '\n'
+}
+
+function summarize()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
+}
+
+header "Single-producer, parallel producer"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH $b)"
+done
+
+header "Single-producer, parallel producer, sampled notification"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH --rb-sampled $b)"
+done
+
+header "Single-producer, back-to-back mode"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH --rb-b2b $b)"
+ summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)"
+done
+
+header "Ringbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+ summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
+done
+header "Perfbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+ summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
+done
+
+header "Ringbuf back-to-back, reserve+commit vs output"
+summarize "reserve" "$($RUN_BENCH --rb-b2b rb-custom)"
+summarize "output" "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)"
+
+header "Ringbuf sampled, reserve+commit vs output"
+summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled rb-custom)"
+summarize "output-sampled" "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)"
+
+header "Single-producer, consumer/producer competing on the same CPU, low batch count"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+ summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
+done
+
+header "Ringbuf, multi-producer contention"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+ summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
+done
+
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
new file mode 100755
index 000000000000..78e83f243294
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base tp rawtp kprobe fentry fmodret
+do
+ summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-10s: %s\n" $i "$summary"
+done
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 8f21965ffc6c..5bf2fe9b1efa 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -6,7 +6,7 @@
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
#define BPF_STRUCT_OPS(name, args...) \
SEC("struct_ops/"#name) \
diff --git a/tools/testing/selftests/bpf/bpf_trace_helpers.h b/tools/testing/selftests/bpf/bpf_trace_helpers.h
deleted file mode 100644
index c6f1354d93fb..000000000000
--- a/tools/testing/selftests/bpf/bpf_trace_helpers.h
+++ /dev/null
@@ -1,120 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __BPF_TRACE_HELPERS_H
-#define __BPF_TRACE_HELPERS_H
-
-#include <bpf/bpf_helpers.h>
-
-#define ___bpf_concat(a, b) a ## b
-#define ___bpf_apply(fn, n) ___bpf_concat(fn, n)
-#define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N
-#define ___bpf_narg(...) \
- ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
-#define ___bpf_empty(...) \
- ___bpf_nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0)
-
-#define ___bpf_ctx_cast0() ctx
-#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0]
-#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1]
-#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2]
-#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3]
-#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4]
-#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5]
-#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6]
-#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7]
-#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8]
-#define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]
-#define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]
-#define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11]
-#define ___bpf_ctx_cast(args...) \
- ___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)
-
-/*
- * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and
- * similar kinds of BPF programs, that accept input arguments as a single
- * pointer to untyped u64 array, where each u64 can actually be a typed
- * pointer or integer of different size. Instead of requring user to write
- * manual casts and work with array elements by index, BPF_PROG macro
- * allows user to declare a list of named and typed input arguments in the
- * same syntax as for normal C function. All the casting is hidden and
- * performed transparently, while user code can just assume working with
- * function arguments of specified type and name.
- *
- * Original raw context argument is preserved as well as 'ctx' argument.
- * This is useful when using BPF helpers that expect original context
- * as one of the parameters (e.g., for bpf_perf_event_output()).
- */
-#define BPF_PROG(name, args...) \
-name(unsigned long long *ctx); \
-static __always_inline typeof(name(0)) \
-____##name(unsigned long long *ctx, ##args); \
-typeof(name(0)) name(unsigned long long *ctx) \
-{ \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- return ____##name(___bpf_ctx_cast(args)); \
- _Pragma("GCC diagnostic pop") \
-} \
-static __always_inline typeof(name(0)) \
-____##name(unsigned long long *ctx, ##args)
-
-struct pt_regs;
-
-#define ___bpf_kprobe_args0() ctx
-#define ___bpf_kprobe_args1(x) \
- ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx)
-#define ___bpf_kprobe_args2(x, args...) \
- ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx)
-#define ___bpf_kprobe_args3(x, args...) \
- ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx)
-#define ___bpf_kprobe_args4(x, args...) \
- ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx)
-#define ___bpf_kprobe_args5(x, args...) \
- ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx)
-#define ___bpf_kprobe_args(args...) \
- ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)
-
-/*
- * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for
- * tp_btf/fentry/fexit BPF programs. It hides the underlying platform-specific
- * low-level way of getting kprobe input arguments from struct pt_regs, and
- * provides a familiar typed and named function arguments syntax and
- * semantics of accessing kprobe input paremeters.
- *
- * Original struct pt_regs* context is preserved as 'ctx' argument. This might
- * be necessary when using BPF helpers like bpf_perf_event_output().
- */
-#define BPF_KPROBE(name, args...) \
-name(struct pt_regs *ctx); \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
-typeof(name(0)) name(struct pt_regs *ctx) \
-{ \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- return ____##name(___bpf_kprobe_args(args)); \
- _Pragma("GCC diagnostic pop") \
-} \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
-
-#define ___bpf_kretprobe_args0() ctx
-#define ___bpf_kretprobe_argsN(x, args...) \
- ___bpf_kprobe_args(args), (void *)PT_REGS_RET(ctx)
-#define ___bpf_kretprobe_args(args...) \
- ___bpf_apply(___bpf_kretprobe_args, ___bpf_empty(args))(args)
-
-/*
- * BPF_KRETPROBE is similar to BPF_KPROBE, except, in addition to listing all
- * input kprobe arguments, one last extra argument has to be specified, which
- * captures kprobe return value.
- */
-#define BPF_KRETPROBE(name, args...) \
-name(struct pt_regs *ctx); \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args);\
-typeof(name(0)) name(struct pt_regs *ctx) \
-{ \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- return ____##name(___bpf_kretprobe_args(args)); \
- _Pragma("GCC diagnostic pop") \
-} \
-static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
-#endif
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 5dc109f4c097..2118e23ac07a 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -25,6 +25,7 @@ CONFIG_XDP_SOCKETS=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_IPV6_TUNNEL=y
CONFIG_IPV6_GRE=y
+CONFIG_IPV6_SEG6_BPF=y
CONFIG_NET_FOU=m
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_IPV6_FOU=m
@@ -35,3 +36,6 @@ CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_IPV6_SIT=m
CONFIG_BPF_JIT=y
+CONFIG_BPF_LSM=y
+CONFIG_SECURITY=y
+CONFIG_LIRC=y
diff --git a/tools/testing/selftests/bpf/map_tests/.gitignore b/tools/testing/selftests/bpf/map_tests/.gitignore
index 45984a364647..89c4a3d37544 100644
--- a/tools/testing/selftests/bpf/map_tests/.gitignore
+++ b/tools/testing/selftests/bpf/map_tests/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
tests.h
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
new file mode 100644
index 000000000000..e36dd1a1780d
--- /dev/null
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+
+#include <sys/epoll.h>
+
+#include <linux/err.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+#include "bpf_util.h"
+#include "network_helpers.h"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+struct ipv4_packet pkt_v4 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_TCP,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.urg_ptr = 123,
+ .tcp.doff = 5,
+};
+
+struct ipv6_packet pkt_v6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_TCP,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .tcp.urg_ptr = 123,
+ .tcp.doff = 5,
+};
+
+int start_server_with_port(int family, int type, __u16 port)
+{
+ struct sockaddr_storage addr = {};
+ socklen_t len;
+ int fd;
+
+ if (family == AF_INET) {
+ struct sockaddr_in *sin = (void *)&addr;
+
+ sin->sin_family = AF_INET;
+ sin->sin_port = htons(port);
+ len = sizeof(*sin);
+ } else {
+ struct sockaddr_in6 *sin6 = (void *)&addr;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+ len = sizeof(*sin6);
+ }
+
+ fd = socket(family, type | SOCK_NONBLOCK, 0);
+ if (fd < 0) {
+ log_err("Failed to create server socket");
+ return -1;
+ }
+
+ if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+ log_err("Failed to bind socket");
+ close(fd);
+ return -1;
+ }
+
+ if (type == SOCK_STREAM) {
+ if (listen(fd, 1) < 0) {
+ log_err("Failed to listed on socket");
+ close(fd);
+ return -1;
+ }
+ }
+
+ return fd;
+}
+
+int start_server(int family, int type)
+{
+ return start_server_with_port(family, type, 0);
+}
+
+static const struct timeval timeo_sec = { .tv_sec = 3 };
+static const size_t timeo_optlen = sizeof(timeo_sec);
+
+int connect_to_fd(int family, int type, int server_fd)
+{
+ int fd, save_errno;
+
+ fd = socket(family, type, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) {
+ save_errno = errno;
+ close(fd);
+ errno = save_errno;
+ return -1;
+ }
+
+ return fd;
+}
+
+int connect_fd_to_fd(int client_fd, int server_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int save_errno;
+
+ if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
+ timeo_optlen)) {
+ log_err("Failed to set SO_RCVTIMEO");
+ return -1;
+ }
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ return -1;
+ }
+
+ if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) {
+ if (errno != EINPROGRESS) {
+ save_errno = errno;
+ log_err("Failed to connect to server");
+ errno = save_errno;
+ }
+ return -1;
+ }
+
+ return 0;
+}
+
+int connect_wait(int fd)
+{
+ struct epoll_event ev = {}, events[2];
+ int timeout_ms = 1000;
+ int efd, nfd;
+
+ efd = epoll_create1(EPOLL_CLOEXEC);
+ if (efd < 0) {
+ log_err("Failed to open epoll fd");
+ return -1;
+ }
+
+ ev.events = EPOLLRDHUP | EPOLLOUT;
+ ev.data.fd = fd;
+
+ if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+ log_err("Failed to register fd=%d on epoll fd=%d", fd, efd);
+ close(efd);
+ return -1;
+ }
+
+ nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms);
+ if (nfd < 0)
+ log_err("Failed to wait for I/O event on epoll fd=%d", efd);
+
+ close(efd);
+ return nfd;
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
new file mode 100644
index 000000000000..6a8009605670
--- /dev/null
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NETWORK_HELPERS_H
+#define __NETWORK_HELPERS_H
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <linux/types.h>
+typedef __u16 __sum16;
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <netinet/tcp.h>
+#include <bpf/bpf_endian.h>
+
+#define MAGIC_VAL 0x1234
+#define NUM_ITER 100000
+#define VIP_NUM 5
+#define MAGIC_BYTES 123
+
+/* ipv4 test vector */
+struct ipv4_packet {
+ struct ethhdr eth;
+ struct iphdr iph;
+ struct tcphdr tcp;
+} __packed;
+extern struct ipv4_packet pkt_v4;
+
+/* ipv6 test vector */
+struct ipv6_packet {
+ struct ethhdr eth;
+ struct ipv6hdr iph;
+ struct tcphdr tcp;
+} __packed;
+extern struct ipv6_packet pkt_v6;
+
+int start_server(int family, int type);
+int start_server_with_port(int family, int type, __u16 port);
+int connect_to_fd(int family, int type, int server_fd);
+int connect_fd_to_fd(int client_fd, int server_fd);
+int connect_wait(int client_fd);
+
+#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/.gitignore b/tools/testing/selftests/bpf/prog_tests/.gitignore
index 45984a364647..89c4a3d37544 100644
--- a/tools/testing/selftests/bpf/prog_tests/.gitignore
+++ b/tools/testing/selftests/bpf/prog_tests/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
tests.h
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 0262f7b374f9..c548aded6585 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -1,24 +1,5 @@
-#include <asm/types.h>
-#include <linux/types.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <stddef.h>
-#include <stdbool.h>
-
-#include <linux/unistd.h>
-#include <linux/filter.h>
-#include <linux/bpf_perf_event.h>
-#include <linux/bpf.h>
-
-#include <bpf/bpf.h>
-
-#include "../../../include/linux/filter.h"
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
#define MAX_INSNS 512
#define MAX_MATCHES 16
@@ -359,15 +340,15 @@ static struct bpf_align_test tests[] = {
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
- {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+ {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
+ {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
},
},
{
@@ -410,15 +391,15 @@ static struct bpf_align_test tests[] = {
/* Adding 14 makes R6 be (4n+2) */
{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* Packet pointer has (4n+2) offset */
- {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
- {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+ {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
@@ -426,15 +407,15 @@ static struct bpf_align_test tests[] = {
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
- {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
- {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+ {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+ {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
},
},
{
@@ -469,16 +450,16 @@ static struct bpf_align_test tests[] = {
.matches = {
{4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
/* (ptr - ptr) << 2 == unknown, (4n) */
- {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+ {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
- {7, "R5_w=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+ {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>=0 */
- {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
/* packet pointer + nonnegative (4n+2) */
- {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
- {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+ {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
@@ -486,7 +467,7 @@ static struct bpf_align_test tests[] = {
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
- {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+ {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
}
},
{
@@ -528,7 +509,7 @@ static struct bpf_align_test tests[] = {
/* New unknown value in R7 is (4n) */
{11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
/* Subtracting it from R6 blows our unsigned bounds */
- {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
+ {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>= 0 */
{14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
/* At the time the word size load is performed from R5,
@@ -537,7 +518,8 @@ static struct bpf_align_test tests[] = {
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
+
},
},
{
@@ -579,18 +561,18 @@ static struct bpf_align_test tests[] = {
/* Adding 14 makes R6 be (4n+2) */
{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+ {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+ {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+ {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
},
},
};
@@ -669,51 +651,16 @@ static int do_test_single(struct bpf_align_test *test)
return ret;
}
-static int do_test(unsigned int from, unsigned int to)
+void test_align(void)
{
- int all_pass = 0;
- int all_fail = 0;
unsigned int i;
- for (i = from; i < to; i++) {
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_align_test *test = &tests[i];
- int fail;
- printf("Test %3d: %s ... ",
- i, test->descr);
- fail = do_test_single(test);
- if (fail) {
- all_fail++;
- printf("FAIL\n");
- } else {
- all_pass++;
- printf("PASS\n");
- }
- }
- printf("Results: %d pass %d fail\n",
- all_pass, all_fail);
- return all_fail ? EXIT_FAILURE : EXIT_SUCCESS;
-}
+ if (!test__start_subtest(test->descr))
+ continue;
-int main(int argc, char **argv)
-{
- unsigned int from = 0, to = ARRAY_SIZE(tests);
-
- if (argc == 3) {
- unsigned int l = atoi(argv[argc - 2]);
- unsigned int u = atoi(argv[argc - 1]);
-
- if (l < to && u < to) {
- from = l;
- to = u + 1;
- }
- } else if (argc == 2) {
- unsigned int t = atoi(argv[argc - 1]);
-
- if (t < to) {
- from = t;
- to = t + 1;
- }
+ CHECK_FAIL(do_test_single(test));
}
- return do_test(from, to);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
new file mode 100644
index 000000000000..87c29dde1cf9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include "bpf_iter_ipv6_route.skel.h"
+#include "bpf_iter_netlink.skel.h"
+#include "bpf_iter_bpf_map.skel.h"
+#include "bpf_iter_task.skel.h"
+#include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_test_kern1.skel.h"
+#include "bpf_iter_test_kern2.skel.h"
+#include "bpf_iter_test_kern3.skel.h"
+#include "bpf_iter_test_kern4.skel.h"
+
+static int duration;
+
+static void test_btf_id_or_null(void)
+{
+ struct bpf_iter_test_kern3 *skel;
+
+ skel = bpf_iter_test_kern3__open_and_load();
+ if (CHECK(skel, "bpf_iter_test_kern3__open_and_load",
+ "skeleton open_and_load unexpectedly succeeded\n")) {
+ bpf_iter_test_kern3__destroy(skel);
+ return;
+ }
+}
+
+static void do_dummy_read(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+ char buf[16] = {};
+ int iter_fd, len;
+
+ link = bpf_program__attach_iter(prog, NULL);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ return;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ /* not check contents, but ensure read() ends without error */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+
+ close(iter_fd);
+
+free_link:
+ bpf_link__destroy(link);
+}
+
+static void test_ipv6_route(void)
+{
+ struct bpf_iter_ipv6_route *skel;
+
+ skel = bpf_iter_ipv6_route__open_and_load();
+ if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_ipv6_route);
+
+ bpf_iter_ipv6_route__destroy(skel);
+}
+
+static void test_netlink(void)
+{
+ struct bpf_iter_netlink *skel;
+
+ skel = bpf_iter_netlink__open_and_load();
+ if (CHECK(!skel, "bpf_iter_netlink__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_netlink);
+
+ bpf_iter_netlink__destroy(skel);
+}
+
+static void test_bpf_map(void)
+{
+ struct bpf_iter_bpf_map *skel;
+
+ skel = bpf_iter_bpf_map__open_and_load();
+ if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_bpf_map);
+
+ bpf_iter_bpf_map__destroy(skel);
+}
+
+static void test_task(void)
+{
+ struct bpf_iter_task *skel;
+
+ skel = bpf_iter_task__open_and_load();
+ if (CHECK(!skel, "bpf_iter_task__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_task);
+
+ bpf_iter_task__destroy(skel);
+}
+
+static void test_task_file(void)
+{
+ struct bpf_iter_task_file *skel;
+
+ skel = bpf_iter_task_file__open_and_load();
+ if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_task_file);
+
+ bpf_iter_task_file__destroy(skel);
+}
+
+/* The expected string is less than 16 bytes */
+static int do_read_with_fd(int iter_fd, const char *expected,
+ bool read_one_char)
+{
+ int err = -1, len, read_buf_len, start;
+ char buf[16] = {};
+
+ read_buf_len = read_one_char ? 1 : 16;
+ start = 0;
+ while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) {
+ start += len;
+ if (CHECK(start >= 16, "read", "read len %d\n", len))
+ return -1;
+ read_buf_len = read_one_char ? 1 : 16 - start;
+ }
+ if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ return -1;
+
+ err = strcmp(buf, expected);
+ if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n",
+ buf, expected))
+ return -1;
+
+ return 0;
+}
+
+static void test_anon_iter(bool read_one_char)
+{
+ struct bpf_iter_test_kern1 *skel;
+ struct bpf_link *link;
+ int iter_fd, err;
+
+ skel = bpf_iter_test_kern1__open_and_load();
+ if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ err = bpf_iter_test_kern1__attach(skel);
+ if (CHECK(err, "bpf_iter_test_kern1__attach",
+ "skeleton attach failed\n")) {
+ goto out;
+ }
+
+ link = skel->links.dump_task;
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto out;
+
+ do_read_with_fd(iter_fd, "abcd", read_one_char);
+ close(iter_fd);
+
+out:
+ bpf_iter_test_kern1__destroy(skel);
+}
+
+static int do_read(const char *path, const char *expected)
+{
+ int err, iter_fd;
+
+ iter_fd = open(path, O_RDONLY);
+ if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n",
+ path, strerror(errno)))
+ return -1;
+
+ err = do_read_with_fd(iter_fd, expected, false);
+ close(iter_fd);
+ return err;
+}
+
+static void test_file_iter(void)
+{
+ const char *path = "/sys/fs/bpf/bpf_iter_test1";
+ struct bpf_iter_test_kern1 *skel1;
+ struct bpf_iter_test_kern2 *skel2;
+ struct bpf_link *link;
+ int err;
+
+ skel1 = bpf_iter_test_kern1__open_and_load();
+ if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto out;
+
+ /* unlink this path if it exists. */
+ unlink(path);
+
+ err = bpf_link__pin(link, path);
+ if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err))
+ goto free_link;
+
+ err = do_read(path, "abcd");
+ if (err)
+ goto unlink_path;
+
+ /* file based iterator seems working fine. Let us a link update
+ * of the underlying link and `cat` the iterator again, its content
+ * should change.
+ */
+ skel2 = bpf_iter_test_kern2__open_and_load();
+ if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load",
+ "skeleton open_and_load failed\n"))
+ goto unlink_path;
+
+ err = bpf_link__update_program(link, skel2->progs.dump_task);
+ if (CHECK(err, "update_prog", "update_prog failed\n"))
+ goto destroy_skel2;
+
+ do_read(path, "ABCD");
+
+destroy_skel2:
+ bpf_iter_test_kern2__destroy(skel2);
+unlink_path:
+ unlink(path);
+free_link:
+ bpf_link__destroy(link);
+out:
+ bpf_iter_test_kern1__destroy(skel1);
+}
+
+static void test_overflow(bool test_e2big_overflow, bool ret1)
+{
+ __u32 map_info_len, total_read_len, expected_read_len;
+ int err, iter_fd, map1_fd, map2_fd, len;
+ struct bpf_map_info map_info = {};
+ struct bpf_iter_test_kern4 *skel;
+ struct bpf_link *link;
+ __u32 page_size;
+ char *buf;
+
+ skel = bpf_iter_test_kern4__open();
+ if (CHECK(!skel, "bpf_iter_test_kern4__open",
+ "skeleton open failed\n"))
+ return;
+
+ /* create two maps: bpf program will only do bpf_seq_write
+ * for these two maps. The goal is one map output almost
+ * fills seq_file buffer and then the other will trigger
+ * overflow and needs restart.
+ */
+ map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ if (CHECK(map1_fd < 0, "bpf_create_map",
+ "map_creation failed: %s\n", strerror(errno)))
+ goto out;
+ map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ if (CHECK(map2_fd < 0, "bpf_create_map",
+ "map_creation failed: %s\n", strerror(errno)))
+ goto free_map1;
+
+ /* bpf_seq_printf kernel buffer is one page, so one map
+ * bpf_seq_write will mostly fill it, and the other map
+ * will partially fill and then trigger overflow and need
+ * bpf_seq_read restart.
+ */
+ page_size = sysconf(_SC_PAGE_SIZE);
+
+ if (test_e2big_overflow) {
+ skel->rodata->print_len = (page_size + 8) / 8;
+ expected_read_len = 2 * (page_size + 8);
+ } else if (!ret1) {
+ skel->rodata->print_len = (page_size - 8) / 8;
+ expected_read_len = 2 * (page_size - 8);
+ } else {
+ skel->rodata->print_len = 1;
+ expected_read_len = 2 * 8;
+ }
+ skel->rodata->ret1 = ret1;
+
+ if (CHECK(bpf_iter_test_kern4__load(skel),
+ "bpf_iter_test_kern4__load", "skeleton load failed\n"))
+ goto free_map2;
+
+ /* setup filtering map_id in bpf program */
+ map_info_len = sizeof(map_info);
+ err = bpf_obj_get_info_by_fd(map1_fd, &map_info, &map_info_len);
+ if (CHECK(err, "get_map_info", "get map info failed: %s\n",
+ strerror(errno)))
+ goto free_map2;
+ skel->bss->map1_id = map_info.id;
+
+ err = bpf_obj_get_info_by_fd(map2_fd, &map_info, &map_info_len);
+ if (CHECK(err, "get_map_info", "get map info failed: %s\n",
+ strerror(errno)))
+ goto free_map2;
+ skel->bss->map2_id = map_info.id;
+
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
+ if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ goto free_map2;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ goto free_link;
+
+ buf = malloc(expected_read_len);
+ if (!buf)
+ goto close_iter;
+
+ /* do read */
+ total_read_len = 0;
+ if (test_e2big_overflow) {
+ while ((len = read(iter_fd, buf, expected_read_len)) > 0)
+ total_read_len += len;
+
+ CHECK(len != -1 || errno != E2BIG, "read",
+ "expected ret -1, errno E2BIG, but get ret %d, error %s\n",
+ len, strerror(errno));
+ goto free_buf;
+ } else if (!ret1) {
+ while ((len = read(iter_fd, buf, expected_read_len)) > 0)
+ total_read_len += len;
+
+ if (CHECK(len < 0, "read", "read failed: %s\n",
+ strerror(errno)))
+ goto free_buf;
+ } else {
+ do {
+ len = read(iter_fd, buf, expected_read_len);
+ if (len > 0)
+ total_read_len += len;
+ } while (len > 0 || len == -EAGAIN);
+
+ if (CHECK(len < 0, "read", "read failed: %s\n",
+ strerror(errno)))
+ goto free_buf;
+ }
+
+ if (CHECK(total_read_len != expected_read_len, "read",
+ "total len %u, expected len %u\n", total_read_len,
+ expected_read_len))
+ goto free_buf;
+
+ if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed",
+ "expected 1 actual %d\n", skel->bss->map1_accessed))
+ goto free_buf;
+
+ if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed",
+ "expected 2 actual %d\n", skel->bss->map2_accessed))
+ goto free_buf;
+
+ CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2,
+ "map2_seqnum", "two different seqnum %lld %lld\n",
+ skel->bss->map2_seqnum1, skel->bss->map2_seqnum2);
+
+free_buf:
+ free(buf);
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+free_map2:
+ close(map2_fd);
+free_map1:
+ close(map1_fd);
+out:
+ bpf_iter_test_kern4__destroy(skel);
+}
+
+void test_bpf_iter(void)
+{
+ if (test__start_subtest("btf_id_or_null"))
+ test_btf_id_or_null();
+ if (test__start_subtest("ipv6_route"))
+ test_ipv6_route();
+ if (test__start_subtest("netlink"))
+ test_netlink();
+ if (test__start_subtest("bpf_map"))
+ test_bpf_map();
+ if (test__start_subtest("task"))
+ test_task();
+ if (test__start_subtest("task_file"))
+ test_task_file();
+ if (test__start_subtest("anon"))
+ test_anon_iter(false);
+ if (test__start_subtest("anon-read-one-char"))
+ test_anon_iter(true);
+ if (test__start_subtest("file"))
+ test_file_iter();
+ if (test__start_subtest("overflow"))
+ test_overflow(false, false);
+ if (test__start_subtest("overflow-e2big"))
+ test_overflow(true, false);
+ if (test__start_subtest("prog-ret-1"))
+ test_overflow(false, true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index f10029821e16..7afa4160416f 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -1,26 +1,30 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#define nr_iters 2
+
void test_bpf_obj_id(void)
{
const __u64 array_magic_value = 0xfaceb00c;
const __u32 array_key = 0;
- const int nr_iters = 2;
const char *file = "./test_obj_id.o";
const char *expected_prog_name = "test_obj_id";
const char *expected_map_name = "test_map_id";
const __u64 nsec_per_sec = 1000000000;
- struct bpf_object *objs[nr_iters];
+ struct bpf_object *objs[nr_iters] = {};
+ struct bpf_link *links[nr_iters] = {};
+ struct bpf_program *prog;
int prog_fds[nr_iters], map_fds[nr_iters];
/* +1 to test for the info_len returned by kernel */
struct bpf_prog_info prog_infos[nr_iters + 1];
struct bpf_map_info map_infos[nr_iters + 1];
+ struct bpf_link_info link_infos[nr_iters + 1];
/* Each prog only uses one map. +1 to test nr_map_ids
* returned by kernel.
*/
__u32 map_ids[nr_iters + 1];
- char jited_insns[128], xlated_insns[128], zeros[128];
+ char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128];
__u32 i, next_id, info_len, nr_id_found, duration = 0;
struct timespec real_time_ts, boot_time_ts;
int err = 0;
@@ -36,14 +40,15 @@ void test_bpf_obj_id(void)
CHECK(err >= 0 || errno != ENOENT,
"get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
- for (i = 0; i < nr_iters; i++)
- objs[i] = NULL;
+ err = bpf_link_get_fd_by_id(0);
+ CHECK(err >= 0 || errno != ENOENT,
+ "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno);
/* Check bpf_obj_get_info_by_fd() */
bzero(zeros, sizeof(zeros));
for (i = 0; i < nr_iters; i++) {
now = time(NULL);
- err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
+ err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT,
&objs[i], &prog_fds[i]);
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
@@ -60,6 +65,17 @@ void test_bpf_obj_id(void)
if (CHECK_FAIL(err))
goto done;
+ prog = bpf_object__find_program_by_title(objs[i],
+ "raw_tp/sys_enter");
+ if (CHECK_FAIL(!prog))
+ goto done;
+ links[i] = bpf_program__attach(prog);
+ err = libbpf_get_error(links[i]);
+ if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) {
+ links[i] = NULL;
+ goto done;
+ }
+
/* Check getting map info */
info_len = sizeof(struct bpf_map_info) * 2;
bzero(&map_infos[i], info_len);
@@ -107,7 +123,7 @@ void test_bpf_obj_id(void)
load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
+ (prog_infos[i].load_time / nsec_per_sec);
if (CHECK(err ||
- prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
+ prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT ||
info_len != sizeof(struct bpf_prog_info) ||
(env.jit_enabled && !prog_infos[i].jited_prog_len) ||
(env.jit_enabled &&
@@ -120,7 +136,11 @@ void test_bpf_obj_id(void)
*(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
strcmp((char *)prog_infos[i].name, expected_prog_name),
"get-prog-info(fd)",
- "err %d errno %d i %d type %d(%d) info_len %u(%zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+ "err %d errno %d i %d type %d(%d) info_len %u(%zu) "
+ "jit_enabled %d jited_prog_len %u xlated_prog_len %u "
+ "jited_prog %d xlated_prog %d load_time %lu(%lu) "
+ "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) "
+ "name %s(%s)\n",
err, errno, i,
prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
info_len, sizeof(struct bpf_prog_info),
@@ -135,6 +155,33 @@ void test_bpf_obj_id(void)
*(int *)(long)prog_infos[i].map_ids, map_infos[i].id,
prog_infos[i].name, expected_prog_name))
goto done;
+
+ /* Check getting link info */
+ info_len = sizeof(struct bpf_link_info) * 2;
+ bzero(&link_infos[i], info_len);
+ link_infos[i].raw_tracepoint.tp_name = (__u64)&tp_name;
+ link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]),
+ &link_infos[i], &info_len);
+ if (CHECK(err ||
+ link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
+ link_infos[i].prog_id != prog_infos[i].id ||
+ link_infos[i].raw_tracepoint.tp_name != (__u64)&tp_name ||
+ strcmp((char *)link_infos[i].raw_tracepoint.tp_name,
+ "sys_enter") ||
+ info_len != sizeof(struct bpf_link_info),
+ "get-link-info(fd)",
+ "err %d errno %d info_len %u(%zu) type %d(%d) id %d "
+ "prog_id %d (%d) tp_name %s(%s)\n",
+ err, errno,
+ info_len, sizeof(struct bpf_link_info),
+ link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT,
+ link_infos[i].id,
+ link_infos[i].prog_id, prog_infos[i].id,
+ (char *)link_infos[i].raw_tracepoint.tp_name,
+ "sys_enter"))
+ goto done;
+
}
/* Check bpf_prog_get_next_id() */
@@ -247,7 +294,52 @@ void test_bpf_obj_id(void)
"nr_id_found %u(%u)\n",
nr_id_found, nr_iters);
+ /* Check bpf_link_get_next_id() */
+ nr_id_found = 0;
+ next_id = 0;
+ while (!bpf_link_get_next_id(next_id, &next_id)) {
+ struct bpf_link_info link_info;
+ int link_fd, cmp_res;
+
+ info_len = sizeof(link_info);
+ memset(&link_info, 0, info_len);
+
+ link_fd = bpf_link_get_fd_by_id(next_id);
+ if (link_fd < 0 && errno == ENOENT)
+ /* The bpf_link is in the dead row */
+ continue;
+ if (CHECK(link_fd < 0, "get-link-fd(next_id)",
+ "link_fd %d next_id %u errno %d\n",
+ link_fd, next_id, errno))
+ break;
+
+ for (i = 0; i < nr_iters; i++)
+ if (link_infos[i].id == next_id)
+ break;
+
+ if (i == nr_iters)
+ continue;
+
+ nr_id_found++;
+
+ err = bpf_obj_get_info_by_fd(link_fd, &link_info, &info_len);
+ cmp_res = memcmp(&link_info, &link_infos[i],
+ offsetof(struct bpf_link_info, raw_tracepoint));
+ CHECK(err || info_len != sizeof(link_info) || cmp_res,
+ "check get-link-info(next_id->fd)",
+ "err %d errno %d info_len %u(%zu) memcmp %d\n",
+ err, errno, info_len, sizeof(struct bpf_link_info),
+ cmp_res);
+
+ close(link_fd);
+ }
+ CHECK(nr_id_found != nr_iters,
+ "check total link id found by get_next_id",
+ "nr_id_found %u(%u)\n", nr_id_found, nr_iters);
+
done:
- for (i = 0; i < nr_iters; i++)
+ for (i = 0; i < nr_iters; i++) {
+ bpf_link__destroy(links[i]);
bpf_object__close(objs[i]);
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 8482bbc67eec..9a8f47fc0b91 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -11,6 +11,7 @@
static const unsigned int total_bytes = 10 * 1024 * 1024;
static const struct timeval timeo_sec = { .tv_sec = 10 };
static const size_t timeo_optlen = sizeof(timeo_sec);
+static int expected_stg = 0xeB9F;
static int stop, duration;
static int settimeo(int fd)
@@ -88,7 +89,7 @@ done:
return NULL;
}
-static void do_test(const char *tcp_ca)
+static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
{
struct sockaddr_in6 sa6 = {};
ssize_t nr_recv = 0, bytes = 0;
@@ -126,14 +127,34 @@ static void do_test(const char *tcp_ca)
err = listen(lfd, 1);
if (CHECK(err == -1, "listen", "errno:%d\n", errno))
goto done;
- err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
- if (CHECK(err != 0, "pthread_create", "err:%d\n", err))
- goto done;
+
+ if (sk_stg_map) {
+ err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd,
+ &expected_stg, BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem(sk_stg_map)",
+ "err:%d errno:%d\n", err, errno))
+ goto done;
+ }
/* connect to server */
err = connect(fd, (struct sockaddr *)&sa6, addrlen);
if (CHECK(err == -1, "connect", "errno:%d\n", errno))
- goto wait_thread;
+ goto done;
+
+ if (sk_stg_map) {
+ int tmp_stg;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd,
+ &tmp_stg);
+ if (CHECK(!err || errno != ENOENT,
+ "bpf_map_lookup_elem(sk_stg_map)",
+ "err:%d errno:%d\n", err, errno))
+ goto done;
+ }
+
+ err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
+ if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno))
+ goto done;
/* recv total_bytes */
while (bytes < total_bytes && !READ_ONCE(stop)) {
@@ -149,7 +170,6 @@ static void do_test(const char *tcp_ca)
CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n",
bytes, total_bytes, nr_recv, errno);
-wait_thread:
WRITE_ONCE(stop, 1);
pthread_join(srv_thread, &thread_ret);
CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld",
@@ -175,7 +195,7 @@ static void test_cubic(void)
return;
}
- do_test("bpf_cubic");
+ do_test("bpf_cubic", NULL);
bpf_link__destroy(link);
bpf_cubic__destroy(cubic_skel);
@@ -197,7 +217,10 @@ static void test_dctcp(void)
return;
}
- do_test("bpf_dctcp");
+ do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map);
+ CHECK(dctcp_skel->bss->stg_result != expected_stg,
+ "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n",
+ dctcp_skel->bss->stg_result, expected_stg);
bpf_link__destroy(link);
bpf_dctcp__destroy(dctcp_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 7390d3061065..cb33a7ee4e04 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -125,6 +125,6 @@ void test_btf_dump() {
if (!test__start_subtest(t->name))
continue;
- test_btf_dump_case(i, &btf_dump_test_cases[i]);
+ test_btf_dump_case(i, &btf_dump_test_cases[i]);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
new file mode 100644
index 000000000000..f7ee8fa377ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+
+#include "test_btf_map_in_map.skel.h"
+
+void test_btf_map_in_map(void)
+{
+ int duration = 0, err, key = 0, val;
+ struct test_btf_map_in_map* skel;
+
+ skel = test_btf_map_in_map__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
+ return;
+
+ err = test_btf_map_in_map__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* inner1 = input, inner2 = input + 1 */
+ val = bpf_map__fd(skel->maps.inner_map1);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0);
+ val = bpf_map__fd(skel->maps.inner_map2);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0);
+ skel->bss->input = 1;
+ usleep(1);
+
+ bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val);
+ CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1);
+ bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val);
+ CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2);
+
+ /* inner1 = input + 1, inner2 = input */
+ val = bpf_map__fd(skel->maps.inner_map2);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0);
+ val = bpf_map__fd(skel->maps.inner_map1);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0);
+ skel->bss->input = 3;
+ usleep(1);
+
+ bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val);
+ CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4);
+ bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val);
+ CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3);
+
+cleanup:
+ test_btf_map_in_map__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
index 5b13f2c6c402..70e94e783070 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
@@ -6,7 +6,7 @@
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int prog_load(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 2ff21dbce179..139f8e82c7c6 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -6,7 +6,7 @@
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int map_fd = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
index 9d8cb48b99de..9e96f8d87fea 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
@@ -8,7 +8,7 @@
#define BAR "/foo/bar/"
#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
+static char bpf_log_buf[BPF_LOG_BUF_SIZE];
static int prog_load(int verdict)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
new file mode 100644
index 000000000000..6e04f8d1d15b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "test_cgroup_link.skel.h"
+
+static __u32 duration = 0;
+#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+static struct test_cgroup_link *skel = NULL;
+
+int ping_and_check(int exp_calls, int exp_alt_calls)
+{
+ skel->bss->calls = 0;
+ skel->bss->alt_calls = 0;
+ CHECK_FAIL(system(PING_CMD));
+ if (CHECK(skel->bss->calls != exp_calls, "call_cnt",
+ "exp %d, got %d\n", exp_calls, skel->bss->calls))
+ return -EINVAL;
+ if (CHECK(skel->bss->alt_calls != exp_alt_calls, "alt_call_cnt",
+ "exp %d, got %d\n", exp_alt_calls, skel->bss->alt_calls))
+ return -EINVAL;
+ return 0;
+}
+
+void test_cgroup_link(void)
+{
+ struct {
+ const char *path;
+ int fd;
+ } cgs[] = {
+ { "/cg1" },
+ { "/cg1/cg2" },
+ { "/cg1/cg2/cg3" },
+ { "/cg1/cg2/cg3/cg4" },
+ };
+ int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts);
+ struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link;
+ __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags;
+ int i = 0, err, prog_fd;
+ bool detach_legacy = false;
+
+ skel = test_cgroup_link__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "failed to open/load skeleton\n"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.egress);
+
+ err = setup_cgroup_environment();
+ if (CHECK(err, "cg_init", "failed: %d\n", err))
+ goto cleanup;
+
+ for (i = 0; i < cg_nr; i++) {
+ cgs[i].fd = create_and_get_cgroup(cgs[i].path);
+ if (CHECK(cgs[i].fd < 0, "cg_create", "fail: %d\n", cgs[i].fd))
+ goto cleanup;
+ }
+
+ err = join_cgroup(cgs[last_cg].path);
+ if (CHECK(err, "cg_join", "fail: %d\n", err))
+ goto cleanup;
+
+ for (i = 0; i < cg_nr; i++) {
+ links[i] = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[i].fd);
+ if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
+ i, PTR_ERR(links[i])))
+ goto cleanup;
+ }
+
+ ping_and_check(cg_nr, 0);
+
+ /* query the number of effective progs and attach flags in root cg */
+ err = bpf_prog_query(cgs[0].fd, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags, NULL,
+ &prog_cnt);
+ CHECK_FAIL(err);
+ CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
+ if (CHECK(prog_cnt != 1, "effect_cnt", "exp %d, got %d\n", 1, prog_cnt))
+ goto cleanup;
+
+ /* query the number of effective progs in last cg */
+ err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, NULL, NULL,
+ &prog_cnt);
+ CHECK_FAIL(err);
+ CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
+ if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
+ cg_nr, prog_cnt))
+ goto cleanup;
+
+ /* query the effective prog IDs in last cg */
+ err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
+ BPF_F_QUERY_EFFECTIVE, &attach_flags,
+ prog_ids, &prog_cnt);
+ CHECK_FAIL(err);
+ CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
+ if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
+ cg_nr, prog_cnt))
+ goto cleanup;
+ for (i = 1; i < prog_cnt; i++) {
+ CHECK(prog_ids[i - 1] != prog_ids[i], "prog_id_check",
+ "idx %d, prev id %d, cur id %d\n",
+ i, prog_ids[i - 1], prog_ids[i]);
+ }
+
+ /* detach bottom program and ping again */
+ bpf_link__destroy(links[last_cg]);
+ links[last_cg] = NULL;
+
+ ping_and_check(cg_nr - 1, 0);
+
+ /* mix in with non link-based multi-attachments */
+ err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS, BPF_F_ALLOW_MULTI);
+ if (CHECK(err, "cg_attach_legacy", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = true;
+
+ links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[last_cg].fd);
+ if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
+ PTR_ERR(links[last_cg])))
+ goto cleanup;
+
+ ping_and_check(cg_nr + 1, 0);
+
+ /* detach link */
+ bpf_link__destroy(links[last_cg]);
+ links[last_cg] = NULL;
+
+ /* detach legacy */
+ err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+ if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = false;
+
+ /* attach legacy exclusive prog attachment */
+ err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS, 0);
+ if (CHECK(err, "cg_attach_exclusive", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = true;
+
+ /* attempt to mix in with multi-attach bpf_link */
+ tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[last_cg].fd);
+ if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+ bpf_link__destroy(tmp_link);
+ goto cleanup;
+ }
+
+ ping_and_check(cg_nr, 0);
+
+ /* detach */
+ err = bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+ if (CHECK(err, "cg_detach_legacy", "errno=%d\n", errno))
+ goto cleanup;
+ detach_legacy = false;
+
+ ping_and_check(cg_nr - 1, 0);
+
+ /* attach back link-based one */
+ links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
+ cgs[last_cg].fd);
+ if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
+ PTR_ERR(links[last_cg])))
+ goto cleanup;
+
+ ping_and_check(cg_nr, 0);
+
+ /* check legacy exclusive prog can't be attached */
+ err = bpf_prog_attach(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS, 0);
+ if (CHECK(!err, "cg_attach_exclusive", "unexpected success")) {
+ bpf_prog_detach2(prog_fd, cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS);
+ goto cleanup;
+ }
+
+ /* replace BPF programs inside their links for all but first link */
+ for (i = 1; i < cg_nr; i++) {
+ err = bpf_link__update_program(links[i], skel->progs.egress_alt);
+ if (CHECK(err, "prog_upd", "link #%d\n", i))
+ goto cleanup;
+ }
+
+ ping_and_check(1, cg_nr - 1);
+
+ /* Attempt program update with wrong expected BPF program */
+ link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress_alt);
+ link_upd_opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(links[0]),
+ bpf_program__fd(skel->progs.egress_alt),
+ &link_upd_opts);
+ if (CHECK(err == 0 || errno != EPERM, "prog_cmpxchg1",
+ "unexpectedly succeeded, err %d, errno %d\n", err, -errno))
+ goto cleanup;
+
+ /* Compare-exchange single link program from egress to egress_alt */
+ link_upd_opts.old_prog_fd = bpf_program__fd(skel->progs.egress);
+ link_upd_opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(links[0]),
+ bpf_program__fd(skel->progs.egress_alt),
+ &link_upd_opts);
+ if (CHECK(err, "prog_cmpxchg2", "errno %d\n", -errno))
+ goto cleanup;
+
+ /* ping */
+ ping_and_check(0, cg_nr);
+
+ /* close cgroup FDs before detaching links */
+ for (i = 0; i < cg_nr; i++) {
+ if (cgs[i].fd > 0) {
+ close(cgs[i].fd);
+ cgs[i].fd = -1;
+ }
+ }
+
+ /* BPF programs should still get called */
+ ping_and_check(0, cg_nr);
+
+ /* leave cgroup and remove them, don't detach programs */
+ cleanup_cgroup_environment();
+
+ /* BPF programs should have been auto-detached */
+ ping_and_check(0, 0);
+
+cleanup:
+ if (detach_legacy)
+ bpf_prog_detach2(prog_fd, cgs[last_cg].fd,
+ BPF_CGROUP_INET_EGRESS);
+
+ for (i = 0; i < cg_nr; i++) {
+ if (!IS_ERR(links[i]))
+ bpf_link__destroy(links[i]);
+ }
+ test_cgroup_link__destroy(skel);
+
+ for (i = 0; i < cg_nr; i++) {
+ if (cgs[i].fd > 0)
+ close(cgs[i].fd);
+ }
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
new file mode 100644
index 000000000000..059047af7df3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "cgroup_skb_sk_lookup_kern.skel.h"
+
+static void run_lookup_test(__u16 *g_serv_port, int out_sk)
+{
+ int serv_sk = -1, in_sk = -1, serv_in_sk = -1, err;
+ struct sockaddr_in6 addr = {};
+ socklen_t addr_len = sizeof(addr);
+ __u32 duration = 0;
+
+ serv_sk = start_server(AF_INET6, SOCK_STREAM);
+ if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
+ return;
+
+ err = getsockname(serv_sk, (struct sockaddr *)&addr, &addr_len);
+ if (CHECK(err, "getsockname", "errno %d\n", errno))
+ goto cleanup;
+
+ *g_serv_port = addr.sin6_port;
+
+ /* Client outside of test cgroup should fail to connect by timeout. */
+ err = connect_fd_to_fd(out_sk, serv_sk);
+ if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd",
+ "unexpected result err %d errno %d\n", err, errno))
+ goto cleanup;
+
+ err = connect_wait(out_sk);
+ if (CHECK(err, "connect_wait", "unexpected result %d\n", err))
+ goto cleanup;
+
+ /* Client inside test cgroup should connect just fine. */
+ in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk);
+ if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno))
+ goto cleanup;
+
+ serv_in_sk = accept(serv_sk, NULL, NULL);
+ if (CHECK(serv_in_sk < 0, "accept", "errno %d\n", errno))
+ goto cleanup;
+
+cleanup:
+ close(serv_in_sk);
+ close(in_sk);
+ close(serv_sk);
+}
+
+static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
+{
+ struct cgroup_skb_sk_lookup_kern *skel;
+ struct bpf_link *link;
+ __u32 duration = 0;
+ int cgfd = -1;
+
+ skel = cgroup_skb_sk_lookup_kern__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+ return;
+
+ cgfd = test__join_cgroup(cg_path);
+ if (CHECK(cgfd < 0, "cgroup_join", "cgroup setup failed\n"))
+ goto cleanup;
+
+ link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
+ if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+ goto cleanup;
+
+ run_lookup_test(&skel->bss->g_serv_port, out_sk);
+
+ bpf_link__destroy(link);
+
+cleanup:
+ close(cgfd);
+ cgroup_skb_sk_lookup_kern__destroy(skel);
+}
+
+void test_cgroup_skb_sk_lookup(void)
+{
+ const char *cg_path = "/foo";
+ int out_sk;
+
+ /* Create a socket before joining testing cgroup so that its cgroup id
+ * differs from that of testing cgroup. Moving selftests process to
+ * testing cgroup won't change cgroup id of an already created socket.
+ */
+ out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ if (CHECK_FAIL(out_sk < 0))
+ return;
+
+ run_cgroup_bpf_test(cg_path, out_sk);
+
+ close(out_sk);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
new file mode 100644
index 000000000000..f259085cca6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <string.h>
+
+#include <linux/pkt_cls.h>
+
+#include <test_progs.h>
+
+#include "progs/test_cls_redirect.h"
+#include "test_cls_redirect.skel.h"
+
+#define ENCAP_IP INADDR_LOOPBACK
+#define ENCAP_PORT (1234)
+
+struct addr_port {
+ in_port_t port;
+ union {
+ struct in_addr in_addr;
+ struct in6_addr in6_addr;
+ };
+};
+
+struct tuple {
+ int family;
+ struct addr_port src;
+ struct addr_port dst;
+};
+
+static int start_server(const struct sockaddr *addr, socklen_t len, int type)
+{
+ int fd = socket(addr->sa_family, type, 0);
+ if (CHECK_FAIL(fd == -1))
+ return -1;
+ if (CHECK_FAIL(bind(fd, addr, len) == -1))
+ goto err;
+ if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
+ goto err;
+
+ return fd;
+
+err:
+ close(fd);
+ return -1;
+}
+
+static int connect_to_server(const struct sockaddr *addr, socklen_t len,
+ int type)
+{
+ int fd = socket(addr->sa_family, type, 0);
+ if (CHECK_FAIL(fd == -1))
+ return -1;
+ if (CHECK_FAIL(connect(fd, addr, len)))
+ goto err;
+
+ return fd;
+
+err:
+ close(fd);
+ return -1;
+}
+
+static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
+{
+ const struct sockaddr_in6 *in6;
+ const struct sockaddr_in *in;
+
+ switch (sa->sa_family) {
+ case AF_INET:
+ in = (const struct sockaddr_in *)sa;
+ ap->in_addr = in->sin_addr;
+ ap->port = in->sin_port;
+ return true;
+
+ case AF_INET6:
+ in6 = (const struct sockaddr_in6 *)sa;
+ ap->in6_addr = in6->sin6_addr;
+ ap->port = in6->sin6_port;
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
+ int *server, int *conn, struct tuple *tuple)
+{
+ struct sockaddr_storage ss;
+ socklen_t slen = sizeof(ss);
+ struct sockaddr *sa = (struct sockaddr *)&ss;
+
+ *server = start_server(addr, len, type);
+ if (*server < 0)
+ return false;
+
+ if (CHECK_FAIL(getsockname(*server, sa, &slen)))
+ goto close_server;
+
+ *conn = connect_to_server(sa, slen, type);
+ if (*conn < 0)
+ goto close_server;
+
+ /* We want to simulate packets arriving at conn, so we have to
+ * swap src and dst.
+ */
+ slen = sizeof(ss);
+ if (CHECK_FAIL(getsockname(*conn, sa, &slen)))
+ goto close_conn;
+
+ if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst)))
+ goto close_conn;
+
+ slen = sizeof(ss);
+ if (CHECK_FAIL(getpeername(*conn, sa, &slen)))
+ goto close_conn;
+
+ if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src)))
+ goto close_conn;
+
+ tuple->family = ss.ss_family;
+ return true;
+
+close_conn:
+ close(*conn);
+ *conn = -1;
+close_server:
+ close(*server);
+ *server = -1;
+ return false;
+}
+
+static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
+{
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = family;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ return sizeof(*addr4);
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = family;
+ addr6->sin6_addr = in6addr_loopback;
+ return sizeof(*addr6);
+ default:
+ fprintf(stderr, "Invalid family %d", family);
+ return 0;
+ }
+}
+
+static bool was_decapsulated(struct bpf_prog_test_run_attr *tattr)
+{
+ return tattr->data_size_out < tattr->data_size_in;
+}
+
+enum type {
+ UDP,
+ TCP,
+ __NR_KIND,
+};
+
+enum hops {
+ NO_HOPS,
+ ONE_HOP,
+};
+
+enum flags {
+ NONE,
+ SYN,
+ ACK,
+};
+
+enum conn {
+ KNOWN_CONN,
+ UNKNOWN_CONN,
+};
+
+enum result {
+ ACCEPT,
+ FORWARD,
+};
+
+struct test_cfg {
+ enum type type;
+ enum result result;
+ enum conn conn;
+ enum hops hops;
+ enum flags flags;
+};
+
+static int test_str(void *buf, size_t len, const struct test_cfg *test,
+ int family)
+{
+ const char *family_str, *type, *conn, *hops, *result, *flags;
+
+ family_str = "IPv4";
+ if (family == AF_INET6)
+ family_str = "IPv6";
+
+ type = "TCP";
+ if (test->type == UDP)
+ type = "UDP";
+
+ conn = "known";
+ if (test->conn == UNKNOWN_CONN)
+ conn = "unknown";
+
+ hops = "no hops";
+ if (test->hops == ONE_HOP)
+ hops = "one hop";
+
+ result = "accept";
+ if (test->result == FORWARD)
+ result = "forward";
+
+ flags = "none";
+ if (test->flags == SYN)
+ flags = "SYN";
+ else if (test->flags == ACK)
+ flags = "ACK";
+
+ return snprintf(buf, len, "%s %s %s %s (%s, flags: %s)", family_str,
+ type, result, conn, hops, flags);
+}
+
+static struct test_cfg tests[] = {
+ { TCP, ACCEPT, UNKNOWN_CONN, NO_HOPS, SYN },
+ { TCP, ACCEPT, UNKNOWN_CONN, NO_HOPS, ACK },
+ { TCP, FORWARD, UNKNOWN_CONN, ONE_HOP, ACK },
+ { TCP, ACCEPT, KNOWN_CONN, ONE_HOP, ACK },
+ { UDP, ACCEPT, UNKNOWN_CONN, NO_HOPS, NONE },
+ { UDP, FORWARD, UNKNOWN_CONN, ONE_HOP, NONE },
+ { UDP, ACCEPT, KNOWN_CONN, ONE_HOP, NONE },
+};
+
+static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto)
+{
+ const uint8_t hlen =
+ (sizeof(struct guehdr) / sizeof(uint32_t)) + hop_count;
+ *encap = (encap_headers_t){
+ .eth = { .h_proto = htons(ETH_P_IP) },
+ .ip = {
+ .ihl = 5,
+ .version = 4,
+ .ttl = IPDEFTTL,
+ .protocol = IPPROTO_UDP,
+ .daddr = htonl(ENCAP_IP)
+ },
+ .udp = {
+ .dest = htons(ENCAP_PORT),
+ },
+ .gue = {
+ .hlen = hlen,
+ .proto_ctype = proto
+ },
+ .unigue = {
+ .hop_count = hop_count
+ },
+ };
+}
+
+static size_t build_input(const struct test_cfg *test, void *const buf,
+ const struct tuple *tuple)
+{
+ in_port_t sport = tuple->src.port;
+ encap_headers_t encap;
+ struct iphdr ip;
+ struct ipv6hdr ipv6;
+ struct tcphdr tcp;
+ struct udphdr udp;
+ struct in_addr next_hop;
+ uint8_t *p = buf;
+ int proto;
+
+ proto = IPPROTO_IPIP;
+ if (tuple->family == AF_INET6)
+ proto = IPPROTO_IPV6;
+
+ encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto);
+ p = mempcpy(p, &encap, sizeof(encap));
+
+ if (test->hops == ONE_HOP) {
+ next_hop = (struct in_addr){ .s_addr = htonl(0x7f000002) };
+ p = mempcpy(p, &next_hop, sizeof(next_hop));
+ }
+
+ proto = IPPROTO_TCP;
+ if (test->type == UDP)
+ proto = IPPROTO_UDP;
+
+ switch (tuple->family) {
+ case AF_INET:
+ ip = (struct iphdr){
+ .ihl = 5,
+ .version = 4,
+ .ttl = IPDEFTTL,
+ .protocol = proto,
+ .saddr = tuple->src.in_addr.s_addr,
+ .daddr = tuple->dst.in_addr.s_addr,
+ };
+ p = mempcpy(p, &ip, sizeof(ip));
+ break;
+ case AF_INET6:
+ ipv6 = (struct ipv6hdr){
+ .version = 6,
+ .hop_limit = IPDEFTTL,
+ .nexthdr = proto,
+ .saddr = tuple->src.in6_addr,
+ .daddr = tuple->dst.in6_addr,
+ };
+ p = mempcpy(p, &ipv6, sizeof(ipv6));
+ break;
+ default:
+ return 0;
+ }
+
+ if (test->conn == UNKNOWN_CONN)
+ sport--;
+
+ switch (test->type) {
+ case TCP:
+ tcp = (struct tcphdr){
+ .source = sport,
+ .dest = tuple->dst.port,
+ };
+ if (test->flags == SYN)
+ tcp.syn = true;
+ if (test->flags == ACK)
+ tcp.ack = true;
+ p = mempcpy(p, &tcp, sizeof(tcp));
+ break;
+ case UDP:
+ udp = (struct udphdr){
+ .source = sport,
+ .dest = tuple->dst.port,
+ };
+ p = mempcpy(p, &udp, sizeof(udp));
+ break;
+ default:
+ return 0;
+ }
+
+ return (void *)p - buf;
+}
+
+static void close_fds(int *fds, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ if (fds[i] > 0)
+ close(fds[i]);
+}
+
+void test_cls_redirect(void)
+{
+ struct test_cls_redirect *skel = NULL;
+ struct bpf_prog_test_run_attr tattr = {};
+ int families[] = { AF_INET, AF_INET6 };
+ struct sockaddr_storage ss;
+ struct sockaddr *addr;
+ socklen_t slen;
+ int i, j, err;
+
+ int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
+ int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
+ struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
+
+ skel = test_cls_redirect__open();
+ if (CHECK_FAIL(!skel))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ if (CHECK_FAIL(test_cls_redirect__load(skel)))
+ goto cleanup;
+
+ addr = (struct sockaddr *)&ss;
+ for (i = 0; i < ARRAY_SIZE(families); i++) {
+ slen = prepare_addr(&ss, families[i]);
+ if (CHECK_FAIL(!slen))
+ goto cleanup;
+
+ if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM,
+ &servers[UDP][i], &conns[UDP][i],
+ &tuples[UDP][i])))
+ goto cleanup;
+
+ if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM,
+ &servers[TCP][i], &conns[TCP][i],
+ &tuples[TCP][i])))
+ goto cleanup;
+ }
+
+ tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect);
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ struct test_cfg *test = &tests[i];
+
+ for (j = 0; j < ARRAY_SIZE(families); j++) {
+ struct tuple *tuple = &tuples[test->type][j];
+ char input[256];
+ char tmp[256];
+
+ test_str(tmp, sizeof(tmp), test, tuple->family);
+ if (!test__start_subtest(tmp))
+ continue;
+
+ tattr.data_out = tmp;
+ tattr.data_size_out = sizeof(tmp);
+
+ tattr.data_in = input;
+ tattr.data_size_in = build_input(test, input, tuple);
+ if (CHECK_FAIL(!tattr.data_size_in))
+ continue;
+
+ err = bpf_prog_test_run_xattr(&tattr);
+ if (CHECK_FAIL(err))
+ continue;
+
+ if (tattr.retval != TC_ACT_REDIRECT) {
+ PRINT_FAIL("expected TC_ACT_REDIRECT, got %d\n",
+ tattr.retval);
+ continue;
+ }
+
+ switch (test->result) {
+ case ACCEPT:
+ if (CHECK_FAIL(!was_decapsulated(&tattr)))
+ continue;
+ break;
+ case FORWARD:
+ if (CHECK_FAIL(was_decapsulated(&tattr)))
+ continue;
+ break;
+ default:
+ PRINT_FAIL("unknown result %d\n", test->result);
+ continue;
+ }
+ }
+ }
+
+cleanup:
+ test_cls_redirect__destroy(skel);
+ close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0]));
+ close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
new file mode 100644
index 000000000000..17bbf76812ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+static int verify_ports(int family, int fd,
+ __u16 expected_local, __u16 expected_peer)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ __u16 port;
+
+ if (getsockname(fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ return -1;
+ }
+
+ if (family == AF_INET)
+ port = ((struct sockaddr_in *)&addr)->sin_port;
+ else
+ port = ((struct sockaddr_in6 *)&addr)->sin6_port;
+
+ if (ntohs(port) != expected_local) {
+ log_err("Unexpected local port %d, expected %d", ntohs(port),
+ expected_local);
+ return -1;
+ }
+
+ if (getpeername(fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get peer addr");
+ return -1;
+ }
+
+ if (family == AF_INET)
+ port = ((struct sockaddr_in *)&addr)->sin_port;
+ else
+ port = ((struct sockaddr_in6 *)&addr)->sin6_port;
+
+ if (ntohs(port) != expected_peer) {
+ log_err("Unexpected peer port %d, expected %d", ntohs(port),
+ expected_peer);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int run_test(int cgroup_fd, int server_fd, int family, int type)
+{
+ bool v4 = family == AF_INET;
+ __u16 expected_local_port = v4 ? 22222 : 22223;
+ __u16 expected_peer_port = 60000;
+ struct bpf_prog_load_attr attr = {
+ .file = v4 ? "./connect_force_port4.o" :
+ "./connect_force_port6.o",
+ };
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int xlate_fd, fd, err;
+ __u32 duration = 0;
+
+ err = bpf_prog_load_xattr(&attr, &obj, &xlate_fd);
+ if (err) {
+ log_err("Failed to load BPF object");
+ return -1;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, v4 ?
+ "cgroup/connect4" :
+ "cgroup/connect6");
+ if (CHECK(!prog, "find_prog", "connect prog not found\n")) {
+ err = -EIO;
+ goto close_bpf_object;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+ BPF_CGROUP_INET4_CONNECT :
+ BPF_CGROUP_INET6_CONNECT, 0);
+ if (err) {
+ log_err("Failed to attach BPF program");
+ goto close_bpf_object;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, v4 ?
+ "cgroup/getpeername4" :
+ "cgroup/getpeername6");
+ if (CHECK(!prog, "find_prog", "getpeername prog not found\n")) {
+ err = -EIO;
+ goto close_bpf_object;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+ BPF_CGROUP_INET4_GETPEERNAME :
+ BPF_CGROUP_INET6_GETPEERNAME, 0);
+ if (err) {
+ log_err("Failed to attach BPF program");
+ goto close_bpf_object;
+ }
+
+ prog = bpf_object__find_program_by_title(obj, v4 ?
+ "cgroup/getsockname4" :
+ "cgroup/getsockname6");
+ if (CHECK(!prog, "find_prog", "getsockname prog not found\n")) {
+ err = -EIO;
+ goto close_bpf_object;
+ }
+
+ err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+ BPF_CGROUP_INET4_GETSOCKNAME :
+ BPF_CGROUP_INET6_GETSOCKNAME, 0);
+ if (err) {
+ log_err("Failed to attach BPF program");
+ goto close_bpf_object;
+ }
+
+ fd = connect_to_fd(family, type, server_fd);
+ if (fd < 0) {
+ err = -1;
+ goto close_bpf_object;
+ }
+
+ err = verify_ports(family, fd, expected_local_port,
+ expected_peer_port);
+ close(fd);
+
+close_bpf_object:
+ bpf_object__close(obj);
+ return err;
+}
+
+void test_connect_force_port(void)
+{
+ int server_fd, cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/connect_force_port");
+ if (CHECK_FAIL(cgroup_fd < 0))
+ return;
+
+ server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123);
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
+ CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
+ close(server_fd);
+
+ server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124);
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
+ CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
+ close(server_fd);
+
+ server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123);
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
+ CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
+ close(server_fd);
+
+ server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124);
+ if (CHECK_FAIL(server_fd < 0))
+ goto close_cgroup_fd;
+ CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
+ close(server_fd);
+
+close_cgroup_fd:
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 31e177adbdf1..084ed26a7d78 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -392,7 +392,7 @@ static struct core_reloc_test_case test_cases[] = {
.input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) {
.a = 42,
},
- .input_len = sizeof(struct core_reloc_existence),
+ .input_len = sizeof(struct core_reloc_existence___minimal),
.output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
.a_exists = 1,
.b_exists = 0,
diff --git a/tools/testing/selftests/bpf/prog_tests/enable_stats.c b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
new file mode 100644
index 000000000000..2cb2085917e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "test_enable_stats.skel.h"
+
+void test_enable_stats(void)
+{
+ struct test_enable_stats *skel;
+ int stats_fd, err, prog_fd;
+ struct bpf_prog_info info;
+ __u32 info_len = sizeof(info);
+ int duration = 0;
+
+ skel = test_enable_stats__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+ return;
+
+ stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+ if (CHECK(stats_fd < 0, "get_stats_fd", "failed %d\n", errno)) {
+ test_enable_stats__destroy(skel);
+ return;
+ }
+
+ err = test_enable_stats__attach(skel);
+ if (CHECK(err, "attach_raw_tp", "err %d\n", err))
+ goto cleanup;
+
+ test_enable_stats__detach(skel);
+
+ prog_fd = bpf_program__fd(skel->progs.test_enable_stats);
+ memset(&info, 0, info_len);
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err, "get_prog_info",
+ "failed to get bpf_prog_info for fd %d\n", prog_fd))
+ goto cleanup;
+ if (CHECK(info.run_time_ns == 0, "check_stats_enabled",
+ "failed to enable run_time_ns stats\n"))
+ goto cleanup;
+
+ CHECK(info.run_cnt != skel->bss->count, "check_run_cnt_valid",
+ "invalid run_cnt stats\n");
+
+cleanup:
+ test_enable_stats__destroy(skel);
+ close(stats_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 235ac4f67f5b..83493bd5745c 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -1,22 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "test_pkt_access.skel.h"
#include "fentry_test.skel.h"
#include "fexit_test.skel.h"
void test_fentry_fexit(void)
{
- struct test_pkt_access *pkt_skel = NULL;
struct fentry_test *fentry_skel = NULL;
struct fexit_test *fexit_skel = NULL;
__u64 *fentry_res, *fexit_res;
__u32 duration = 0, retval;
- int err, pkt_fd, i;
+ int err, prog_fd, i;
- pkt_skel = test_pkt_access__open_and_load();
- if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
- return;
fentry_skel = fentry_test__open_and_load();
if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
goto close_prog;
@@ -31,8 +26,8 @@ void test_fentry_fexit(void)
if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto close_prog;
- pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
CHECK(err || retval, "ipv6",
"err %d errno %d retval %d duration %d\n",
@@ -49,7 +44,6 @@ void test_fentry_fexit(void)
}
close_prog:
- test_pkt_access__destroy(pkt_skel);
fentry_test__destroy(fentry_skel);
fexit_test__destroy(fexit_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 5cc06021f27d..04ebbf1cb390 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -1,20 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "test_pkt_access.skel.h"
#include "fentry_test.skel.h"
void test_fentry_test(void)
{
- struct test_pkt_access *pkt_skel = NULL;
struct fentry_test *fentry_skel = NULL;
- int err, pkt_fd, i;
+ int err, prog_fd, i;
__u32 duration = 0, retval;
__u64 *result;
- pkt_skel = test_pkt_access__open_and_load();
- if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n"))
- return;
fentry_skel = fentry_test__open_and_load();
if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
goto cleanup;
@@ -23,10 +18,10 @@ void test_fentry_test(void)
if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
goto cleanup;
- pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access);
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ prog_fd = bpf_program__fd(fentry_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
+ CHECK(err || retval, "test_run",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
@@ -39,5 +34,4 @@ void test_fentry_test(void)
cleanup:
fentry_test__destroy(fentry_skel);
- test_pkt_access__destroy(pkt_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index cde463af7071..a895bfed55db 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
+#include <network_helpers.h>
static void test_fexit_bpf2bpf_common(const char *obj_file,
const char *target_obj_file,
int prog_cnt,
- const char **prog_name)
+ const char **prog_name,
+ bool run_prog)
{
struct bpf_object *obj = NULL, *pkt_obj;
int err, pkt_fd, i;
@@ -18,7 +20,8 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
&pkt_obj, &pkt_fd);
- if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
+ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
+ target_obj_file, err, errno))
return;
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
.attach_prog_fd = pkt_fd,
@@ -33,7 +36,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
obj = bpf_object__open_file(obj_file, &opts);
if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
- "failed to open fexit_bpf2bpf: %ld\n",
+ "failed to open %s: %ld\n", obj_file,
PTR_ERR(obj)))
goto close_prog;
@@ -49,6 +52,10 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
goto close_prog;
}
+
+ if (!run_prog)
+ goto close_prog;
+
data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss");
if (CHECK(!data_map, "find_data_map", "data map not found\n"))
goto close_prog;
@@ -89,7 +96,7 @@ static void test_target_no_callees(void)
test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o",
"./test_pkt_md_access.o",
ARRAY_SIZE(prog_name),
- prog_name);
+ prog_name, true);
}
static void test_target_yes_callees(void)
@@ -103,7 +110,7 @@ static void test_target_yes_callees(void)
test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
"./test_pkt_access.o",
ARRAY_SIZE(prog_name),
- prog_name);
+ prog_name, true);
}
static void test_func_replace(void)
@@ -120,7 +127,18 @@ static void test_func_replace(void)
test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
"./test_pkt_access.o",
ARRAY_SIZE(prog_name),
- prog_name);
+ prog_name, true);
+}
+
+static void test_func_replace_verify(void)
+{
+ const char *prog_name[] = {
+ "freplace/do_bind",
+ };
+ test_fexit_bpf2bpf_common("./freplace_connect4.o",
+ "./connect4_prog.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, false);
}
void test_fexit_bpf2bpf(void)
@@ -128,4 +146,5 @@ void test_fexit_bpf2bpf(void)
test_target_no_callees();
test_target_yes_callees();
test_func_replace();
+ test_func_replace_verify();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index d2c3655dd7a3..78d7a2765c27 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -1,64 +1,37 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
+#include "fexit_test.skel.h"
void test_fexit_test(void)
{
- struct bpf_prog_load_attr attr = {
- .file = "./fexit_test.o",
- };
-
- char prog_name[] = "fexit/bpf_fentry_testX";
- struct bpf_object *obj = NULL, *pkt_obj;
- int err, pkt_fd, kfree_skb_fd, i;
- struct bpf_link *link[6] = {};
- struct bpf_program *prog[6];
+ struct fexit_test *fexit_skel = NULL;
+ int err, prog_fd, i;
__u32 duration = 0, retval;
- struct bpf_map *data_map;
- const int zero = 0;
- u64 result[6];
+ __u64 *result;
- err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
- &pkt_obj, &pkt_fd);
- if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
- return;
- err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd);
- if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno))
- goto close_prog;
+ fexit_skel = fexit_test__open_and_load();
+ if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+ goto cleanup;
- for (i = 0; i < 6; i++) {
- prog_name[sizeof(prog_name) - 2] = '1' + i;
- prog[i] = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name))
- goto close_prog;
- link[i] = bpf_program__attach_trace(prog[i]);
- if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
- goto close_prog;
- }
- data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss");
- if (CHECK(!data_map, "find_data_map", "data map not found\n"))
- goto close_prog;
+ err = fexit_test__attach(fexit_skel);
+ if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+ goto cleanup;
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+ prog_fd = bpf_program__fd(fexit_skel->progs.test1);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
+ CHECK(err || retval, "test_run",
"err %d errno %d retval %d duration %d\n",
err, errno, retval, duration);
- err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result);
- if (CHECK(err, "get_result",
- "failed to get output data: %d\n", err))
- goto close_prog;
-
- for (i = 0; i < 6; i++)
- if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n",
- i + 1, result[i]))
- goto close_prog;
+ result = (__u64 *)fexit_skel->bss;
+ for (i = 0; i < 6; i++) {
+ if (CHECK(result[i] != 1, "result",
+ "fexit_test%d failed err %lld\n", i + 1, result[i]))
+ goto cleanup;
+ }
-close_prog:
- for (i = 0; i < 6; i++)
- if (!IS_ERR_OR_NULL(link[i]))
- bpf_link__destroy(link[i]);
- bpf_object__close(obj);
- bpf_object__close(pkt_obj);
+cleanup:
+ fexit_test__destroy(fexit_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 92563898867c..ea14e3ece812 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
#include <error.h>
#include <linux/if.h>
#include <linux/if_tun.h>
#include <sys/uio.h>
+#include "bpf_flow.skel.h"
+
#ifndef IP_MF
#define IP_MF 0x2000
#endif
@@ -100,6 +103,7 @@ struct test {
#define VLAN_HLEN 4
+static __u32 duration;
struct test tests[] = {
{
.name = "ipv4",
@@ -443,17 +447,130 @@ static int ifup(const char *ifname)
return 0;
}
+static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
+{
+ int i, err, map_fd, prog_fd;
+ struct bpf_program *prog;
+ char prog_name[32];
+
+ map_fd = bpf_map__fd(prog_array);
+ if (map_fd < 0)
+ return -1;
+
+ for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ snprintf(prog_name, sizeof(prog_name), "flow_dissector/%i", i);
+
+ prog = bpf_object__find_program_by_title(obj, prog_name);
+ if (!prog)
+ return -1;
+
+ prog_fd = bpf_program__fd(prog);
+ if (prog_fd < 0)
+ return -1;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (err)
+ return -1;
+ }
+ return 0;
+}
+
+static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
+{
+ int i, err, keys_fd;
+
+ keys_fd = bpf_map__fd(keys);
+ if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ /* Keep in sync with 'flags' from eth_get_headlen. */
+ __u32 eth_get_headlen_flags =
+ BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
+ struct bpf_prog_test_run_attr tattr = {};
+ struct bpf_flow_keys flow_keys = {};
+ __u32 key = (__u32)(tests[i].keys.sport) << 16 |
+ tests[i].keys.dport;
+
+ /* For skb-less case we can't pass input flags; run
+ * only the tests that have a matching set of flags.
+ */
+
+ if (tests[i].flags != eth_get_headlen_flags)
+ continue;
+
+ err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
+ CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
+
+ err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
+ CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
+
+ CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
+ CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+
+ err = bpf_map_delete_elem(keys_fd, &key);
+ CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
+ }
+}
+
+static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd)
+{
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+ return;
+
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno))
+ return;
+
+ run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+
+ err = bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
+ CHECK(err, "bpf_prog_detach", "err %d errno %d\n", err, errno);
+}
+
+static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
+{
+ struct bpf_link *link;
+ int err, net_fd;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno))
+ return;
+
+ link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
+ if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+ goto out_close;
+
+ run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+
+ err = bpf_link__destroy(link);
+ CHECK(err, "bpf_link__destroy", "err %d\n", err);
+out_close:
+ close(net_fd);
+}
+
void test_flow_dissector(void)
{
int i, err, prog_fd, keys_fd = -1, tap_fd;
- struct bpf_object *obj;
- __u32 duration = 0;
+ struct bpf_flow *skel;
- err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
- "jmp_table", "last_dissection", &prog_fd, &keys_fd);
- if (CHECK_FAIL(err))
+ skel = bpf_flow__open_and_load();
+ if (CHECK(!skel, "skel", "failed to open/load skeleton\n"))
return;
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+ goto out_destroy_skel;
+ keys_fd = bpf_map__fd(skel->maps.last_dissection);
+ if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+ goto out_destroy_skel;
+ err = init_prog_array(skel->obj, skel->maps.jmp_table);
+ if (CHECK(err, "init_prog_array", "err %d\n", err))
+ goto out_destroy_skel;
+
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_flow_keys flow_keys;
struct bpf_prog_test_run_attr tattr = {
@@ -486,43 +603,17 @@ void test_flow_dissector(void)
* via BPF map in this case.
*/
- err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
- CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno);
-
tap_fd = create_tap("tap0");
CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno);
err = ifup("tap0");
CHECK(err, "ifup", "err %d errno %d\n", err, errno);
- for (i = 0; i < ARRAY_SIZE(tests); i++) {
- /* Keep in sync with 'flags' from eth_get_headlen. */
- __u32 eth_get_headlen_flags =
- BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
- struct bpf_prog_test_run_attr tattr = {};
- struct bpf_flow_keys flow_keys = {};
- __u32 key = (__u32)(tests[i].keys.sport) << 16 |
- tests[i].keys.dport;
-
- /* For skb-less case we can't pass input flags; run
- * only the tests that have a matching set of flags.
- */
-
- if (tests[i].flags != eth_get_headlen_flags)
- continue;
-
- err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
- CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
-
- err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
- CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
-
- CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
- CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
-
- err = bpf_map_delete_elem(keys_fd, &key);
- CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
- }
+ /* Test direct prog attachment */
+ test_skb_less_prog_attach(skel, tap_fd);
+ /* Test indirect prog attachment via link */
+ test_skb_less_link_create(skel, tap_fd);
- bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
- bpf_object__close(obj);
+ close(tap_fd);
+out_destroy_skel:
+ bpf_flow__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
index dc5ef155ec28..0e8a4d2f023d 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_flow_dissector_load_bytes(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 1f51ba66b98b..15cb554a66d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -11,6 +11,7 @@
#include <fcntl.h>
#include <sched.h>
#include <stdbool.h>
+#include <sys/stat.h>
#include <unistd.h>
#include <linux/bpf.h>
@@ -18,21 +19,30 @@
#include "test_progs.h"
-static bool is_attached(int netns)
+static int init_net = -1;
+
+static __u32 query_attached_prog_id(int netns)
{
- __u32 cnt;
+ __u32 prog_ids[1] = {};
+ __u32 prog_cnt = ARRAY_SIZE(prog_ids);
int err;
- err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL, NULL, &cnt);
+ err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL,
+ prog_ids, &prog_cnt);
if (CHECK_FAIL(err)) {
perror("bpf_prog_query");
- return true; /* fail-safe */
+ return 0;
}
- return cnt > 0;
+ return prog_cnt == 1 ? prog_ids[0] : 0;
+}
+
+static bool prog_is_attached(int netns)
+{
+ return query_attached_prog_id(netns) > 0;
}
-static int load_prog(void)
+static int load_prog(enum bpf_prog_type type)
{
struct bpf_insn prog[] = {
BPF_MOV64_IMM(BPF_REG_0, BPF_OK),
@@ -40,61 +50,566 @@ static int load_prog(void)
};
int fd;
- fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
- ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
if (CHECK_FAIL(fd < 0))
perror("bpf_load_program");
return fd;
}
-static void do_flow_dissector_reattach(void)
+static __u32 query_prog_id(int prog)
{
- int prog_fd[2] = { -1, -1 };
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
int err;
- prog_fd[0] = load_prog();
- if (prog_fd[0] < 0)
- return;
+ err = bpf_obj_get_info_by_fd(prog, &info, &info_len);
+ if (CHECK_FAIL(err || info_len != sizeof(info))) {
+ perror("bpf_obj_get_info_by_fd");
+ return 0;
+ }
- prog_fd[1] = load_prog();
- if (prog_fd[1] < 0)
- goto out_close;
+ return info.id;
+}
+
+static int unshare_net(int old_net)
+{
+ int err, new_net;
- err = bpf_prog_attach(prog_fd[0], 0, BPF_FLOW_DISSECTOR, 0);
+ err = unshare(CLONE_NEWNET);
if (CHECK_FAIL(err)) {
- perror("bpf_prog_attach-0");
- goto out_close;
+ perror("unshare(CLONE_NEWNET)");
+ return -1;
+ }
+ new_net = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK_FAIL(new_net < 0)) {
+ perror("open(/proc/self/ns/net)");
+ setns(old_net, CLONE_NEWNET);
+ return -1;
}
+ return new_net;
+}
+
+static void test_prog_attach_prog_attach(int netns, int prog1, int prog2)
+{
+ int err;
+
+ err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
/* Expect success when attaching a different program */
- err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+ err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0);
if (CHECK_FAIL(err)) {
- perror("bpf_prog_attach-1");
+ perror("bpf_prog_attach(prog2) #1");
goto out_detach;
}
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
/* Expect failure when attaching the same program twice */
- err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+ err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0);
if (CHECK_FAIL(!err || errno != EINVAL))
- perror("bpf_prog_attach-2");
+ perror("bpf_prog_attach(prog2) #2");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
out_detach:
err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
if (CHECK_FAIL(err))
perror("bpf_prog_detach");
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_link_create(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int link1, link2;
+
+ link1 = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure creating link when another link exists */
+ errno = 0;
+ link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+ perror("bpf_prog_attach(prog2) expected E2BIG");
+ if (link2 != -1)
+ close(link2);
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link1);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_prog_attach_link_create(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int err, link;
+
+ err = bpf_prog_attach(prog1, -1, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure creating link when prog attached */
+ errno = 0;
+ link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link != -1 || errno != EEXIST))
+ perror("bpf_link_create(prog2) expected EEXIST");
+ if (link != -1)
+ close(link);
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR);
+ if (CHECK_FAIL(err))
+ perror("bpf_prog_detach");
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_prog_attach(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure attaching prog when link exists */
+ errno = 0;
+ err = bpf_prog_attach(prog2, -1, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(!err || errno != EEXIST))
+ perror("bpf_prog_attach(prog2) expected EEXIST");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_prog_detach(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure detaching prog when link exists */
+ errno = 0;
+ err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR);
+ if (CHECK_FAIL(!err || errno != EINVAL))
+ perror("bpf_prog_detach expected EINVAL");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_prog_attach_detach_query(int netns, int prog1, int prog2)
+{
+ int err;
+
+ err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_detach");
+ return;
+ }
+
+ /* Expect no prog attached after successful detach */
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_close_query(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ int link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(link);
+ /* Expect no prog attached after closing last link FD */
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_no_old_prog(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect success replacing the prog when old prog not specified */
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(err))
+ perror("bpf_link_update");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_replace_old_prog(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link;
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect success F_REPLACE and old prog specified to succeed */
+ update_opts.flags = BPF_F_REPLACE;
+ update_opts.old_prog_fd = prog1;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(err))
+ perror("bpf_link_update");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
+
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_invalid_opts(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail w/ old prog FD but w/o F_REPLACE*/
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = prog1;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EINVAL)) {
+ perror("bpf_link_update expected EINVAL");
+ goto out_close;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail on old prog FD mismatch */
+ errno = 0;
+ update_opts.flags = BPF_F_REPLACE;
+ update_opts.old_prog_fd = prog2;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EPERM)) {
+ perror("bpf_link_update expected EPERM");
+ goto out_close;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail for invalid old prog FD */
+ errno = 0;
+ update_opts.flags = BPF_F_REPLACE;
+ update_opts.old_prog_fd = -1;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EBADF)) {
+ perror("bpf_link_update expected EBADF");
+ goto out_close;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect update to fail with invalid flags */
+ errno = 0;
+ update_opts.flags = BPF_F_ALLOW_MULTI;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != EINVAL))
+ perror("bpf_link_update expected EINVAL");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+out_close:
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_invalid_prog(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link, prog3;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ /* Expect failure when new prog FD is not valid */
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, -1, &update_opts);
+ if (CHECK_FAIL(!err || errno != EBADF)) {
+ perror("bpf_link_update expected EINVAL");
+ goto out_close_link;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ prog3 = load_prog(BPF_PROG_TYPE_SOCKET_FILTER);
+ if (prog3 < 0)
+ goto out_close_link;
+
+ /* Expect failure when new prog FD type doesn't match */
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog3, &update_opts);
+ if (CHECK_FAIL(!err || errno != EINVAL))
+ perror("bpf_link_update expected EINVAL");
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(prog3);
+out_close_link:
+ close(link);
+ CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_netns_gone(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int err, link, old_net;
+
+ old_net = netns;
+ netns = unshare_net(old_net);
+ if (netns < 0)
+ return;
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ return;
+ }
+ CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+ close(netns);
+ err = setns(old_net, CLONE_NEWNET);
+ if (CHECK_FAIL(err)) {
+ perror("setns(CLONE_NEWNET)");
+ close(link);
+ return;
+ }
+
+ /* Expect failure when netns destroyed */
+ errno = 0;
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(!err || errno != ENOLINK))
+ perror("bpf_link_update");
+
+ close(link);
+}
+
+static void test_link_get_info(int netns, int prog1, int prog2)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ struct bpf_link_info info = {};
+ struct stat netns_stat = {};
+ __u32 info_len, link_id;
+ int err, link, old_net;
+
+ old_net = netns;
+ netns = unshare_net(old_net);
+ if (netns < 0)
+ return;
+
+ err = fstat(netns, &netns_stat);
+ if (CHECK_FAIL(err)) {
+ perror("stat(netns)");
+ goto out_resetns;
+ }
+
+ link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+ if (CHECK_FAIL(link < 0)) {
+ perror("bpf_link_create(prog1)");
+ goto out_resetns;
+ }
+
+ info_len = sizeof(info);
+ err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_obj_get_info");
+ goto out_unlink;
+ }
+ CHECK_FAIL(info_len != sizeof(info));
+
+ /* Expect link info to be sane and match prog and netns details */
+ CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+ CHECK_FAIL(info.id == 0);
+ CHECK_FAIL(info.prog_id != query_prog_id(prog1));
+ CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino);
+ CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+ update_opts.flags = 0;
+ update_opts.old_prog_fd = 0;
+ err = bpf_link_update(link, prog2, &update_opts);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_link_update(prog2)");
+ goto out_unlink;
+ }
+
+ link_id = info.id;
+ info_len = sizeof(info);
+ err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_obj_get_info");
+ goto out_unlink;
+ }
+ CHECK_FAIL(info_len != sizeof(info));
+
+ /* Expect no info change after update except in prog id */
+ CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+ CHECK_FAIL(info.id != link_id);
+ CHECK_FAIL(info.prog_id != query_prog_id(prog2));
+ CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino);
+ CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+ /* Leave netns link is attached to and close last FD to it */
+ err = setns(old_net, CLONE_NEWNET);
+ if (CHECK_FAIL(err)) {
+ perror("setns(NEWNET)");
+ goto out_unlink;
+ }
+ close(netns);
+ old_net = -1;
+ netns = -1;
+
+ info_len = sizeof(info);
+ err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_obj_get_info");
+ goto out_unlink;
+ }
+ CHECK_FAIL(info_len != sizeof(info));
+
+ /* Expect netns_ino to change to 0 */
+ CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+ CHECK_FAIL(info.id != link_id);
+ CHECK_FAIL(info.prog_id != query_prog_id(prog2));
+ CHECK_FAIL(info.netns.netns_ino != 0);
+ CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+out_unlink:
+ close(link);
+out_resetns:
+ if (old_net != -1)
+ setns(old_net, CLONE_NEWNET);
+ if (netns != -1)
+ close(netns);
+}
+
+static void run_tests(int netns)
+{
+ struct test {
+ const char *test_name;
+ void (*test_func)(int netns, int prog1, int prog2);
+ } tests[] = {
+ { "prog attach, prog attach",
+ test_prog_attach_prog_attach },
+ { "link create, link create",
+ test_link_create_link_create },
+ { "prog attach, link create",
+ test_prog_attach_link_create },
+ { "link create, prog attach",
+ test_link_create_prog_attach },
+ { "link create, prog detach",
+ test_link_create_prog_detach },
+ { "prog attach, detach, query",
+ test_prog_attach_detach_query },
+ { "link create, close, query",
+ test_link_create_close_query },
+ { "link update no old prog",
+ test_link_update_no_old_prog },
+ { "link update with replace old prog",
+ test_link_update_replace_old_prog },
+ { "link update invalid opts",
+ test_link_update_invalid_opts },
+ { "link update invalid prog",
+ test_link_update_invalid_prog },
+ { "link update netns gone",
+ test_link_update_netns_gone },
+ { "link get info",
+ test_link_get_info },
+ };
+ int i, progs[2] = { -1, -1 };
+ char test_name[80];
+
+ for (i = 0; i < ARRAY_SIZE(progs); i++) {
+ progs[i] = load_prog(BPF_PROG_TYPE_FLOW_DISSECTOR);
+ if (progs[i] < 0)
+ goto out_close;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ snprintf(test_name, sizeof(test_name),
+ "flow dissector %s%s",
+ tests[i].test_name,
+ netns == init_net ? " (init_net)" : "");
+ if (test__start_subtest(test_name))
+ tests[i].test_func(netns, progs[0], progs[1]);
+ }
out_close:
- close(prog_fd[1]);
- close(prog_fd[0]);
+ for (i = 0; i < ARRAY_SIZE(progs); i++) {
+ if (progs[i] != -1)
+ CHECK_FAIL(close(progs[i]));
+ }
}
void test_flow_dissector_reattach(void)
{
- int init_net, self_net, err;
+ int err, new_net, saved_net;
- self_net = open("/proc/self/ns/net", O_RDONLY);
- if (CHECK_FAIL(self_net < 0)) {
+ saved_net = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK_FAIL(saved_net < 0)) {
perror("open(/proc/self/ns/net");
return;
}
@@ -111,30 +626,29 @@ void test_flow_dissector_reattach(void)
goto out_close;
}
- if (is_attached(init_net)) {
+ if (prog_is_attached(init_net)) {
test__skip();
printf("Can't test with flow dissector attached to init_net\n");
goto out_setns;
}
/* First run tests in root network namespace */
- do_flow_dissector_reattach();
+ run_tests(init_net);
/* Then repeat tests in a non-root namespace */
- err = unshare(CLONE_NEWNET);
- if (CHECK_FAIL(err)) {
- perror("unshare(CLONE_NEWNET)");
+ new_net = unshare_net(init_net);
+ if (new_net < 0)
goto out_setns;
- }
- do_flow_dissector_reattach();
+ run_tests(new_net);
+ close(new_net);
out_setns:
/* Move back to netns we started in. */
- err = setns(self_net, CLONE_NEWNET);
+ err = setns(saved_net, CLONE_NEWNET);
if (CHECK_FAIL(err))
perror("setns(/proc/self/ns/net)");
out_close:
close(init_net);
- close(self_net);
+ close(saved_net);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index eba9a970703b..925722217edf 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -82,6 +82,7 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
void test_get_stack_raw_tp(void)
{
const char *file = "./test_get_stack_rawtp.o";
+ const char *file_err = "./test_get_stack_rawtp_err.o";
const char *prog_name = "raw_tracepoint/sys_enter";
int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP;
struct perf_buffer_opts pb_opts = {};
@@ -93,6 +94,10 @@ void test_get_stack_raw_tp(void)
struct bpf_map *map;
cpu_set_t cpu_set;
+ err = bpf_prog_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err >= 0, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ return;
+
err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index c680926fce73..e3cb62b0a110 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
static void test_global_data_number(struct bpf_object *obj, __u32 duration)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
new file mode 100644
index 000000000000..3bdaa5a40744
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+void test_global_data_init(void)
+{
+ const char *file = "./test_global_data.o";
+ int err = -ENOMEM, map_fd, zero = 0;
+ __u8 *buff = NULL, *newval = NULL;
+ struct bpf_object *obj;
+ struct bpf_map *map;
+ __u32 duration = 0;
+ size_t sz;
+
+ obj = bpf_object__open_file(file, NULL);
+ if (CHECK_FAIL(!obj))
+ return;
+
+ map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
+ if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
+ goto out;
+
+ sz = bpf_map__def(map)->value_size;
+ newval = malloc(sz);
+ if (CHECK_FAIL(!newval))
+ goto out;
+
+ memset(newval, 0, sz);
+ /* wrong size, should fail */
+ err = bpf_map__set_initial_value(map, newval, sz - 1);
+ if (CHECK(!err, "reject set initial value wrong size", "err %d\n", err))
+ goto out;
+
+ err = bpf_map__set_initial_value(map, newval, sz);
+ if (CHECK(err, "set initial value", "err %d\n", err))
+ goto out;
+
+ err = bpf_object__load(obj);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ map_fd = bpf_map__fd(map);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ buff = malloc(sz);
+ if (buff)
+ err = bpf_map_lookup_elem(map_fd, &zero, buff);
+ if (CHECK(!buff || err || memcmp(buff, newval, sz),
+ "compare .rodata map data override",
+ "err %d errno %d\n", err, errno))
+ goto out;
+
+ memset(newval, 1, sz);
+ /* object loaded - should fail */
+ err = bpf_map__set_initial_value(map, newval, sz);
+ CHECK(!err, "reject set initial value after load", "err %d\n", err);
+out:
+ free(buff);
+ free(newval);
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c
index c490e012c23f..428d488830c6 100644
--- a/tools/testing/selftests/bpf/test_hashmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c
@@ -5,26 +5,17 @@
*
* Copyright (c) 2019 Facebook
*/
-#include <stdio.h>
-#include <errno.h>
-#include <linux/err.h>
+#include "test_progs.h"
#include "bpf/hashmap.h"
-#define CHECK(condition, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
- fprintf(stderr, format); \
- } \
- __ret; \
-})
+static int duration = 0;
-size_t hash_fn(const void *k, void *ctx)
+static size_t hash_fn(const void *k, void *ctx)
{
return (long)k;
}
-bool equal_fn(const void *a, const void *b, void *ctx)
+static bool equal_fn(const void *a, const void *b, void *ctx)
{
return (long)a == (long)b;
}
@@ -49,53 +40,55 @@ static inline size_t exp_cap(size_t sz)
#define ELEM_CNT 62
-int test_hashmap_generic(void)
+static void test_hashmap_generic(void)
{
struct hashmap_entry *entry, *tmp;
int err, bkt, found_cnt, i;
long long found_msk;
struct hashmap *map;
- fprintf(stderr, "%s: ", __func__);
-
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
- return 1;
+ if (CHECK(IS_ERR(map), "hashmap__new",
+ "failed to create map: %ld\n", PTR_ERR(map)))
+ return;
for (i = 0; i < ELEM_CNT; i++) {
const void *oldk, *k = (const void *)(long)i;
void *oldv, *v = (void *)(long)(1024 + i);
err = hashmap__update(map, k, v, &oldk, &oldv);
- if (CHECK(err != -ENOENT, "unexpected result: %d\n", err))
- return 1;
+ if (CHECK(err != -ENOENT, "hashmap__update",
+ "unexpected result: %d\n", err))
+ goto cleanup;
if (i % 2) {
err = hashmap__add(map, k, v);
} else {
err = hashmap__set(map, k, v, &oldk, &oldv);
- if (CHECK(oldk != NULL || oldv != NULL,
+ if (CHECK(oldk != NULL || oldv != NULL, "check_kv",
"unexpected k/v: %p=%p\n", oldk, oldv))
- return 1;
+ goto cleanup;
}
- if (CHECK(err, "failed to add k/v %ld = %ld: %d\n",
+ if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n",
(long)k, (long)v, err))
- return 1;
+ goto cleanup;
- if (CHECK(!hashmap__find(map, k, &oldv),
+ if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
"failed to find key %ld\n", (long)k))
- return 1;
- if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
- return 1;
+ goto cleanup;
+ if (CHECK(oldv != v, "elem_val",
+ "found value is wrong: %ld\n", (long)oldv))
+ goto cleanup;
}
- if (CHECK(hashmap__size(map) != ELEM_CNT,
+ if (CHECK(hashmap__size(map) != ELEM_CNT, "hashmap__size",
"invalid map size: %zu\n", hashmap__size(map)))
- return 1;
+ goto cleanup;
if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+ "hashmap_cap",
"unexpected map capacity: %zu\n", hashmap__capacity(map)))
- return 1;
+ goto cleanup;
found_msk = 0;
hashmap__for_each_entry(map, entry, bkt) {
@@ -103,42 +96,47 @@ int test_hashmap_generic(void)
long v = (long)entry->value;
found_msk |= 1ULL << k;
- if (CHECK(v - k != 1024, "invalid k/v pair: %ld = %ld\n", k, v))
- return 1;
+ if (CHECK(v - k != 1024, "check_kv",
+ "invalid k/v pair: %ld = %ld\n", k, v))
+ goto cleanup;
}
- if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
+ if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, "elem_cnt",
"not all keys iterated: %llx\n", found_msk))
- return 1;
+ goto cleanup;
for (i = 0; i < ELEM_CNT; i++) {
const void *oldk, *k = (const void *)(long)i;
void *oldv, *v = (void *)(long)(256 + i);
err = hashmap__add(map, k, v);
- if (CHECK(err != -EEXIST, "unexpected add result: %d\n", err))
- return 1;
+ if (CHECK(err != -EEXIST, "hashmap__add",
+ "unexpected add result: %d\n", err))
+ goto cleanup;
if (i % 2)
err = hashmap__update(map, k, v, &oldk, &oldv);
else
err = hashmap__set(map, k, v, &oldk, &oldv);
- if (CHECK(err, "failed to update k/v %ld = %ld: %d\n",
- (long)k, (long)v, err))
- return 1;
- if (CHECK(!hashmap__find(map, k, &oldv),
+ if (CHECK(err, "elem_upd",
+ "failed to update k/v %ld = %ld: %d\n",
+ (long)k, (long)v, err))
+ goto cleanup;
+ if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
"failed to find key %ld\n", (long)k))
- return 1;
- if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
- return 1;
+ goto cleanup;
+ if (CHECK(oldv != v, "elem_val",
+ "found value is wrong: %ld\n", (long)oldv))
+ goto cleanup;
}
- if (CHECK(hashmap__size(map) != ELEM_CNT,
+ if (CHECK(hashmap__size(map) != ELEM_CNT, "hashmap__size",
"invalid updated map size: %zu\n", hashmap__size(map)))
- return 1;
+ goto cleanup;
if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+ "hashmap__capacity",
"unexpected map capacity: %zu\n", hashmap__capacity(map)))
- return 1;
+ goto cleanup;
found_msk = 0;
hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
@@ -146,20 +144,21 @@ int test_hashmap_generic(void)
long v = (long)entry->value;
found_msk |= 1ULL << k;
- if (CHECK(v - k != 256,
+ if (CHECK(v - k != 256, "elem_check",
"invalid updated k/v pair: %ld = %ld\n", k, v))
- return 1;
+ goto cleanup;
}
- if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
+ if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, "elem_cnt",
"not all keys iterated after update: %llx\n", found_msk))
- return 1;
+ goto cleanup;
found_cnt = 0;
hashmap__for_each_key_entry(map, entry, (void *)0) {
found_cnt++;
}
- if (CHECK(!found_cnt, "didn't find any entries for key 0\n"))
- return 1;
+ if (CHECK(!found_cnt, "found_cnt",
+ "didn't find any entries for key 0\n"))
+ goto cleanup;
found_msk = 0;
found_cnt = 0;
@@ -173,30 +172,31 @@ int test_hashmap_generic(void)
found_cnt++;
found_msk |= 1ULL << (long)k;
- if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
+ if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
"failed to delete k/v %ld = %ld\n",
(long)k, (long)v))
- return 1;
- if (CHECK(oldk != k || oldv != v,
+ goto cleanup;
+ if (CHECK(oldk != k || oldv != v, "check_old",
"invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n",
(long)k, (long)v, (long)oldk, (long)oldv))
- return 1;
- if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
+ goto cleanup;
+ if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
"unexpectedly deleted k/v %ld = %ld\n",
(long)oldk, (long)oldv))
- return 1;
+ goto cleanup;
}
- if (CHECK(!found_cnt || !found_msk,
+ if (CHECK(!found_cnt || !found_msk, "found_entries",
"didn't delete any key entries\n"))
- return 1;
- if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt,
+ goto cleanup;
+ if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt, "elem_cnt",
"invalid updated map size (already deleted: %d): %zu\n",
found_cnt, hashmap__size(map)))
- return 1;
+ goto cleanup;
if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+ "hashmap__capacity",
"unexpected map capacity: %zu\n", hashmap__capacity(map)))
- return 1;
+ goto cleanup;
hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
const void *oldk, *k;
@@ -208,53 +208,56 @@ int test_hashmap_generic(void)
found_cnt++;
found_msk |= 1ULL << (long)k;
- if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
+ if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
"failed to delete k/v %ld = %ld\n",
(long)k, (long)v))
- return 1;
- if (CHECK(oldk != k || oldv != v,
+ goto cleanup;
+ if (CHECK(oldk != k || oldv != v, "elem_check",
"invalid old k/v: expect %ld = %ld, got %ld = %ld\n",
(long)k, (long)v, (long)oldk, (long)oldv))
- return 1;
- if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
+ goto cleanup;
+ if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
"unexpectedly deleted k/v %ld = %ld\n",
(long)k, (long)v))
- return 1;
+ goto cleanup;
}
if (CHECK(found_cnt != ELEM_CNT || found_msk != (1ULL << ELEM_CNT) - 1,
+ "found_cnt",
"not all keys were deleted: found_cnt:%d, found_msk:%llx\n",
found_cnt, found_msk))
- return 1;
- if (CHECK(hashmap__size(map) != 0,
+ goto cleanup;
+ if (CHECK(hashmap__size(map) != 0, "hashmap__size",
"invalid updated map size (already deleted: %d): %zu\n",
found_cnt, hashmap__size(map)))
- return 1;
+ goto cleanup;
found_cnt = 0;
hashmap__for_each_entry(map, entry, bkt) {
- CHECK(false, "unexpected map entries left: %ld = %ld\n",
- (long)entry->key, (long)entry->value);
- return 1;
+ CHECK(false, "elem_exists",
+ "unexpected map entries left: %ld = %ld\n",
+ (long)entry->key, (long)entry->value);
+ goto cleanup;
}
- hashmap__free(map);
+ hashmap__clear(map);
hashmap__for_each_entry(map, entry, bkt) {
- CHECK(false, "unexpected map entries left: %ld = %ld\n",
- (long)entry->key, (long)entry->value);
- return 1;
+ CHECK(false, "elem_exists",
+ "unexpected map entries left: %ld = %ld\n",
+ (long)entry->key, (long)entry->value);
+ goto cleanup;
}
- fprintf(stderr, "OK\n");
- return 0;
+cleanup:
+ hashmap__free(map);
}
-size_t collision_hash_fn(const void *k, void *ctx)
+static size_t collision_hash_fn(const void *k, void *ctx)
{
return 0;
}
-int test_hashmap_multimap(void)
+static void test_hashmap_multimap(void)
{
void *k1 = (void *)0, *k2 = (void *)1;
struct hashmap_entry *entry;
@@ -262,121 +265,116 @@ int test_hashmap_multimap(void)
long found_msk;
int err, bkt;
- fprintf(stderr, "%s: ", __func__);
-
/* force collisions */
map = hashmap__new(collision_hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
- return 1;
-
+ if (CHECK(IS_ERR(map), "hashmap__new",
+ "failed to create map: %ld\n", PTR_ERR(map)))
+ return;
/* set up multimap:
* [0] -> 1, 2, 4;
* [1] -> 8, 16, 32;
*/
err = hashmap__append(map, k1, (void *)1);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
err = hashmap__append(map, k1, (void *)2);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
err = hashmap__append(map, k1, (void *)4);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
err = hashmap__append(map, k2, (void *)8);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
err = hashmap__append(map, k2, (void *)16);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
err = hashmap__append(map, k2, (void *)32);
- if (CHECK(err, "failed to add k/v: %d\n", err))
- return 1;
+ if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+ goto cleanup;
- if (CHECK(hashmap__size(map) != 6,
+ if (CHECK(hashmap__size(map) != 6, "hashmap_size",
"invalid map size: %zu\n", hashmap__size(map)))
- return 1;
+ goto cleanup;
/* verify global iteration still works and sees all values */
found_msk = 0;
hashmap__for_each_entry(map, entry, bkt) {
found_msk |= (long)entry->value;
}
- if (CHECK(found_msk != (1 << 6) - 1,
+ if (CHECK(found_msk != (1 << 6) - 1, "found_msk",
"not all keys iterated: %lx\n", found_msk))
- return 1;
+ goto cleanup;
/* iterate values for key 1 */
found_msk = 0;
hashmap__for_each_key_entry(map, entry, k1) {
found_msk |= (long)entry->value;
}
- if (CHECK(found_msk != (1 | 2 | 4),
+ if (CHECK(found_msk != (1 | 2 | 4), "found_msk",
"invalid k1 values: %lx\n", found_msk))
- return 1;
+ goto cleanup;
/* iterate values for key 2 */
found_msk = 0;
hashmap__for_each_key_entry(map, entry, k2) {
found_msk |= (long)entry->value;
}
- if (CHECK(found_msk != (8 | 16 | 32),
+ if (CHECK(found_msk != (8 | 16 | 32), "found_msk",
"invalid k2 values: %lx\n", found_msk))
- return 1;
+ goto cleanup;
- fprintf(stderr, "OK\n");
- return 0;
+cleanup:
+ hashmap__free(map);
}
-int test_hashmap_empty()
+static void test_hashmap_empty()
{
struct hashmap_entry *entry;
int bkt;
struct hashmap *map;
void *k = (void *)0;
- fprintf(stderr, "%s: ", __func__);
-
/* force collisions */
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
- return 1;
+ if (CHECK(IS_ERR(map), "hashmap__new",
+ "failed to create map: %ld\n", PTR_ERR(map)))
+ goto cleanup;
- if (CHECK(hashmap__size(map) != 0,
+ if (CHECK(hashmap__size(map) != 0, "hashmap__size",
"invalid map size: %zu\n", hashmap__size(map)))
- return 1;
- if (CHECK(hashmap__capacity(map) != 0,
+ goto cleanup;
+ if (CHECK(hashmap__capacity(map) != 0, "hashmap__capacity",
"invalid map capacity: %zu\n", hashmap__capacity(map)))
- return 1;
- if (CHECK(hashmap__find(map, k, NULL), "unexpected find\n"))
- return 1;
- if (CHECK(hashmap__delete(map, k, NULL, NULL), "unexpected delete\n"))
- return 1;
+ goto cleanup;
+ if (CHECK(hashmap__find(map, k, NULL), "elem_find",
+ "unexpected find\n"))
+ goto cleanup;
+ if (CHECK(hashmap__delete(map, k, NULL, NULL), "elem_del",
+ "unexpected delete\n"))
+ goto cleanup;
hashmap__for_each_entry(map, entry, bkt) {
- CHECK(false, "unexpected iterated entry\n");
- return 1;
+ CHECK(false, "elem_found", "unexpected iterated entry\n");
+ goto cleanup;
}
hashmap__for_each_key_entry(map, entry, k) {
- CHECK(false, "unexpected key entry\n");
- return 1;
+ CHECK(false, "key_found", "unexpected key entry\n");
+ goto cleanup;
}
- fprintf(stderr, "OK\n");
- return 0;
+cleanup:
+ hashmap__free(map);
}
-int main(int argc, char **argv)
+void test_hashmap()
{
- bool failed = false;
-
- if (test_hashmap_generic())
- failed = true;
- if (test_hashmap_multimap())
- failed = true;
- if (test_hashmap_empty())
- failed = true;
-
- return failed;
+ if (test__start_subtest("generic"))
+ test_hashmap_generic();
+ if (test__start_subtest("multimap"))
+ test_hashmap_multimap();
+ if (test__start_subtest("empty"))
+ test_hashmap_empty();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 7507c8f689bc..42c3a3103c26 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
struct meta {
int ifindex;
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index eaf64595be88..c2d373e294bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
static void test_l4lb(const char *file)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
new file mode 100644
index 000000000000..a743288cf384
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <sys/stat.h>
+
+#include "test_link_pinning.skel.h"
+
+static int duration = 0;
+
+void test_link_pinning_subtest(struct bpf_program *prog,
+ struct test_link_pinning__bss *bss)
+{
+ const char *link_pin_path = "/sys/fs/bpf/pinned_link_test";
+ struct stat statbuf = {};
+ struct bpf_link *link;
+ int err, i;
+
+ link = bpf_program__attach(prog);
+ if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
+ goto cleanup;
+
+ bss->in = 1;
+ usleep(1);
+ CHECK(bss->out != 1, "res_check1", "exp %d, got %d\n", 1, bss->out);
+
+ /* pin link */
+ err = bpf_link__pin(link, link_pin_path);
+ if (CHECK(err, "link_pin", "err: %d\n", err))
+ goto cleanup;
+
+ CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path1",
+ "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link));
+
+ /* check that link was pinned */
+ err = stat(link_pin_path, &statbuf);
+ if (CHECK(err, "stat_link", "err %d errno %d\n", err, errno))
+ goto cleanup;
+
+ bss->in = 2;
+ usleep(1);
+ CHECK(bss->out != 2, "res_check2", "exp %d, got %d\n", 2, bss->out);
+
+ /* destroy link, pinned link should keep program attached */
+ bpf_link__destroy(link);
+ link = NULL;
+
+ bss->in = 3;
+ usleep(1);
+ CHECK(bss->out != 3, "res_check3", "exp %d, got %d\n", 3, bss->out);
+
+ /* re-open link from BPFFS */
+ link = bpf_link__open(link_pin_path);
+ if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
+ goto cleanup;
+
+ CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
+ "exp %s, got %s\n", link_pin_path, bpf_link__pin_path(link));
+
+ /* unpin link from BPFFS, program still attached */
+ err = bpf_link__unpin(link);
+ if (CHECK(err, "link_unpin", "err: %d\n", err))
+ goto cleanup;
+
+ /* still active, as we have FD open now */
+ bss->in = 4;
+ usleep(1);
+ CHECK(bss->out != 4, "res_check4", "exp %d, got %d\n", 4, bss->out);
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* Validate it's finally detached.
+ * Actual detachment might get delayed a bit, so there is no reliable
+ * way to validate it immediately here, let's count up for long enough
+ * and see if eventually output stops being updated
+ */
+ for (i = 5; i < 10000; i++) {
+ bss->in = i;
+ usleep(1);
+ if (bss->out == i - 1)
+ break;
+ }
+ CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
+
+cleanup:
+ if (!IS_ERR(link))
+ bpf_link__destroy(link);
+}
+
+void test_link_pinning(void)
+{
+ struct test_link_pinning* skel;
+
+ skel = test_link_pinning__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ if (test__start_subtest("pin_raw_tp"))
+ test_link_pinning_subtest(skel->progs.raw_tp_prog, skel->bss);
+ if (test__start_subtest("pin_tp_btf"))
+ test_link_pinning_subtest(skel->progs.tp_btf_prog, skel->bss);
+
+ test_link_pinning__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index 8f91f1881d11..ce17b1ed8709 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -1,5 +1,19 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
+
+static void *spin_lock_thread(void *arg)
+{
+ __u32 duration, retval;
+ int err, prog_fd = *(u32 *) arg;
+
+ err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+ pthread_exit(arg);
+}
static void *parallel_map_access(void *arg)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 16a814eb4d64..43d0b5578f46 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -19,15 +19,16 @@ void test_mmap(void)
const size_t map_sz = roundup_page(sizeof(struct map_data));
const int zero = 0, one = 1, two = 2, far = 1500;
const long page_size = sysconf(_SC_PAGE_SIZE);
- int err, duration = 0, i, data_map_fd;
+ int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd, rdmap_fd;
struct bpf_map *data_map, *bss_map;
void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
struct test_mmap__bss *bss_data;
+ struct bpf_map_info map_info;
+ __u32 map_info_sz = sizeof(map_info);
struct map_data *map_data;
struct test_mmap *skel;
__u64 val = 0;
-
skel = test_mmap__open_and_load();
if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
return;
@@ -36,6 +37,25 @@ void test_mmap(void)
data_map = skel->maps.data_map;
data_map_fd = bpf_map__fd(data_map);
+ rdmap_fd = bpf_map__fd(skel->maps.rdonly_map);
+ tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
+ if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) {
+ munmap(tmp1, 4096);
+ goto cleanup;
+ }
+ /* now double-check if it's mmap()'able at all */
+ tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0);
+ if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno))
+ goto cleanup;
+
+ /* get map's ID */
+ memset(&map_info, 0, map_info_sz);
+ err = bpf_obj_get_info_by_fd(data_map_fd, &map_info, &map_info_sz);
+ if (CHECK(err, "map_get_info", "failed %d\n", errno))
+ goto cleanup;
+ data_map_id = map_info.id;
+
+ /* mmap BSS map */
bss_mmaped = mmap(NULL, bss_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
bpf_map__fd(bss_map), 0);
if (CHECK(bss_mmaped == MAP_FAILED, "bss_mmap",
@@ -98,6 +118,10 @@ void test_mmap(void)
"data_map freeze succeeded: err=%d, errno=%d\n", err, errno))
goto cleanup;
+ err = mprotect(map_mmaped, map_sz, PROT_READ);
+ if (CHECK(err, "mprotect_ro", "mprotect to r/o failed %d\n", errno))
+ goto cleanup;
+
/* unmap R/W mapping */
err = munmap(map_mmaped, map_sz);
map_mmaped = NULL;
@@ -111,6 +135,12 @@ void test_mmap(void)
map_mmaped = NULL;
goto cleanup;
}
+ err = mprotect(map_mmaped, map_sz, PROT_WRITE);
+ if (CHECK(!err, "mprotect_wr", "mprotect() succeeded unexpectedly!\n"))
+ goto cleanup;
+ err = mprotect(map_mmaped, map_sz, PROT_EXEC);
+ if (CHECK(!err, "mprotect_ex", "mprotect() succeeded unexpectedly!\n"))
+ goto cleanup;
map_data = map_mmaped;
/* map/unmap in a loop to test ref counting */
@@ -197,6 +227,53 @@ void test_mmap(void)
CHECK_FAIL(map_data->val[far] != 3 * 321);
munmap(tmp2, 4 * page_size);
+
+ /* map all 4 pages, but with pg_off=1 page, should fail */
+ tmp1 = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
+ data_map_fd, page_size /* initial page shift */);
+ if (CHECK(tmp1 != MAP_FAILED, "adv_mmap7", "unexpected success")) {
+ munmap(tmp1, 4 * page_size);
+ goto cleanup;
+ }
+
+ tmp1 = mmap(NULL, map_sz, PROT_READ, MAP_SHARED, data_map_fd, 0);
+ if (CHECK(tmp1 == MAP_FAILED, "last_mmap", "failed %d\n", errno))
+ goto cleanup;
+
+ test_mmap__destroy(skel);
+ skel = NULL;
+ CHECK_FAIL(munmap(bss_mmaped, bss_sz));
+ bss_mmaped = NULL;
+ CHECK_FAIL(munmap(map_mmaped, map_sz));
+ map_mmaped = NULL;
+
+ /* map should be still held by active mmap */
+ tmp_fd = bpf_map_get_fd_by_id(data_map_id);
+ if (CHECK(tmp_fd < 0, "get_map_by_id", "failed %d\n", errno)) {
+ munmap(tmp1, map_sz);
+ goto cleanup;
+ }
+ close(tmp_fd);
+
+ /* this should release data map finally */
+ munmap(tmp1, map_sz);
+
+ /* we need to wait for RCU grace period */
+ for (i = 0; i < 10000; i++) {
+ __u32 id = data_map_id - 1;
+ if (bpf_map_get_next_id(id, &id) || id > data_map_id)
+ break;
+ usleep(1);
+ }
+
+ /* should fail to get map FD by non-existing ID */
+ tmp_fd = bpf_map_get_fd_by_id(data_map_id);
+ if (CHECK(tmp_fd >= 0, "get_map_by_id_after",
+ "unexpectedly succeeded %d\n", tmp_fd)) {
+ close(tmp_fd);
+ goto cleanup;
+ }
+
cleanup:
if (bss_mmaped)
CHECK_FAIL(munmap(bss_mmaped, bss_sz));
diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c
new file mode 100644
index 000000000000..97fec70c600b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include "modify_return.skel.h"
+
+#define LOWER(x) ((x) & 0xffff)
+#define UPPER(x) ((x) >> 16)
+
+
+static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret)
+{
+ struct modify_return *skel = NULL;
+ int err, prog_fd;
+ __u32 duration = 0, retval;
+ __u16 side_effect;
+ __s16 ret;
+
+ skel = modify_return__open_and_load();
+ if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n"))
+ goto cleanup;
+
+ err = modify_return__attach(skel);
+ if (CHECK(err, "modify_return", "attach failed: %d\n", err))
+ goto cleanup;
+
+ skel->bss->input_retval = input_retval;
+ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+ err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0,
+ &retval, &duration);
+
+ CHECK(err, "test_run", "err %d errno %d\n", err, errno);
+
+ side_effect = UPPER(retval);
+ ret = LOWER(retval);
+
+ CHECK(ret != want_ret, "test_run",
+ "unexpected ret: %d, expected: %d\n", ret, want_ret);
+ CHECK(side_effect != want_side_effect, "modify_return",
+ "unexpected side_effect: %d\n", side_effect);
+
+ CHECK(skel->bss->fentry_result != 1, "modify_return",
+ "fentry failed\n");
+ CHECK(skel->bss->fexit_result != 1, "modify_return",
+ "fexit failed\n");
+ CHECK(skel->bss->fmod_ret_result != 1, "modify_return",
+ "fmod_ret failed\n");
+
+cleanup:
+ modify_return__destroy(skel);
+}
+
+void test_modify_return(void)
+{
+ run_test(0 /* input_retval */,
+ 1 /* want_side_effect */,
+ 4 /* want_ret */);
+ run_test(-EINVAL /* input_retval */,
+ 0 /* want_side_effect */,
+ -EINVAL /* want_ret */);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
new file mode 100644
index 000000000000..e74dc501b27f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
+#include <test_progs.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+struct bss {
+ __u64 dev;
+ __u64 ino;
+ __u64 pid_tgid;
+ __u64 user_pid_tgid;
+};
+
+void test_ns_current_pid_tgid(void)
+{
+ const char *probe_name = "raw_tracepoint/sys_enter";
+ const char *file = "test_ns_current_pid_tgid.o";
+ int err, key = 0, duration = 0;
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_map *bss_map;
+ struct bpf_object *obj;
+ struct bss bss;
+ struct stat st;
+ __u64 id;
+
+ obj = bpf_object__open_file(file, NULL);
+ if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+ return;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+ goto cleanup;
+
+ bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
+ if (CHECK(!bss_map, "find_bss_map", "failed\n"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_title(obj, probe_name);
+ if (CHECK(!prog, "find_prog", "prog '%s' not found\n",
+ probe_name))
+ goto cleanup;
+
+ memset(&bss, 0, sizeof(bss));
+ pid_t tid = syscall(SYS_gettid);
+ pid_t pid = getpid();
+
+ id = (__u64) tid << 32 | pid;
+ bss.user_pid_tgid = id;
+
+ if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) {
+ perror("Failed to stat /proc/self/ns/pid");
+ goto cleanup;
+ }
+
+ bss.dev = st.st_dev;
+ bss.ino = st.st_ino;
+
+ err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
+ if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err))
+ goto cleanup;
+
+ link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+ if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
+ PTR_ERR(link))) {
+ link = NULL;
+ goto cleanup;
+ }
+
+ /* trigger some syscalls */
+ usleep(1);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
+ if (CHECK(err, "set_bss", "failed to get bss : %d\n", err))
+ goto cleanup;
+
+ if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
+ "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
+ goto cleanup;
+cleanup:
+ bpf_link__destroy(link);
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
new file mode 100644
index 000000000000..e35c444902a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include "bpf/libbpf_internal.h"
+#include "test_perf_branches.skel.h"
+
+static void check_good_sample(struct test_perf_branches *skel)
+{
+ int written_global = skel->bss->written_global_out;
+ int required_size = skel->bss->required_size_out;
+ int written_stack = skel->bss->written_stack_out;
+ int pbe_size = sizeof(struct perf_branch_entry);
+ int duration = 0;
+
+ if (CHECK(!skel->bss->valid, "output not valid",
+ "no valid sample from prog"))
+ return;
+
+ /*
+ * It's hard to validate the contents of the branch entries b/c it
+ * would require some kind of disassembler and also encoding the
+ * valid jump instructions for supported architectures. So just check
+ * the easy stuff for now.
+ */
+ CHECK(required_size <= 0, "read_branches_size", "err %d\n", required_size);
+ CHECK(written_stack < 0, "read_branches_stack", "err %d\n", written_stack);
+ CHECK(written_stack % pbe_size != 0, "read_branches_stack",
+ "stack bytes written=%d not multiple of struct size=%d\n",
+ written_stack, pbe_size);
+ CHECK(written_global < 0, "read_branches_global", "err %d\n", written_global);
+ CHECK(written_global % pbe_size != 0, "read_branches_global",
+ "global bytes written=%d not multiple of struct size=%d\n",
+ written_global, pbe_size);
+ CHECK(written_global < written_stack, "read_branches_size",
+ "written_global=%d < written_stack=%d\n", written_global, written_stack);
+}
+
+static void check_bad_sample(struct test_perf_branches *skel)
+{
+ int written_global = skel->bss->written_global_out;
+ int required_size = skel->bss->required_size_out;
+ int written_stack = skel->bss->written_stack_out;
+ int duration = 0;
+
+ if (CHECK(!skel->bss->valid, "output not valid",
+ "no valid sample from prog"))
+ return;
+
+ CHECK((required_size != -EINVAL && required_size != -ENOENT),
+ "read_branches_size", "err %d\n", required_size);
+ CHECK((written_stack != -EINVAL && written_stack != -ENOENT),
+ "read_branches_stack", "written %d\n", written_stack);
+ CHECK((written_global != -EINVAL && written_global != -ENOENT),
+ "read_branches_global", "written %d\n", written_global);
+}
+
+static void test_perf_branches_common(int perf_fd,
+ void (*cb)(struct test_perf_branches *))
+{
+ struct test_perf_branches *skel;
+ int err, i, duration = 0;
+ bool detached = false;
+ struct bpf_link *link;
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+
+ skel = test_perf_branches__open_and_load();
+ if (CHECK(!skel, "test_perf_branches_load",
+ "perf_branches skeleton failed\n"))
+ return;
+
+ /* attach perf_event */
+ link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
+ if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
+ goto out_destroy_skel;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
+ goto out_destroy;
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+
+ test_perf_branches__detach(skel);
+ detached = true;
+
+ cb(skel);
+out_destroy:
+ bpf_link__destroy(link);
+out_destroy_skel:
+ if (!detached)
+ test_perf_branches__detach(skel);
+ test_perf_branches__destroy(skel);
+}
+
+static void test_perf_branches_hw(void)
+{
+ struct perf_event_attr attr = {0};
+ int duration = 0;
+ int pfd;
+
+ /* create perf event */
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
+ attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+
+ /*
+ * Some setups don't support branch records (virtual machines, !x86),
+ * so skip test in this case.
+ */
+ if (pfd == -1) {
+ if (errno == ENOENT || errno == EOPNOTSUPP) {
+ printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
+ __func__);
+ test__skip();
+ return;
+ }
+ if (CHECK(pfd < 0, "perf_event_open", "err %d errno %d\n",
+ pfd, errno))
+ return;
+ }
+
+ test_perf_branches_common(pfd, check_good_sample);
+
+ close(pfd);
+}
+
+/*
+ * Tests negative case -- run bpf_read_branch_records() on improperly configured
+ * perf event.
+ */
+static void test_perf_branches_no_hw(void)
+{
+ struct perf_event_attr attr = {0};
+ int duration = 0;
+ int pfd;
+
+ /* create perf event */
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 4000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (CHECK(pfd < 0, "perf_event_open", "err %d\n", pfd))
+ return;
+
+ test_perf_branches_common(pfd, check_bad_sample);
+
+ close(pfd);
+}
+
+void test_perf_branches(void)
+{
+ if (test__start_subtest("perf_branches_hw"))
+ test_perf_branches_hw();
+ if (test__start_subtest("perf_branches_no_hw"))
+ test_perf_branches_no_hw();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index 1450ea2dd4cc..a122ce3b360e 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -6,6 +6,11 @@
#include <test_progs.h>
#include "bpf/libbpf_internal.h"
+/* AddressSanitizer sometimes crashes due to data dereference below, due to
+ * this being mmap()'ed memory. Disable instrumentation with
+ * no_sanitize_address attribute
+ */
+__attribute__((no_sanitize_address))
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
int cpu_data = *(int *)data, duration = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index a2537dfa899c..44b514fabccd 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_pkt_access(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index 5f7aea605019..939015cd6dba 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_pkt_md_access(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 5dd89b941f53..dde2b7ae7bc9 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_prog_run_xattr(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index faccc66f4e39..f47e7b1cb32c 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
enum {
QUEUE,
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
new file mode 100644
index 000000000000..2bba908dfa63
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <sys/epoll.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/sysinfo.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include "test_ringbuf.skel.h"
+
+#define EDONE 7777
+
+static int duration = 0;
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+static volatile int sample_cnt;
+
+static int process_sample(void *ctx, void *data, size_t len)
+{
+ struct sample *s = data;
+
+ sample_cnt++;
+
+ switch (s->seq) {
+ case 0:
+ CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n",
+ 333L, s->value);
+ return 0;
+ case 1:
+ CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n",
+ 777L, s->value);
+ return -EDONE;
+ default:
+ /* we don't care about the rest */
+ return 0;
+ }
+}
+
+static struct test_ringbuf *skel;
+static struct ring_buffer *ringbuf;
+
+static void trigger_samples()
+{
+ skel->bss->dropped = 0;
+ skel->bss->total = 0;
+ skel->bss->discarded = 0;
+
+ /* trigger exactly two samples */
+ skel->bss->value = 333;
+ syscall(__NR_getpgid);
+ skel->bss->value = 777;
+ syscall(__NR_getpgid);
+}
+
+static void *poll_thread(void *input)
+{
+ long timeout = (long)input;
+
+ return (void *)(long)ring_buffer__poll(ringbuf, timeout);
+}
+
+void test_ringbuf(void)
+{
+ const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
+ pthread_t thread;
+ long bg_ret = -1;
+ int err;
+
+ skel = test_ringbuf__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ return;
+
+ /* only trigger BPF program for current process */
+ skel->bss->pid = getpid();
+
+ ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+ process_sample, NULL, NULL);
+ if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
+ goto cleanup;
+
+ err = test_ringbuf__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
+ goto cleanup;
+
+ trigger_samples();
+
+ /* 2 submitted + 1 discarded records */
+ CHECK(skel->bss->avail_data != 3 * rec_sz,
+ "err_avail_size", "exp %ld, got %ld\n",
+ 3L * rec_sz, skel->bss->avail_data);
+ CHECK(skel->bss->ring_size != 4096,
+ "err_ring_size", "exp %ld, got %ld\n",
+ 4096L, skel->bss->ring_size);
+ CHECK(skel->bss->cons_pos != 0,
+ "err_cons_pos", "exp %ld, got %ld\n",
+ 0L, skel->bss->cons_pos);
+ CHECK(skel->bss->prod_pos != 3 * rec_sz,
+ "err_prod_pos", "exp %ld, got %ld\n",
+ 3L * rec_sz, skel->bss->prod_pos);
+
+ /* poll for samples */
+ err = ring_buffer__poll(ringbuf, -1);
+
+ /* -EDONE is used as an indicator that we are done */
+ if (CHECK(err != -EDONE, "err_done", "done err: %d\n", err))
+ goto cleanup;
+
+ /* we expect extra polling to return nothing */
+ err = ring_buffer__poll(ringbuf, 0);
+ if (CHECK(err != 0, "extra_samples", "poll result: %d\n", err))
+ goto cleanup;
+
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+ CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+ 1L, skel->bss->discarded);
+
+ /* now validate consumer position is updated and returned */
+ trigger_samples();
+ CHECK(skel->bss->cons_pos != 3 * rec_sz,
+ "err_cons_pos", "exp %ld, got %ld\n",
+ 3L * rec_sz, skel->bss->cons_pos);
+ err = ring_buffer__poll(ringbuf, -1);
+ CHECK(err <= 0, "poll_err", "err %d\n", err);
+
+ /* start poll in background w/ long timeout */
+ err = pthread_create(&thread, NULL, poll_thread, (void *)(long)10000);
+ if (CHECK(err, "bg_poll", "pthread_create failed: %d\n", err))
+ goto cleanup;
+
+ /* turn off notifications now */
+ skel->bss->flags = BPF_RB_NO_WAKEUP;
+
+ /* give background thread a bit of a time */
+ usleep(50000);
+ trigger_samples();
+ /* sleeping arbitrarily is bad, but no better way to know that
+ * epoll_wait() **DID NOT** unblock in background thread
+ */
+ usleep(50000);
+ /* background poll should still be blocked */
+ err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+ if (CHECK(err != EBUSY, "try_join", "err %d\n", err))
+ goto cleanup;
+
+ /* BPF side did everything right */
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+ CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+ 1L, skel->bss->discarded);
+
+ /* clear flags to return to "adaptive" notification mode */
+ skel->bss->flags = 0;
+
+ /* produce new samples, no notification should be triggered, because
+ * consumer is now behind
+ */
+ trigger_samples();
+
+ /* background poll should still be blocked */
+ err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+ if (CHECK(err != EBUSY, "try_join", "err %d\n", err))
+ goto cleanup;
+
+ /* now force notifications */
+ skel->bss->flags = BPF_RB_FORCE_WAKEUP;
+ sample_cnt = 0;
+ trigger_samples();
+
+ /* now we should get a pending notification */
+ usleep(50000);
+ err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+ if (CHECK(err, "join_bg", "err %d\n", err))
+ goto cleanup;
+
+ if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld", bg_ret))
+ goto cleanup;
+
+ /* 3 rounds, 2 samples each */
+ CHECK(sample_cnt != 6, "wrong_sample_cnt",
+ "expected to see %d samples, got %d\n", 6, sample_cnt);
+
+ /* BPF side did everything right */
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+ CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+ 1L, skel->bss->discarded);
+
+ test_ringbuf__detach(skel);
+cleanup:
+ ring_buffer__free(ringbuf);
+ test_ringbuf__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
new file mode 100644
index 000000000000..78e450609803
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <sys/epoll.h>
+#include "test_ringbuf_multi.skel.h"
+
+static int duration = 0;
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+static int process_sample(void *ctx, void *data, size_t len)
+{
+ int ring = (unsigned long)ctx;
+ struct sample *s = data;
+
+ switch (s->seq) {
+ case 0:
+ CHECK(ring != 1, "sample1_ring", "exp %d, got %d\n", 1, ring);
+ CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n",
+ 333L, s->value);
+ break;
+ case 1:
+ CHECK(ring != 2, "sample2_ring", "exp %d, got %d\n", 2, ring);
+ CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n",
+ 777L, s->value);
+ break;
+ default:
+ CHECK(true, "extra_sample", "unexpected sample seq %d, val %ld\n",
+ s->seq, s->value);
+ return -1;
+ }
+
+ return 0;
+}
+
+void test_ringbuf_multi(void)
+{
+ struct test_ringbuf_multi *skel;
+ struct ring_buffer *ringbuf;
+ int err;
+
+ skel = test_ringbuf_multi__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ return;
+
+ /* only trigger BPF program for current process */
+ skel->bss->pid = getpid();
+
+ ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf1),
+ process_sample, (void *)(long)1, NULL);
+ if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
+ goto cleanup;
+
+ err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2),
+ process_sample, (void *)(long)2);
+ if (CHECK(err, "ringbuf_add", "failed to add another ring\n"))
+ goto cleanup;
+
+ err = test_ringbuf_multi__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger few samples, some will be skipped */
+ skel->bss->target_ring = 0;
+ skel->bss->value = 333;
+ syscall(__NR_getpgid);
+
+ /* skipped, no ringbuf in slot 1 */
+ skel->bss->target_ring = 1;
+ skel->bss->value = 555;
+ syscall(__NR_getpgid);
+
+ skel->bss->target_ring = 2;
+ skel->bss->value = 777;
+ syscall(__NR_getpgid);
+
+ /* poll for samples, should get 2 ringbufs back */
+ err = ring_buffer__poll(ringbuf, -1);
+ if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err))
+ goto cleanup;
+
+ /* expect extra polling to return nothing */
+ err = ring_buffer__poll(ringbuf, 0);
+ if (CHECK(err < 0, "extra_samples", "poll result: %d\n", err))
+ goto cleanup;
+
+ CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+ 0L, skel->bss->dropped);
+ CHECK(skel->bss->skipped != 1, "err_skipped", "exp %ld, got %ld\n",
+ 1L, skel->bss->skipped);
+ CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+ 2L, skel->bss->total);
+
+cleanup:
+ ring_buffer__free(ringbuf);
+ test_ringbuf_multi__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
index 9d9351dc2ded..713167449c98 100644
--- a/tools/testing/selftests/bpf/prog_tests/section_names.c
+++ b/tools/testing/selftests/bpf/prog_tests/section_names.c
@@ -43,18 +43,18 @@ static struct sec_name_test tests[] = {
{"lwt_seg6local", {0, BPF_PROG_TYPE_LWT_SEG6LOCAL, 0}, {-EINVAL, 0} },
{
"cgroup_skb/ingress",
- {0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+ {0, BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_INGRESS},
{0, BPF_CGROUP_INET_INGRESS},
},
{
"cgroup_skb/egress",
- {0, BPF_PROG_TYPE_CGROUP_SKB, 0},
+ {0, BPF_PROG_TYPE_CGROUP_SKB, BPF_CGROUP_INET_EGRESS},
{0, BPF_CGROUP_INET_EGRESS},
},
{"cgroup/skb", {0, BPF_PROG_TYPE_CGROUP_SKB, 0}, {-EINVAL, 0} },
{
"cgroup/sock",
- {0, BPF_PROG_TYPE_CGROUP_SOCK, 0},
+ {0, BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET_SOCK_CREATE},
{0, BPF_CGROUP_INET_SOCK_CREATE},
},
{
@@ -69,26 +69,38 @@ static struct sec_name_test tests[] = {
},
{
"cgroup/dev",
- {0, BPF_PROG_TYPE_CGROUP_DEVICE, 0},
+ {0, BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE},
{0, BPF_CGROUP_DEVICE},
},
- {"sockops", {0, BPF_PROG_TYPE_SOCK_OPS, 0}, {0, BPF_CGROUP_SOCK_OPS} },
+ {
+ "sockops",
+ {0, BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS},
+ {0, BPF_CGROUP_SOCK_OPS},
+ },
{
"sk_skb/stream_parser",
- {0, BPF_PROG_TYPE_SK_SKB, 0},
+ {0, BPF_PROG_TYPE_SK_SKB, BPF_SK_SKB_STREAM_PARSER},
{0, BPF_SK_SKB_STREAM_PARSER},
},
{
"sk_skb/stream_verdict",
- {0, BPF_PROG_TYPE_SK_SKB, 0},
+ {0, BPF_PROG_TYPE_SK_SKB, BPF_SK_SKB_STREAM_VERDICT},
{0, BPF_SK_SKB_STREAM_VERDICT},
},
{"sk_skb", {0, BPF_PROG_TYPE_SK_SKB, 0}, {-EINVAL, 0} },
- {"sk_msg", {0, BPF_PROG_TYPE_SK_MSG, 0}, {0, BPF_SK_MSG_VERDICT} },
- {"lirc_mode2", {0, BPF_PROG_TYPE_LIRC_MODE2, 0}, {0, BPF_LIRC_MODE2} },
+ {
+ "sk_msg",
+ {0, BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT},
+ {0, BPF_SK_MSG_VERDICT},
+ },
+ {
+ "lirc_mode2",
+ {0, BPF_PROG_TYPE_LIRC_MODE2, BPF_LIRC_MODE2},
+ {0, BPF_LIRC_MODE2},
+ },
{
"flow_dissector",
- {0, BPF_PROG_TYPE_FLOW_DISSECTOR, 0},
+ {0, BPF_PROG_TYPE_FLOW_DISSECTOR, BPF_FLOW_DISSECTOR},
{0, BPF_FLOW_DISSECTOR},
},
{
@@ -158,17 +170,17 @@ static void test_prog_type_by_name(const struct sec_name_test *test)
&expected_attach_type);
CHECK(rc != test->expected_load.rc, "check_code",
- "prog: unexpected rc=%d for %s", rc, test->sec_name);
+ "prog: unexpected rc=%d for %s\n", rc, test->sec_name);
if (rc)
return;
CHECK(prog_type != test->expected_load.prog_type, "check_prog_type",
- "prog: unexpected prog_type=%d for %s",
+ "prog: unexpected prog_type=%d for %s\n",
prog_type, test->sec_name);
CHECK(expected_attach_type != test->expected_load.expected_attach_type,
- "check_attach_type", "prog: unexpected expected_attach_type=%d for %s",
+ "check_attach_type", "prog: unexpected expected_attach_type=%d for %s\n",
expected_attach_type, test->sec_name);
}
@@ -180,13 +192,13 @@ static void test_attach_type_by_name(const struct sec_name_test *test)
rc = libbpf_attach_type_by_name(test->sec_name, &attach_type);
CHECK(rc != test->expected_attach.rc, "check_ret",
- "attach: unexpected rc=%d for %s", rc, test->sec_name);
+ "attach: unexpected rc=%d for %s\n", rc, test->sec_name);
if (rc)
return;
CHECK(attach_type != test->expected_attach.attach_type,
- "check_attach_type", "attach: unexpected attach_type=%d for %s",
+ "check_attach_type", "attach: unexpected attach_type=%d for %s\n",
attach_type, test->sec_name);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 0800036ed654..821b4146b7b6 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -36,6 +36,7 @@ static int result_map, tmp_index_ovr_map, linum_map, data_check_map;
static __u32 expected_results[NR_RESULTS];
static int sk_fds[REUSEPORT_ARRAY_SIZE];
static int reuseport_array = -1, outer_map = -1;
+static enum bpf_map_type inner_map_type;
static int select_by_skb_data_prog;
static int saved_tcp_syncookie = -1;
static struct bpf_object *obj;
@@ -63,13 +64,15 @@ static union sa46 {
} \
})
-static int create_maps(void)
+static int create_maps(enum bpf_map_type inner_type)
{
struct bpf_create_map_attr attr = {};
+ inner_map_type = inner_type;
+
/* Creating reuseport_array */
attr.name = "reuseport_array";
- attr.map_type = BPF_MAP_TYPE_REUSEPORT_SOCKARRAY;
+ attr.map_type = inner_type;
attr.key_size = sizeof(__u32);
attr.value_size = sizeof(__u32);
attr.max_entries = REUSEPORT_ARRAY_SIZE;
@@ -506,11 +509,6 @@ static void test_syncookie(int type, sa_family_t family)
.pass_on_failure = 0,
};
- if (type != SOCK_STREAM) {
- test__skip();
- return;
- }
-
/*
* +1 for TCP-SYN and
* +1 for the TCP-ACK (ack the syncookie)
@@ -728,12 +726,36 @@ static void cleanup_per_test(bool no_inner_map)
static void cleanup(void)
{
- if (outer_map != -1)
+ if (outer_map != -1) {
close(outer_map);
- if (reuseport_array != -1)
+ outer_map = -1;
+ }
+
+ if (reuseport_array != -1) {
close(reuseport_array);
- if (obj)
+ reuseport_array = -1;
+ }
+
+ if (obj) {
bpf_object__close(obj);
+ obj = NULL;
+ }
+
+ memset(expected_results, 0, sizeof(expected_results));
+}
+
+static const char *maptype_str(enum bpf_map_type type)
+{
+ switch (type) {
+ case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
+ return "reuseport_sockarray";
+ case BPF_MAP_TYPE_SOCKMAP:
+ return "sockmap";
+ case BPF_MAP_TYPE_SOCKHASH:
+ return "sockhash";
+ default:
+ return "unknown";
+ }
}
static const char *family_str(sa_family_t family)
@@ -760,7 +782,7 @@ static const char *sotype_str(int sotype)
}
}
-#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ }
+#define TEST_INIT(fn_, ...) { .fn = fn_, .name = #fn_, __VA_ARGS__ }
static void test_config(int sotype, sa_family_t family, bool inany)
{
@@ -768,12 +790,15 @@ static void test_config(int sotype, sa_family_t family, bool inany)
void (*fn)(int sotype, sa_family_t family);
const char *name;
bool no_inner_map;
+ int need_sotype;
} tests[] = {
- TEST_INIT(test_err_inner_map, true /* no_inner_map */),
+ TEST_INIT(test_err_inner_map,
+ .no_inner_map = true),
TEST_INIT(test_err_skb_data),
TEST_INIT(test_err_sk_select_port),
TEST_INIT(test_pass),
- TEST_INIT(test_syncookie),
+ TEST_INIT(test_syncookie,
+ .need_sotype = SOCK_STREAM),
TEST_INIT(test_pass_on_err),
TEST_INIT(test_detach_bpf),
};
@@ -781,7 +806,11 @@ static void test_config(int sotype, sa_family_t family, bool inany)
const struct test *t;
for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
- snprintf(s, sizeof(s), "%s/%s %s %s",
+ if (t->need_sotype && t->need_sotype != sotype)
+ continue; /* test not compatible with socket type */
+
+ snprintf(s, sizeof(s), "%s %s/%s %s %s",
+ maptype_str(inner_map_type),
family_str(family), sotype_str(sotype),
inany ? "INANY" : "LOOPBACK", t->name);
@@ -816,13 +845,20 @@ static void test_all(void)
test_config(c->sotype, c->family, c->inany);
}
-void test_select_reuseport(void)
+void test_map_type(enum bpf_map_type mt)
{
- if (create_maps())
+ if (create_maps(mt))
goto out;
if (prepare_bpf_obj())
goto out;
+ test_all();
+out:
+ cleanup();
+}
+
+void test_select_reuseport(void)
+{
saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
if (saved_tcp_fo < 0)
goto out;
@@ -835,8 +871,9 @@ void test_select_reuseport(void)
if (disable_syncookie())
goto out;
- test_all();
+ test_map_type(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ test_map_type(BPF_MAP_TYPE_SOCKMAP);
+ test_map_type(BPF_MAP_TYPE_SOCKHASH);
out:
- cleanup();
restore_sysctls();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index 996e808f43a2..dfcbddcbe4d3 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
static void sigalrm_handler(int s) {}
static struct sigaction sigalrm_action = {
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
new file mode 100644
index 000000000000..47fa04adc147
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -0,0 +1,328 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+// Copyright (c) 2019 Cloudflare
+// Copyright (c) 2020 Isovalent, Inc.
+/*
+ * Test that the socket assign program is able to redirect traffic towards a
+ * socket, regardless of whether the port or address destination of the traffic
+ * matches the port.
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "test_progs.h"
+
+#define BIND_PORT 1234
+#define CONNECT_PORT 4321
+#define TEST_DADDR (0xC0A80203)
+#define NS_SELF "/proc/self/ns/net"
+#define SERVER_MAP_PATH "/sys/fs/bpf/tc/globals/server_map"
+
+static const struct timeval timeo_sec = { .tv_sec = 3 };
+static const size_t timeo_optlen = sizeof(timeo_sec);
+static int stop, duration;
+
+static bool
+configure_stack(void)
+{
+ char tc_cmd[BUFSIZ];
+
+ /* Move to a new networking namespace */
+ if (CHECK_FAIL(unshare(CLONE_NEWNET)))
+ return false;
+
+ /* Configure necessary links, routes */
+ if (CHECK_FAIL(system("ip link set dev lo up")))
+ return false;
+ if (CHECK_FAIL(system("ip route add local default dev lo")))
+ return false;
+ if (CHECK_FAIL(system("ip -6 route add local default dev lo")))
+ return false;
+
+ /* Load qdisc, BPF program */
+ if (CHECK_FAIL(system("tc qdisc add dev lo clsact")))
+ return false;
+ sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
+ "direct-action object-file ./test_sk_assign.o",
+ "section classifier/sk_assign_test",
+ (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "");
+ if (CHECK(system(tc_cmd), "BPF load failed;",
+ "run with -vv for more info\n"))
+ return false;
+
+ return true;
+}
+
+static int
+start_server(const struct sockaddr *addr, socklen_t len, int type)
+{
+ int fd;
+
+ fd = socket(addr->sa_family, type, 0);
+ if (CHECK_FAIL(fd == -1))
+ goto out;
+ if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
+ timeo_optlen)))
+ goto close_out;
+ if (CHECK_FAIL(bind(fd, addr, len) == -1))
+ goto close_out;
+ if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
+ goto close_out;
+
+ goto out;
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static int
+connect_to_server(const struct sockaddr *addr, socklen_t len, int type)
+{
+ int fd = -1;
+
+ fd = socket(addr->sa_family, type, 0);
+ if (CHECK_FAIL(fd == -1))
+ goto out;
+ if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
+ timeo_optlen)))
+ goto close_out;
+ if (CHECK_FAIL(connect(fd, addr, len)))
+ goto close_out;
+
+ goto out;
+close_out:
+ close(fd);
+ fd = -1;
+out:
+ return fd;
+}
+
+static in_port_t
+get_port(int fd)
+{
+ struct sockaddr_storage ss;
+ socklen_t slen = sizeof(ss);
+ in_port_t port = 0;
+
+ if (CHECK_FAIL(getsockname(fd, (struct sockaddr *)&ss, &slen)))
+ return port;
+
+ switch (ss.ss_family) {
+ case AF_INET:
+ port = ((struct sockaddr_in *)&ss)->sin_port;
+ break;
+ case AF_INET6:
+ port = ((struct sockaddr_in6 *)&ss)->sin6_port;
+ break;
+ default:
+ CHECK(1, "Invalid address family", "%d\n", ss.ss_family);
+ }
+ return port;
+}
+
+static ssize_t
+rcv_msg(int srv_client, int type)
+{
+ struct sockaddr_storage ss;
+ char buf[BUFSIZ];
+ socklen_t slen;
+
+ if (type == SOCK_STREAM)
+ return read(srv_client, &buf, sizeof(buf));
+ else
+ return recvfrom(srv_client, &buf, sizeof(buf), 0,
+ (struct sockaddr *)&ss, &slen);
+}
+
+static int
+run_test(int server_fd, const struct sockaddr *addr, socklen_t len, int type)
+{
+ int client = -1, srv_client = -1;
+ char buf[] = "testing";
+ in_port_t port;
+ int ret = 1;
+
+ client = connect_to_server(addr, len, type);
+ if (client == -1) {
+ perror("Cannot connect to server");
+ goto out;
+ }
+
+ if (type == SOCK_STREAM) {
+ srv_client = accept(server_fd, NULL, NULL);
+ if (CHECK_FAIL(srv_client == -1)) {
+ perror("Can't accept connection");
+ goto out;
+ }
+ } else {
+ srv_client = server_fd;
+ }
+ if (CHECK_FAIL(write(client, buf, sizeof(buf)) != sizeof(buf))) {
+ perror("Can't write on client");
+ goto out;
+ }
+ if (CHECK_FAIL(rcv_msg(srv_client, type) != sizeof(buf))) {
+ perror("Can't read on server");
+ goto out;
+ }
+
+ port = get_port(srv_client);
+ if (CHECK_FAIL(!port))
+ goto out;
+ /* SOCK_STREAM is connected via accept(), so the server's local address
+ * will be the CONNECT_PORT rather than the BIND port that corresponds
+ * to the listen socket. SOCK_DGRAM on the other hand is connectionless
+ * so we can't really do the same check there; the server doesn't ever
+ * create a socket with CONNECT_PORT.
+ */
+ if (type == SOCK_STREAM &&
+ CHECK(port != htons(CONNECT_PORT), "Expected", "port %u but got %u",
+ CONNECT_PORT, ntohs(port)))
+ goto out;
+ else if (type == SOCK_DGRAM &&
+ CHECK(port != htons(BIND_PORT), "Expected",
+ "port %u but got %u", BIND_PORT, ntohs(port)))
+ goto out;
+
+ ret = 0;
+out:
+ close(client);
+ if (srv_client != server_fd)
+ close(srv_client);
+ if (ret)
+ WRITE_ONCE(stop, 1);
+ return ret;
+}
+
+static void
+prepare_addr(struct sockaddr *addr, int family, __u16 port, bool rewrite_addr)
+{
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)addr;
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = family;
+ addr4->sin_port = htons(port);
+ if (rewrite_addr)
+ addr4->sin_addr.s_addr = htonl(TEST_DADDR);
+ else
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)addr;
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = family;
+ addr6->sin6_port = htons(port);
+ addr6->sin6_addr = in6addr_loopback;
+ if (rewrite_addr)
+ addr6->sin6_addr.s6_addr32[3] = htonl(TEST_DADDR);
+ break;
+ default:
+ fprintf(stderr, "Invalid family %d", family);
+ }
+}
+
+struct test_sk_cfg {
+ const char *name;
+ int family;
+ struct sockaddr *addr;
+ socklen_t len;
+ int type;
+ bool rewrite_addr;
+};
+
+#define TEST(NAME, FAMILY, TYPE, REWRITE) \
+{ \
+ .name = NAME, \
+ .family = FAMILY, \
+ .addr = (FAMILY == AF_INET) ? (struct sockaddr *)&addr4 \
+ : (struct sockaddr *)&addr6, \
+ .len = (FAMILY == AF_INET) ? sizeof(addr4) : sizeof(addr6), \
+ .type = TYPE, \
+ .rewrite_addr = REWRITE, \
+}
+
+void test_sk_assign(void)
+{
+ struct sockaddr_in addr4;
+ struct sockaddr_in6 addr6;
+ struct test_sk_cfg tests[] = {
+ TEST("ipv4 tcp port redir", AF_INET, SOCK_STREAM, false),
+ TEST("ipv4 tcp addr redir", AF_INET, SOCK_STREAM, true),
+ TEST("ipv6 tcp port redir", AF_INET6, SOCK_STREAM, false),
+ TEST("ipv6 tcp addr redir", AF_INET6, SOCK_STREAM, true),
+ TEST("ipv4 udp port redir", AF_INET, SOCK_DGRAM, false),
+ TEST("ipv4 udp addr redir", AF_INET, SOCK_DGRAM, true),
+ TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false),
+ TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true),
+ };
+ int server = -1;
+ int server_map;
+ int self_net;
+
+ self_net = open(NS_SELF, O_RDONLY);
+ if (CHECK_FAIL(self_net < 0)) {
+ perror("Unable to open "NS_SELF);
+ return;
+ }
+
+ if (!configure_stack()) {
+ perror("configure_stack");
+ goto cleanup;
+ }
+
+ server_map = bpf_obj_get(SERVER_MAP_PATH);
+ if (CHECK_FAIL(server_map < 0)) {
+ perror("Unable to open " SERVER_MAP_PATH);
+ goto cleanup;
+ }
+
+ for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
+ struct test_sk_cfg *test = &tests[i];
+ const struct sockaddr *addr;
+ const int zero = 0;
+ int err;
+
+ if (!test__start_subtest(test->name))
+ continue;
+ prepare_addr(test->addr, test->family, BIND_PORT, false);
+ addr = (const struct sockaddr *)test->addr;
+ server = start_server(addr, test->len, test->type);
+ if (server == -1)
+ goto close;
+
+ err = bpf_map_update_elem(server_map, &zero, &server, BPF_ANY);
+ if (CHECK_FAIL(err)) {
+ perror("Unable to update server_map");
+ goto close;
+ }
+
+ /* connect to unbound ports */
+ prepare_addr(test->addr, test->family, CONNECT_PORT,
+ test->rewrite_addr);
+ if (run_test(server, addr, test->len, test->type))
+ goto close;
+
+ close(server);
+ server = -1;
+ }
+
+close:
+ close(server);
+ close(server_map);
+cleanup:
+ if (CHECK_FAIL(unlink(SERVER_MAP_PATH)))
+ perror("Unable to unlink " SERVER_MAP_PATH);
+ if (CHECK_FAIL(setns(self_net, CLONE_NEWNET)))
+ perror("Failed to setns("NS_SELF")");
+ close(self_net);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index c6d6b685a946..7021b92af313 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_skb_ctx(void)
{
@@ -14,6 +15,7 @@ void test_skb_ctx(void)
.wire_len = 100,
.gso_segs = 8,
.mark = 9,
+ .gso_size = 10,
};
struct bpf_prog_test_run_attr tattr = {
.data_in = &pkt_v4,
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
new file mode 100644
index 000000000000..f302ad84a298
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+void test_skb_helpers(void)
+{
+ struct __sk_buff skb = {
+ .wire_len = 100,
+ .gso_segs = 8,
+ .gso_size = 10,
+ };
+ struct bpf_prog_test_run_attr tattr = {
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ .ctx_out = &skb,
+ .ctx_size_out = sizeof(skb),
+ };
+ struct bpf_object *obj;
+ int err;
+
+ err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &tattr.prog_fd);
+ if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ return;
+ err = bpf_prog_test_run_xattr(&tattr);
+ CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno);
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index aa43e0bd210c..96e7b7f84c65 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Cloudflare
+#include <error.h>
#include "test_progs.h"
+#include "test_skmsg_load_helpers.skel.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -70,10 +72,43 @@ out:
close(s);
}
+static void test_skmsg_helpers(enum bpf_map_type map_type)
+{
+ struct test_skmsg_load_helpers *skel;
+ int err, map, verdict;
+
+ skel = test_skmsg_load_helpers__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_skmsg_load_helpers__open_and_load");
+ return;
+ }
+
+ verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ map = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach");
+ goto out;
+ }
+
+ err = bpf_prog_detach2(verdict, map, BPF_SK_MSG_VERDICT);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_detach2");
+ goto out;
+ }
+out:
+ test_skmsg_load_helpers__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap sk_msg load helpers"))
+ test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash sk_msg load helpers"))
+ test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
new file mode 100644
index 000000000000..06b86addc181
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+/*
+ * Tests for sockmap/sockhash holding kTLS sockets.
+ */
+
+#include "test_progs.h"
+
+#define MAX_TEST_NAME 80
+#define TCP_ULP 31
+
+static int tcp_server(int family)
+{
+ int err, s;
+
+ s = socket(family, SOCK_STREAM, 0);
+ if (CHECK_FAIL(s == -1)) {
+ perror("socket");
+ return -1;
+ }
+
+ err = listen(s, SOMAXCONN);
+ if (CHECK_FAIL(err)) {
+ perror("listen");
+ return -1;
+ }
+
+ return s;
+}
+
+static int disconnect(int fd)
+{
+ struct sockaddr unspec = { AF_UNSPEC };
+
+ return connect(fd, &unspec, sizeof(unspec));
+}
+
+/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */
+static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
+{
+ struct sockaddr_storage addr = {0};
+ socklen_t len = sizeof(addr);
+ int err, cli, srv, zero = 0;
+
+ srv = tcp_server(family);
+ if (srv == -1)
+ return;
+
+ err = getsockname(srv, (struct sockaddr *)&addr, &len);
+ if (CHECK_FAIL(err)) {
+ perror("getsockopt");
+ goto close_srv;
+ }
+
+ cli = socket(family, SOCK_STREAM, 0);
+ if (CHECK_FAIL(cli == -1)) {
+ perror("socket");
+ goto close_srv;
+ }
+
+ err = connect(cli, (struct sockaddr *)&addr, len);
+ if (CHECK_FAIL(err)) {
+ perror("connect");
+ goto close_cli;
+ }
+
+ err = bpf_map_update_elem(map, &zero, &cli, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_map_update_elem");
+ goto close_cli;
+ }
+
+ err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (CHECK_FAIL(err)) {
+ perror("setsockopt(TCP_ULP)");
+ goto close_cli;
+ }
+
+ err = bpf_map_delete_elem(map, &zero);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_map_delete_elem");
+ goto close_cli;
+ }
+
+ err = disconnect(cli);
+ if (CHECK_FAIL(err))
+ perror("disconnect");
+
+close_cli:
+ close(cli);
+close_srv:
+ close(srv);
+}
+
+static void run_tests(int family, enum bpf_map_type map_type)
+{
+ char test_name[MAX_TEST_NAME];
+ int map;
+
+ map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
+ if (CHECK_FAIL(map == -1)) {
+ perror("bpf_map_create");
+ return;
+ }
+
+ snprintf(test_name, MAX_TEST_NAME,
+ "sockmap_ktls disconnect_after_delete %s %s",
+ family == AF_INET ? "IPv4" : "IPv6",
+ map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH");
+ if (!test__start_subtest(test_name))
+ return;
+
+ test_sockmap_ktls_disconnect_after_delete(family, map);
+
+ close(map);
+}
+
+void test_sockmap_ktls(void)
+{
+ run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP);
+ run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH);
+ run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP);
+ run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
new file mode 100644
index 000000000000..d7d65a700799
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -0,0 +1,1635 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+/*
+ * Test suite for SOCKMAP/SOCKHASH holding listening sockets.
+ * Covers:
+ * 1. BPF map operations - bpf_map_{update,lookup delete}_elem
+ * 2. BPF redirect helpers - bpf_{sk,msg}_redirect_map
+ * 3. BPF reuseport helper - bpf_sk_select_reuseport
+ */
+
+#include <linux/compiler.h>
+#include <errno.h>
+#include <error.h>
+#include <limits.h>
+#include <netinet/in.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/select.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "bpf_util.h"
+#include "test_progs.h"
+#include "test_sockmap_listen.skel.h"
+
+#define IO_TIMEOUT_SEC 30
+#define MAX_STRERR_LEN 256
+#define MAX_TEST_NAME 80
+
+#define _FAIL(errnum, fmt...) \
+ ({ \
+ error_at_line(0, (errnum), __func__, __LINE__, fmt); \
+ CHECK_FAIL(true); \
+ })
+#define FAIL(fmt...) _FAIL(0, fmt)
+#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
+#define FAIL_LIBBPF(err, msg) \
+ ({ \
+ char __buf[MAX_STRERR_LEN]; \
+ libbpf_strerror((err), __buf, sizeof(__buf)); \
+ FAIL("%s: %s", (msg), __buf); \
+ })
+
+/* Wrappers that fail the test on error and report it. */
+
+#define xaccept_nonblock(fd, addr, len) \
+ ({ \
+ int __ret = \
+ accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("accept"); \
+ __ret; \
+ })
+
+#define xbind(fd, addr, len) \
+ ({ \
+ int __ret = bind((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("bind"); \
+ __ret; \
+ })
+
+#define xclose(fd) \
+ ({ \
+ int __ret = close((fd)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("close"); \
+ __ret; \
+ })
+
+#define xconnect(fd, addr, len) \
+ ({ \
+ int __ret = connect((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("connect"); \
+ __ret; \
+ })
+
+#define xgetsockname(fd, addr, len) \
+ ({ \
+ int __ret = getsockname((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockname"); \
+ __ret; \
+ })
+
+#define xgetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = getsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xlisten(fd, backlog) \
+ ({ \
+ int __ret = listen((fd), (backlog)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("listen"); \
+ __ret; \
+ })
+
+#define xsetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = setsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("setsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xsend(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = send((fd), (buf), (len), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("send"); \
+ __ret; \
+ })
+
+#define xrecv_nonblock(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
+ IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("recv"); \
+ __ret; \
+ })
+
+#define xsocket(family, sotype, flags) \
+ ({ \
+ int __ret = socket(family, sotype, flags); \
+ if (__ret == -1) \
+ FAIL_ERRNO("socket"); \
+ __ret; \
+ })
+
+#define xbpf_map_delete_elem(fd, key) \
+ ({ \
+ int __ret = bpf_map_delete_elem((fd), (key)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("map_delete"); \
+ __ret; \
+ })
+
+#define xbpf_map_lookup_elem(fd, key, val) \
+ ({ \
+ int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("map_lookup"); \
+ __ret; \
+ })
+
+#define xbpf_map_update_elem(fd, key, val, flags) \
+ ({ \
+ int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("map_update"); \
+ __ret; \
+ })
+
+#define xbpf_prog_attach(prog, target, type, flags) \
+ ({ \
+ int __ret = \
+ bpf_prog_attach((prog), (target), (type), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("prog_attach(" #type ")"); \
+ __ret; \
+ })
+
+#define xbpf_prog_detach2(prog, target, type) \
+ ({ \
+ int __ret = bpf_prog_detach2((prog), (target), (type)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("prog_detach2(" #type ")"); \
+ __ret; \
+ })
+
+#define xpthread_create(thread, attr, func, arg) \
+ ({ \
+ int __ret = pthread_create((thread), (attr), (func), (arg)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_create"); \
+ __ret; \
+ })
+
+#define xpthread_join(thread, retval) \
+ ({ \
+ int __ret = pthread_join((thread), (retval)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_join"); \
+ __ret; \
+ })
+
+static int poll_read(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set rfds;
+ int r;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+
+ r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+
+ return r == 1 ? 0 : -1;
+}
+
+static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return accept(fd, addr, len);
+}
+
+static int recv_timeout(int fd, void *buf, size_t len, int flags,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return recv(fd, buf, len, flags);
+}
+
+static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
+{
+ struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = 0;
+ addr6->sin6_addr = in6addr_loopback;
+ *len = sizeof(*addr6);
+}
+
+static void init_addr_loopback(int family, struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ switch (family) {
+ case AF_INET:
+ init_addr_loopback4(ss, len);
+ return;
+ case AF_INET6:
+ init_addr_loopback6(ss, len);
+ return;
+ default:
+ FAIL("unsupported address family %d", family);
+ }
+}
+
+static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
+{
+ return (struct sockaddr *)ss;
+}
+
+static int enable_reuseport(int s, int progfd)
+{
+ int err, one = 1;
+
+ err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err)
+ return -1;
+ err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
+ sizeof(progfd));
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static int socket_loopback_reuseport(int family, int sotype, int progfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err, s;
+
+ init_addr_loopback(family, &addr, &len);
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return -1;
+
+ if (progfd >= 0)
+ enable_reuseport(s, progfd);
+
+ err = xbind(s, sockaddr(&addr), len);
+ if (err)
+ goto close;
+
+ if (sotype & SOCK_DGRAM)
+ return s;
+
+ err = xlisten(s, SOMAXCONN);
+ if (err)
+ goto close;
+
+ return s;
+close:
+ xclose(s);
+ return -1;
+}
+
+static int socket_loopback(int family, int sotype)
+{
+ return socket_loopback_reuseport(family, sotype, -1);
+}
+
+static void test_insert_invalid(int family, int sotype, int mapfd)
+{
+ u32 key = 0;
+ u64 value;
+ int err;
+
+ value = -1;
+ err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ if (!err || errno != EINVAL)
+ FAIL_ERRNO("map_update: expected EINVAL");
+
+ value = INT_MAX;
+ err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ if (!err || errno != EBADF)
+ FAIL_ERRNO("map_update: expected EBADF");
+}
+
+static void test_insert_opened(int family, int sotype, int mapfd)
+{
+ u32 key = 0;
+ u64 value;
+ int err, s;
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return;
+
+ errno = 0;
+ value = s;
+ err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ if (!err || errno != EOPNOTSUPP)
+ FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+
+ xclose(s);
+}
+
+static void test_insert_bound(int family, int sotype, int mapfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ u32 key = 0;
+ u64 value;
+ int err, s;
+
+ init_addr_loopback(family, &addr, &len);
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return;
+
+ err = xbind(s, sockaddr(&addr), len);
+ if (err)
+ goto close;
+
+ errno = 0;
+ value = s;
+ err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ if (!err || errno != EOPNOTSUPP)
+ FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+close:
+ xclose(s);
+}
+
+static void test_insert(int family, int sotype, int mapfd)
+{
+ u64 value;
+ u32 key;
+ int s;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ xclose(s);
+}
+
+static void test_delete_after_insert(int family, int sotype, int mapfd)
+{
+ u64 value;
+ u32 key;
+ int s;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ xbpf_map_delete_elem(mapfd, &key);
+ xclose(s);
+}
+
+static void test_delete_after_close(int family, int sotype, int mapfd)
+{
+ int err, s;
+ u64 value;
+ u32 key;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+ xclose(s);
+
+ errno = 0;
+ err = bpf_map_delete_elem(mapfd, &key);
+ if (!err || (errno != EINVAL && errno != ENOENT))
+ /* SOCKMAP and SOCKHASH return different error codes */
+ FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
+}
+
+static void test_lookup_after_insert(int family, int sotype, int mapfd)
+{
+ u64 cookie, value;
+ socklen_t len;
+ u32 key;
+ int s;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+ len = sizeof(cookie);
+ xgetsockopt(s, SOL_SOCKET, SO_COOKIE, &cookie, &len);
+
+ xbpf_map_lookup_elem(mapfd, &key, &value);
+
+ if (value != cookie) {
+ FAIL("map_lookup: have %#llx, want %#llx",
+ (unsigned long long)value, (unsigned long long)cookie);
+ }
+
+ xclose(s);
+}
+
+static void test_lookup_after_delete(int family, int sotype, int mapfd)
+{
+ int err, s;
+ u64 value;
+ u32 key;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ xbpf_map_delete_elem(mapfd, &key);
+
+ errno = 0;
+ err = bpf_map_lookup_elem(mapfd, &key, &value);
+ if (!err || errno != ENOENT)
+ FAIL_ERRNO("map_lookup: expected ENOENT");
+
+ xclose(s);
+}
+
+static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
+{
+ u32 key, value32;
+ int err, s;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key),
+ sizeof(value32), 1, 0);
+ if (mapfd < 0) {
+ FAIL_ERRNO("map_create");
+ goto close;
+ }
+
+ key = 0;
+ value32 = s;
+ xbpf_map_update_elem(mapfd, &key, &value32, BPF_NOEXIST);
+
+ errno = 0;
+ err = bpf_map_lookup_elem(mapfd, &key, &value32);
+ if (!err || errno != ENOSPC)
+ FAIL_ERRNO("map_lookup: expected ENOSPC");
+
+ xclose(mapfd);
+close:
+ xclose(s);
+}
+
+static void test_update_existing(int family, int sotype, int mapfd)
+{
+ int s1, s2;
+ u64 value;
+ u32 key;
+
+ s1 = socket_loopback(family, sotype);
+ if (s1 < 0)
+ return;
+
+ s2 = socket_loopback(family, sotype);
+ if (s2 < 0)
+ goto close_s1;
+
+ key = 0;
+ value = s1;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+ value = s2;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_EXIST);
+ xclose(s2);
+close_s1:
+ xclose(s1);
+}
+
+/* Exercise the code path where we destroy child sockets that never
+ * got accept()'ed, aka orphans, when parent socket gets closed.
+ */
+static void test_destroy_orphan_child(int family, int sotype, int mapfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err, s, c;
+ u64 value;
+ u32 key;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+
+ c = xsocket(family, sotype, 0);
+ if (c == -1)
+ goto close_srv;
+
+ xconnect(c, sockaddr(&addr), len);
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+/* Perform a passive open after removing listening socket from SOCKMAP
+ * to ensure that callbacks get restored properly.
+ */
+static void test_clone_after_delete(int family, int sotype, int mapfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err, s, c;
+ u64 value;
+ u32 key;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ key = 0;
+ value = s;
+ xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
+ xbpf_map_delete_elem(mapfd, &key);
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ goto close_srv;
+
+ xconnect(c, sockaddr(&addr), len);
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+/* Check that child socket that got created while parent was in a
+ * SOCKMAP, but got accept()'ed only after the parent has been removed
+ * from SOCKMAP, gets cloned without parent psock state or callbacks.
+ */
+static void test_accept_after_delete(int family, int sotype, int mapfd)
+{
+ struct sockaddr_storage addr;
+ const u32 zero = 0;
+ int err, s, c, p;
+ socklen_t len;
+ u64 value;
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s == -1)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ value = s;
+ err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+ if (err)
+ goto close_srv;
+
+ c = xsocket(family, sotype, 0);
+ if (c == -1)
+ goto close_srv;
+
+ /* Create child while parent is in sockmap */
+ err = xconnect(c, sockaddr(&addr), len);
+ if (err)
+ goto close_cli;
+
+ /* Remove parent from sockmap */
+ err = xbpf_map_delete_elem(mapfd, &zero);
+ if (err)
+ goto close_cli;
+
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p == -1)
+ goto close_cli;
+
+ /* Check that child sk_user_data is not set */
+ value = p;
+ xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+
+ xclose(p);
+close_cli:
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+/* Check that child socket that got created and accepted while parent
+ * was in a SOCKMAP is cloned without parent psock state or callbacks.
+ */
+static void test_accept_before_delete(int family, int sotype, int mapfd)
+{
+ struct sockaddr_storage addr;
+ const u32 zero = 0, one = 1;
+ int err, s, c, p;
+ socklen_t len;
+ u64 value;
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s == -1)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ value = s;
+ err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+ if (err)
+ goto close_srv;
+
+ c = xsocket(family, sotype, 0);
+ if (c == -1)
+ goto close_srv;
+
+ /* Create & accept child while parent is in sockmap */
+ err = xconnect(c, sockaddr(&addr), len);
+ if (err)
+ goto close_cli;
+
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p == -1)
+ goto close_cli;
+
+ /* Check that child sk_user_data is not set */
+ value = p;
+ xbpf_map_update_elem(mapfd, &one, &value, BPF_NOEXIST);
+
+ xclose(p);
+close_cli:
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+struct connect_accept_ctx {
+ int sockfd;
+ unsigned int done;
+ unsigned int nr_iter;
+};
+
+static bool is_thread_done(struct connect_accept_ctx *ctx)
+{
+ return READ_ONCE(ctx->done);
+}
+
+static void *connect_accept_thread(void *arg)
+{
+ struct connect_accept_ctx *ctx = arg;
+ struct sockaddr_storage addr;
+ int family, socktype;
+ socklen_t len;
+ int err, i, s;
+
+ s = ctx->sockfd;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto done;
+
+ len = sizeof(family);
+ err = xgetsockopt(s, SOL_SOCKET, SO_DOMAIN, &family, &len);
+ if (err)
+ goto done;
+
+ len = sizeof(socktype);
+ err = xgetsockopt(s, SOL_SOCKET, SO_TYPE, &socktype, &len);
+ if (err)
+ goto done;
+
+ for (i = 0; i < ctx->nr_iter; i++) {
+ int c, p;
+
+ c = xsocket(family, socktype, 0);
+ if (c < 0)
+ break;
+
+ err = xconnect(c, (struct sockaddr *)&addr, sizeof(addr));
+ if (err) {
+ xclose(c);
+ break;
+ }
+
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p < 0) {
+ xclose(c);
+ break;
+ }
+
+ xclose(p);
+ xclose(c);
+ }
+done:
+ WRITE_ONCE(ctx->done, 1);
+ return NULL;
+}
+
+static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
+{
+ struct connect_accept_ctx ctx = { 0 };
+ struct sockaddr_storage addr;
+ socklen_t len;
+ u32 zero = 0;
+ pthread_t t;
+ int err, s;
+ u64 value;
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close;
+
+ ctx.sockfd = s;
+ ctx.nr_iter = 1000;
+
+ err = xpthread_create(&t, NULL, connect_accept_thread, &ctx);
+ if (err)
+ goto close;
+
+ value = s;
+ while (!is_thread_done(&ctx)) {
+ err = xbpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+ if (err)
+ break;
+
+ err = xbpf_map_delete_elem(mapfd, &zero);
+ if (err)
+ break;
+ }
+
+ xpthread_join(t, NULL);
+close:
+ xclose(s);
+}
+
+static void *listen_thread(void *arg)
+{
+ struct sockaddr unspec = { AF_UNSPEC };
+ struct connect_accept_ctx *ctx = arg;
+ int err, i, s;
+
+ s = ctx->sockfd;
+
+ for (i = 0; i < ctx->nr_iter; i++) {
+ err = xlisten(s, 1);
+ if (err)
+ break;
+ err = xconnect(s, &unspec, sizeof(unspec));
+ if (err)
+ break;
+ }
+
+ WRITE_ONCE(ctx->done, 1);
+ return NULL;
+}
+
+static void test_race_insert_listen(int family, int socktype, int mapfd)
+{
+ struct connect_accept_ctx ctx = { 0 };
+ const u32 zero = 0;
+ const int one = 1;
+ pthread_t t;
+ int err, s;
+ u64 value;
+
+ s = xsocket(family, socktype, 0);
+ if (s < 0)
+ return;
+
+ err = xsetsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ if (err)
+ goto close;
+
+ ctx.sockfd = s;
+ ctx.nr_iter = 10000;
+
+ err = pthread_create(&t, NULL, listen_thread, &ctx);
+ if (err)
+ goto close;
+
+ value = s;
+ while (!is_thread_done(&ctx)) {
+ err = bpf_map_update_elem(mapfd, &zero, &value, BPF_NOEXIST);
+ /* Expecting EOPNOTSUPP before listen() */
+ if (err && errno != EOPNOTSUPP) {
+ FAIL_ERRNO("map_update");
+ break;
+ }
+
+ err = bpf_map_delete_elem(mapfd, &zero);
+ /* Expecting no entry after unhash on connect(AF_UNSPEC) */
+ if (err && errno != EINVAL && errno != ENOENT) {
+ FAIL_ERRNO("map_delete");
+ break;
+ }
+ }
+
+ xpthread_join(t, NULL);
+close:
+ xclose(s);
+}
+
+static void zero_verdict_count(int mapfd)
+{
+ unsigned int zero = 0;
+ int key;
+
+ key = SK_DROP;
+ xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
+ key = SK_PASS;
+ xbpf_map_update_elem(mapfd, &key, &zero, BPF_ANY);
+}
+
+enum redir_mode {
+ REDIR_INGRESS,
+ REDIR_EGRESS,
+};
+
+static const char *redir_mode_str(enum redir_mode mode)
+{
+ switch (mode) {
+ case REDIR_INGRESS:
+ return "ingress";
+ case REDIR_EGRESS:
+ return "egress";
+ default:
+ return "unknown";
+ }
+}
+
+static void redir_to_connected(int family, int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ struct sockaddr_storage addr;
+ int s, c0, c1, p0, p1;
+ unsigned int pass;
+ socklen_t len;
+ int err, n;
+ u64 value;
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ c0 = xsocket(family, sotype, 0);
+ if (c0 < 0)
+ goto close_srv;
+ err = xconnect(c0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+
+ p0 = xaccept_nonblock(s, NULL, NULL);
+ if (p0 < 0)
+ goto close_cli0;
+
+ c1 = xsocket(family, sotype, 0);
+ if (c1 < 0)
+ goto close_peer0;
+ err = xconnect(c1, sockaddr(&addr), len);
+ if (err)
+ goto close_cli1;
+
+ p1 = xaccept_nonblock(s, NULL, NULL);
+ if (p1 < 0)
+ goto close_cli1;
+
+ key = 0;
+ value = p0;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_peer1;
+
+ key = 1;
+ value = p1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_peer1;
+
+ n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close_peer1;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close_peer1;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+ n = read(c0, &b, 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: read", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close_peer1:
+ xclose(p1);
+close_cli1:
+ xclose(c1);
+close_peer0:
+ xclose(p0);
+close_cli0:
+ xclose(c0);
+close_srv:
+ xclose(s);
+}
+
+static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return;
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ goto detach;
+
+ redir_to_connected(family, sotype, sock_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+ xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
+static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
+ if (err)
+ return;
+
+ redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
+}
+
+static void redir_to_listening(int family, int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ struct sockaddr_storage addr;
+ int s, c, p, err, n;
+ unsigned int drop;
+ socklen_t len;
+ u64 value;
+ u32 key;
+
+ zero_verdict_count(verd_mapfd);
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ goto close_srv;
+ err = xconnect(c, sockaddr(&addr), len);
+ if (err)
+ goto close_cli;
+
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p < 0)
+ goto close_cli;
+
+ key = 0;
+ value = s;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_peer;
+
+ key = 1;
+ value = p;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_peer;
+
+ n = write(mode == REDIR_INGRESS ? c : p, "a", 1);
+ if (n < 0 && errno != EACCES)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close_peer;
+
+ key = SK_DROP;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &drop);
+ if (err)
+ goto close_peer;
+ if (drop != 1)
+ FAIL("%s: want drop count 1, have %d", log_prefix, drop);
+
+close_peer:
+ xclose(p);
+close_cli:
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return;
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ goto detach;
+
+ redir_to_listening(family, sotype, sock_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+ xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
+static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_MSG_VERDICT, 0);
+ if (err)
+ return;
+
+ redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
+}
+
+static void test_reuseport_select_listening(int family, int sotype,
+ int sock_map, int verd_map,
+ int reuseport_prog)
+{
+ struct sockaddr_storage addr;
+ unsigned int pass;
+ int s, c, err;
+ socklen_t len;
+ u64 value;
+ u32 key;
+
+ zero_verdict_count(verd_map);
+
+ s = socket_loopback_reuseport(family, sotype | SOCK_NONBLOCK,
+ reuseport_prog);
+ if (s < 0)
+ return;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ key = 0;
+ value = s;
+ err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_srv;
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ goto close_srv;
+ err = xconnect(c, sockaddr(&addr), len);
+ if (err)
+ goto close_cli;
+
+ if (sotype == SOCK_STREAM) {
+ int p;
+
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p < 0)
+ goto close_cli;
+ xclose(p);
+ } else {
+ char b = 'a';
+ ssize_t n;
+
+ n = xsend(c, &b, sizeof(b), 0);
+ if (n == -1)
+ goto close_cli;
+
+ n = xrecv_nonblock(s, &b, sizeof(b), 0);
+ if (n == -1)
+ goto close_cli;
+ }
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_map, &key, &pass);
+ if (err)
+ goto close_cli;
+ if (pass != 1)
+ FAIL("want pass count 1, have %d", pass);
+
+close_cli:
+ xclose(c);
+close_srv:
+ xclose(s);
+}
+
+static void test_reuseport_select_connected(int family, int sotype,
+ int sock_map, int verd_map,
+ int reuseport_prog)
+{
+ struct sockaddr_storage addr;
+ int s, c0, c1, p0, err;
+ unsigned int drop;
+ socklen_t len;
+ u64 value;
+ u32 key;
+
+ zero_verdict_count(verd_map);
+
+ s = socket_loopback_reuseport(family, sotype, reuseport_prog);
+ if (s < 0)
+ return;
+
+ /* Populate sock_map[0] to avoid ENOENT on first connection */
+ key = 0;
+ value = s;
+ err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_srv;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ c0 = xsocket(family, sotype, 0);
+ if (c0 < 0)
+ goto close_srv;
+
+ err = xconnect(c0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+
+ if (sotype == SOCK_STREAM) {
+ p0 = xaccept_nonblock(s, NULL, NULL);
+ if (p0 < 0)
+ goto close_cli0;
+ } else {
+ p0 = xsocket(family, sotype, 0);
+ if (p0 < 0)
+ goto close_cli0;
+
+ len = sizeof(addr);
+ err = xgetsockname(c0, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli0;
+
+ err = xconnect(p0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+ }
+
+ /* Update sock_map[0] to redirect to a connected socket */
+ key = 0;
+ value = p0;
+ err = xbpf_map_update_elem(sock_map, &key, &value, BPF_EXIST);
+ if (err)
+ goto close_peer0;
+
+ c1 = xsocket(family, sotype, 0);
+ if (c1 < 0)
+ goto close_peer0;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv;
+
+ errno = 0;
+ err = connect(c1, sockaddr(&addr), len);
+ if (sotype == SOCK_DGRAM) {
+ char b = 'a';
+ ssize_t n;
+
+ n = xsend(c1, &b, sizeof(b), 0);
+ if (n == -1)
+ goto close_cli1;
+
+ n = recv_timeout(c1, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
+ err = n == -1;
+ }
+ if (!err || errno != ECONNREFUSED)
+ FAIL_ERRNO("connect: expected ECONNREFUSED");
+
+ key = SK_DROP;
+ err = xbpf_map_lookup_elem(verd_map, &key, &drop);
+ if (err)
+ goto close_cli1;
+ if (drop != 1)
+ FAIL("want drop count 1, have %d", drop);
+
+close_cli1:
+ xclose(c1);
+close_peer0:
+ xclose(p0);
+close_cli0:
+ xclose(c0);
+close_srv:
+ xclose(s);
+}
+
+/* Check that redirecting across reuseport groups is not allowed. */
+static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
+ int verd_map, int reuseport_prog)
+{
+ struct sockaddr_storage addr;
+ int s1, s2, c, err;
+ unsigned int drop;
+ socklen_t len;
+ u64 value;
+ u32 key;
+
+ zero_verdict_count(verd_map);
+
+ /* Create two listeners, each in its own reuseport group */
+ s1 = socket_loopback_reuseport(family, sotype, reuseport_prog);
+ if (s1 < 0)
+ return;
+
+ s2 = socket_loopback_reuseport(family, sotype, reuseport_prog);
+ if (s2 < 0)
+ goto close_srv1;
+
+ key = 0;
+ value = s1;
+ err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_srv2;
+
+ key = 1;
+ value = s2;
+ err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+
+ /* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
+ len = sizeof(addr);
+ err = xgetsockname(s2, sockaddr(&addr), &len);
+ if (err)
+ goto close_srv2;
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ goto close_srv2;
+
+ err = connect(c, sockaddr(&addr), len);
+ if (sotype == SOCK_DGRAM) {
+ char b = 'a';
+ ssize_t n;
+
+ n = xsend(c, &b, sizeof(b), 0);
+ if (n == -1)
+ goto close_cli;
+
+ n = recv_timeout(c, &b, sizeof(b), 0, IO_TIMEOUT_SEC);
+ err = n == -1;
+ }
+ if (!err || errno != ECONNREFUSED) {
+ FAIL_ERRNO("connect: expected ECONNREFUSED");
+ goto close_cli;
+ }
+
+ /* Expect drop, can't redirect outside of reuseport group */
+ key = SK_DROP;
+ err = xbpf_map_lookup_elem(verd_map, &key, &drop);
+ if (err)
+ goto close_cli;
+ if (drop != 1)
+ FAIL("want drop count 1, have %d", drop);
+
+close_cli:
+ xclose(c);
+close_srv2:
+ xclose(s2);
+close_srv1:
+ xclose(s1);
+}
+
+#define TEST(fn, ...) \
+ { \
+ fn, #fn, __VA_ARGS__ \
+ }
+
+static void test_ops_cleanup(const struct bpf_map *map)
+{
+ const struct bpf_map_def *def;
+ int err, mapfd;
+ u32 key;
+
+ def = bpf_map__def(map);
+ mapfd = bpf_map__fd(map);
+
+ for (key = 0; key < def->max_entries; key++) {
+ err = bpf_map_delete_elem(mapfd, &key);
+ if (err && errno != EINVAL && errno != ENOENT)
+ FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
+ }
+}
+
+static const char *family_str(sa_family_t family)
+{
+ switch (family) {
+ case AF_INET:
+ return "IPv4";
+ case AF_INET6:
+ return "IPv6";
+ default:
+ return "unknown";
+ }
+}
+
+static const char *map_type_str(const struct bpf_map *map)
+{
+ const struct bpf_map_def *def;
+
+ def = bpf_map__def(map);
+ if (IS_ERR(def))
+ return "invalid";
+
+ switch (def->type) {
+ case BPF_MAP_TYPE_SOCKMAP:
+ return "sockmap";
+ case BPF_MAP_TYPE_SOCKHASH:
+ return "sockhash";
+ default:
+ return "unknown";
+ }
+}
+
+static const char *sotype_str(int sotype)
+{
+ switch (sotype) {
+ case SOCK_DGRAM:
+ return "UDP";
+ case SOCK_STREAM:
+ return "TCP";
+ default:
+ return "unknown";
+ }
+}
+
+static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family, int sotype)
+{
+ const struct op_test {
+ void (*fn)(int family, int sotype, int mapfd);
+ const char *name;
+ int sotype;
+ } tests[] = {
+ /* insert */
+ TEST(test_insert_invalid),
+ TEST(test_insert_opened),
+ TEST(test_insert_bound, SOCK_STREAM),
+ TEST(test_insert),
+ /* delete */
+ TEST(test_delete_after_insert),
+ TEST(test_delete_after_close),
+ /* lookup */
+ TEST(test_lookup_after_insert),
+ TEST(test_lookup_after_delete),
+ TEST(test_lookup_32_bit_value),
+ /* update */
+ TEST(test_update_existing),
+ /* races with insert/delete */
+ TEST(test_destroy_orphan_child, SOCK_STREAM),
+ TEST(test_syn_recv_insert_delete, SOCK_STREAM),
+ TEST(test_race_insert_listen, SOCK_STREAM),
+ /* child clone */
+ TEST(test_clone_after_delete, SOCK_STREAM),
+ TEST(test_accept_after_delete, SOCK_STREAM),
+ TEST(test_accept_before_delete, SOCK_STREAM),
+ };
+ const char *family_name, *map_name, *sotype_name;
+ const struct op_test *t;
+ char s[MAX_TEST_NAME];
+ int map_fd;
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ sotype_name = sotype_str(sotype);
+ map_fd = bpf_map__fd(map);
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
+ sotype_name, t->name);
+
+ if (t->sotype != 0 && t->sotype != sotype)
+ continue;
+
+ if (!test__start_subtest(s))
+ continue;
+
+ t->fn(family, sotype, map_fd);
+ test_ops_cleanup(map);
+ }
+}
+
+static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family, int sotype)
+{
+ const struct redir_test {
+ void (*fn)(struct test_sockmap_listen *skel,
+ struct bpf_map *map, int family, int sotype);
+ const char *name;
+ } tests[] = {
+ TEST(test_skb_redir_to_connected),
+ TEST(test_skb_redir_to_listening),
+ TEST(test_msg_redir_to_connected),
+ TEST(test_msg_redir_to_listening),
+ };
+ const char *family_name, *map_name;
+ const struct redir_test *t;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name,
+ t->name);
+
+ if (!test__start_subtest(s))
+ continue;
+
+ t->fn(skel, map, family, sotype);
+ }
+}
+
+static void test_reuseport(struct test_sockmap_listen *skel,
+ struct bpf_map *map, int family, int sotype)
+{
+ const struct reuseport_test {
+ void (*fn)(int family, int sotype, int socket_map,
+ int verdict_map, int reuseport_prog);
+ const char *name;
+ int sotype;
+ } tests[] = {
+ TEST(test_reuseport_select_listening),
+ TEST(test_reuseport_select_connected),
+ TEST(test_reuseport_mixed_groups),
+ };
+ int socket_map, verdict_map, reuseport_prog;
+ const char *family_name, *map_name, *sotype_name;
+ const struct reuseport_test *t;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ sotype_name = sotype_str(sotype);
+
+ socket_map = bpf_map__fd(map);
+ verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ reuseport_prog = bpf_program__fd(skel->progs.prog_reuseport);
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ snprintf(s, sizeof(s), "%s %s %s %s", map_name, family_name,
+ sotype_name, t->name);
+
+ if (t->sotype != 0 && t->sotype != sotype)
+ continue;
+
+ if (!test__start_subtest(s))
+ continue;
+
+ t->fn(family, sotype, socket_map, verdict_map, reuseport_prog);
+ }
+}
+
+static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
+{
+ test_ops(skel, map, family, SOCK_STREAM);
+ test_ops(skel, map, family, SOCK_DGRAM);
+ test_redir(skel, map, family, SOCK_STREAM);
+ test_reuseport(skel, map, family, SOCK_STREAM);
+ test_reuseport(skel, map, family, SOCK_DGRAM);
+}
+
+void test_sockmap_listen(void)
+{
+ struct test_sockmap_listen *skel;
+
+ skel = test_sockmap_listen__open_and_load();
+ if (!skel) {
+ FAIL("skeleton open/load failed");
+ return;
+ }
+
+ skel->bss->test_sockmap = true;
+ run_tests(skel, skel->maps.sock_map, AF_INET);
+ run_tests(skel, skel->maps.sock_map, AF_INET6);
+
+ skel->bss->test_sockmap = false;
+ run_tests(skel, skel->maps.sock_hash, AF_INET);
+ run_tests(skel, skel->maps.sock_hash, AF_INET6);
+
+ test_sockmap_listen__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 1ae00cd3174e..7577a77a4c4c 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -1,5 +1,19 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
+
+static void *spin_lock_thread(void *arg)
+{
+ __u32 duration, retval;
+ int err, prog_fd = *(u32 *) arg;
+
+ err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, &retval, &duration);
+ CHECK(err || retval, "",
+ "err %d errno %d retval %d duration %d\n",
+ err, errno, retval, duration);
+ pthread_exit(arg);
+}
void test_spinlock(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index f4cd60d6fba2..9013a0c01eed 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "network_helpers.h"
struct tcp_rtt_storage {
__u32 invoked;
@@ -87,34 +88,6 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
return err;
}
-static int connect_to_server(int server_fd)
-{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd;
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- log_err("Failed to create client socket");
- return -1;
- }
-
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- goto out;
- }
-
- if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
- log_err("Fail to connect to server");
- goto out;
- }
-
- return fd;
-
-out:
- close(fd);
- return -1;
-}
static int run_test(int cgroup_fd, int server_fd)
{
@@ -145,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd)
goto close_bpf_object;
}
- client_fd = connect_to_server(server_fd);
+ client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
if (client_fd < 0) {
err = -1;
goto close_bpf_object;
@@ -180,95 +153,22 @@ close_bpf_object:
return err;
}
-static int start_server(void)
-{
- struct sockaddr_in addr = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
- };
- int fd;
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- log_err("Failed to create server socket");
- return -1;
- }
-
- if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
- log_err("Failed to bind socket");
- close(fd);
- return -1;
- }
-
- return fd;
-}
-
-static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
-
-static void *server_thread(void *arg)
-{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd = *(int *)arg;
- int client_fd;
- int err;
-
- err = listen(fd, 1);
-
- pthread_mutex_lock(&server_started_mtx);
- pthread_cond_signal(&server_started);
- pthread_mutex_unlock(&server_started_mtx);
-
- if (CHECK_FAIL(err < 0)) {
- perror("Failed to listed on socket");
- return NULL;
- }
-
- client_fd = accept(fd, (struct sockaddr *)&addr, &len);
- if (CHECK_FAIL(client_fd < 0)) {
- perror("Failed to accept client");
- return NULL;
- }
-
- /* Wait for the next connection (that never arrives)
- * to keep this thread alive to prevent calling
- * close() on client_fd.
- */
- if (CHECK_FAIL(accept(fd, (struct sockaddr *)&addr, &len) >= 0)) {
- perror("Unexpected success in second accept");
- return NULL;
- }
-
- close(client_fd);
-
- return NULL;
-}
-
void test_tcp_rtt(void)
{
int server_fd, cgroup_fd;
- pthread_t tid;
cgroup_fd = test__join_cgroup("/tcp_rtt");
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server();
+ server_fd = start_server(AF_INET, SOCK_STREAM);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
- if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
- (void *)&server_fd)))
- goto close_server_fd;
-
- pthread_mutex_lock(&server_started_mtx);
- pthread_cond_wait(&server_started, &server_started_mtx);
- pthread_mutex_unlock(&server_started_mtx);
-
CHECK_FAIL(run_test(cgroup_fd, server_fd));
-close_server_fd:
+
close(server_fd);
+
close_cgroup_fd:
close(cgroup_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
new file mode 100644
index 000000000000..b17eb2045c1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <malloc.h>
+#include <stdlib.h>
+
+#include "lsm.skel.h"
+
+char *CMD_ARGS[] = {"true", NULL};
+
+#define GET_PAGE_ADDR(ADDR, PAGE_SIZE) \
+ (char *)(((unsigned long) (ADDR + PAGE_SIZE)) & ~(PAGE_SIZE-1))
+
+int stack_mprotect(void)
+{
+ void *buf;
+ long sz;
+ int ret;
+
+ sz = sysconf(_SC_PAGESIZE);
+ if (sz < 0)
+ return sz;
+
+ buf = alloca(sz * 3);
+ ret = mprotect(GET_PAGE_ADDR(buf, sz), sz,
+ PROT_READ | PROT_WRITE | PROT_EXEC);
+ return ret;
+}
+
+int exec_cmd(int *monitored_pid)
+{
+ int child_pid, child_status;
+
+ child_pid = fork();
+ if (child_pid == 0) {
+ *monitored_pid = getpid();
+ execvp(CMD_ARGS[0], CMD_ARGS);
+ return -EINVAL;
+ } else if (child_pid > 0) {
+ waitpid(child_pid, &child_status, 0);
+ return child_status;
+ }
+
+ return -EINVAL;
+}
+
+void test_test_lsm(void)
+{
+ struct lsm *skel = NULL;
+ int err, duration = 0;
+
+ skel = lsm__open_and_load();
+ if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
+ goto close_prog;
+
+ err = lsm__attach(skel);
+ if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
+ goto close_prog;
+
+ err = exec_cmd(&skel->bss->monitored_pid);
+ if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
+ goto close_prog;
+
+ CHECK(skel->bss->bprm_count != 1, "bprm_count", "bprm_count = %d\n",
+ skel->bss->bprm_count);
+
+ skel->bss->monitored_pid = getpid();
+
+ err = stack_mprotect();
+ if (CHECK(errno != EPERM, "stack_mprotect", "want err=EPERM, got %d\n",
+ errno))
+ goto close_prog;
+
+ CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
+ "mprotect_count = %d\n", skel->bss->mprotect_count);
+
+close_prog:
+ lsm__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 465b371a561d..2702df2b2343 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -61,9 +61,10 @@ void test_test_overhead(void)
const char *raw_tp_name = "raw_tp/task_rename";
const char *fentry_name = "fentry/__set_task_comm";
const char *fexit_name = "fexit/__set_task_comm";
+ const char *fmodret_name = "fmod_ret/__set_task_comm";
const char *kprobe_func = "__set_task_comm";
struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog;
- struct bpf_program *fentry_prog, *fexit_prog;
+ struct bpf_program *fentry_prog, *fexit_prog, *fmodret_prog;
struct bpf_object *obj;
struct bpf_link *link;
int err, duration = 0;
@@ -96,6 +97,10 @@ void test_test_overhead(void)
if (CHECK(!fexit_prog, "find_probe",
"prog '%s' not found\n", fexit_name))
goto cleanup;
+ fmodret_prog = bpf_object__find_program_by_title(obj, fmodret_name);
+ if (CHECK(!fmodret_prog, "find_probe",
+ "prog '%s' not found\n", fmodret_name))
+ goto cleanup;
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
@@ -142,6 +147,13 @@ void test_test_overhead(void)
goto cleanup;
test_run("fexit");
bpf_link__destroy(link);
+
+ /* attach fmod_ret */
+ link = bpf_program__attach_trace(fmodret_prog);
+ if (CHECK(IS_ERR(link), "attach fmod_ret", "err %ld\n", PTR_ERR(link)))
+ goto cleanup;
+ test_run("fmod_ret");
+ bpf_link__destroy(link);
cleanup:
prctl(PR_SET_NAME, comm, 0L, 0L, 0L);
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index 1f6ccdaed1ac..781c8d11604b 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -55,31 +55,40 @@ void test_trampoline_count(void)
/* attach 'allowed' 40 trampoline programs */
for (i = 0; i < MAX_TRAMP_PROGS; i++) {
obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+ obj = NULL;
goto cleanup;
+ }
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
goto cleanup;
inst[i].obj = obj;
+ obj = NULL;
if (rand() % 2) {
- link = load(obj, fentry_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+ link = load(inst[i].obj, fentry_name);
+ if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+ link = NULL;
goto cleanup;
+ }
inst[i].link_fentry = link;
} else {
- link = load(obj, fexit_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link)))
+ link = load(inst[i].obj, fexit_name);
+ if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
+ link = NULL;
goto cleanup;
+ }
inst[i].link_fexit = link;
}
}
/* and try 1 extra.. */
obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
+ obj = NULL;
goto cleanup;
+ }
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
@@ -104,7 +113,9 @@ void test_trampoline_count(void)
cleanup_extra:
bpf_object__close(obj);
cleanup:
- while (--i) {
+ if (i >= MAX_TRAMP_PROGS)
+ i = MAX_TRAMP_PROGS - 1;
+ for (; i >= 0; i--) {
bpf_link__destroy(inst[i].link_fentry);
bpf_link__destroy(inst[i].link_fexit);
bpf_object__close(inst[i].obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
new file mode 100644
index 000000000000..72310cfc6474
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_vmlinux.skel.h"
+
+#define MY_TV_NSEC 1337
+
+static void nsleep()
+{
+ struct timespec ts = { .tv_nsec = MY_TV_NSEC };
+
+ (void)syscall(__NR_nanosleep, &ts, NULL);
+}
+
+void test_vmlinux(void)
+{
+ int duration = 0, err;
+ struct test_vmlinux* skel;
+ struct test_vmlinux__bss *bss;
+
+ skel = test_vmlinux__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+ bss = skel->bss;
+
+ err = test_vmlinux__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger everything */
+ nsleep();
+
+ CHECK(!bss->tp_called, "tp", "not called\n");
+ CHECK(!bss->raw_tp_called, "raw_tp", "not called\n");
+ CHECK(!bss->tp_btf_called, "tp_btf", "not called\n");
+ CHECK(!bss->kprobe_called, "kprobe", "not called\n");
+ CHECK(!bss->fentry_called, "fentry", "not called\n");
+
+cleanup:
+ test_vmlinux__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index dcb5ecac778e..48921ff74850 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_xdp(void)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 3744196d7cba..d5c98f2cb12f 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -1,13 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
-void test_xdp_adjust_tail(void)
+void test_xdp_adjust_tail_shrink(void)
{
- const char *file = "./test_adjust_tail.o";
+ const char *file = "./test_xdp_adjust_tail_shrink.o";
+ __u32 duration, retval, size, expect_sz;
struct bpf_object *obj;
- char buf[128];
- __u32 duration, retval, size;
int err, prog_fd;
+ char buf[128];
err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
@@ -20,10 +21,121 @@ void test_xdp_adjust_tail(void)
"ipv4", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
+ expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */
err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != 54,
- "ipv6", "err %d errno %d retval %d size %d\n",
+ CHECK(err || retval != XDP_TX || size != expect_sz,
+ "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
+ err, errno, retval, size, expect_sz);
+ bpf_object__close(obj);
+}
+
+void test_xdp_adjust_tail_grow(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.o";
+ struct bpf_object *obj;
+ char buf[4096]; /* avoid segfault: large buf to hold grow results */
+ __u32 duration, retval, size, expect_sz;
+ int err, prog_fd;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ buf, &size, &retval, &duration);
+ CHECK(err || retval != XDP_DROP,
+ "ipv4", "err %d errno %d retval %d size %d\n",
err, errno, retval, size);
+
+ expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
+ buf, &size, &retval, &duration);
+ CHECK(err || retval != XDP_TX || size != expect_sz,
+ "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
+ err, errno, retval, size, expect_sz);
+
+ bpf_object__close(obj);
+}
+
+void test_xdp_adjust_tail_grow2(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.o";
+ char buf[4096]; /* avoid segfault: large buf to hold grow results */
+ int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/;
+ struct bpf_object *obj;
+ int err, cnt, i;
+ int max_grow;
+
+ struct bpf_prog_test_run_attr tattr = {
+ .repeat = 1,
+ .data_in = &buf,
+ .data_out = &buf,
+ .data_size_in = 0, /* Per test */
+ .data_size_out = 0, /* Per test */
+ };
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
+ if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ return;
+
+ /* Test case-64 */
+ memset(buf, 1, sizeof(buf));
+ tattr.data_size_in = 64; /* Determine test case via pkt size */
+ tattr.data_size_out = 128; /* Limit copy_size */
+ /* Kernel side alloc packet memory area that is zero init */
+ err = bpf_prog_test_run_xattr(&tattr);
+
+ CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
+ || tattr.retval != XDP_TX
+ || tattr.data_size_out != 192, /* Expected grow size */
+ "case-64",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, tattr.retval, tattr.data_size_out);
+
+ /* Extra checks for data contents */
+ CHECK_ATTR(tattr.data_size_out != 192
+ || buf[0] != 1 || buf[63] != 1 /* 0-63 memset to 1 */
+ || buf[64] != 0 || buf[127] != 0 /* 64-127 memset to 0 */
+ || buf[128] != 1 || buf[191] != 1, /*128-191 memset to 1 */
+ "case-64-data",
+ "err %d errno %d retval %d size %d\n",
+ err, errno, tattr.retval, tattr.data_size_out);
+
+ /* Test case-128 */
+ memset(buf, 2, sizeof(buf));
+ tattr.data_size_in = 128; /* Determine test case via pkt size */
+ tattr.data_size_out = sizeof(buf); /* Copy everything */
+ err = bpf_prog_test_run_xattr(&tattr);
+
+ max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */
+ CHECK_ATTR(err
+ || tattr.retval != XDP_TX
+ || tattr.data_size_out != max_grow,/* Expect max grow size */
+ "case-128",
+ "err %d errno %d retval %d size %d expect-size %d\n",
+ err, errno, tattr.retval, tattr.data_size_out, max_grow);
+
+ /* Extra checks for data content: Count grow size, will contain zeros */
+ for (i = 0, cnt = 0; i < sizeof(buf); i++) {
+ if (buf[i] == 0)
+ cnt++;
+ }
+ CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
+ || tattr.data_size_out != max_grow, /* Total grow size */
+ "case-128-data",
+ "err %d errno %d retval %d size %d grow-size %d\n",
+ err, errno, tattr.retval, tattr.data_size_out, cnt);
+
bpf_object__close(obj);
}
+
+void test_xdp_adjust_tail(void)
+{
+ if (test__start_subtest("xdp_adjust_tail_shrink"))
+ test_xdp_adjust_tail_shrink();
+ if (test__start_subtest("xdp_adjust_tail_grow"))
+ test_xdp_adjust_tail_grow();
+ if (test__start_subtest("xdp_adjust_tail_grow2"))
+ test_xdp_adjust_tail_grow2();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
new file mode 100644
index 000000000000..15ef3531483e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#define IFINDEX_LO 1
+#define XDP_FLAGS_REPLACE (1U << 4)
+
+void test_xdp_attach(void)
+{
+ __u32 duration = 0, id1, id2, id0 = 0, len;
+ struct bpf_object *obj1, *obj2, *obj3;
+ const char *file = "./test_xdp.o";
+ struct bpf_prog_info info = {};
+ int err, fd1, fd2, fd3;
+ DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts,
+ .old_fd = -1);
+
+ len = sizeof(info);
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1);
+ if (CHECK_FAIL(err))
+ return;
+ err = bpf_obj_get_info_by_fd(fd1, &info, &len);
+ if (CHECK_FAIL(err))
+ goto out_1;
+ id1 = info.id;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2);
+ if (CHECK_FAIL(err))
+ goto out_1;
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_obj_get_info_by_fd(fd2, &info, &len);
+ if (CHECK_FAIL(err))
+ goto out_2;
+ id2 = info.id;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3);
+ if (CHECK_FAIL(err))
+ goto out_2;
+
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE,
+ &opts);
+ if (CHECK(err, "load_ok", "initial load failed"))
+ goto out_close;
+
+ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ if (CHECK(err || id0 != id1, "id1_check",
+ "loaded prog id %u != id1 %u, err %d", id0, id1, err))
+ goto out_close;
+
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE,
+ &opts);
+ if (CHECK(!err, "load_fail", "load with expected id didn't fail"))
+ goto out;
+
+ opts.old_fd = fd1;
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts);
+ if (CHECK(err, "replace_ok", "replace valid old_fd failed"))
+ goto out;
+ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ if (CHECK(err || id0 != id2, "id2_check",
+ "loaded prog id %u != id2 %u, err %d", id0, id2, err))
+ goto out_close;
+
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts);
+ if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail"))
+ goto out;
+
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+ if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail"))
+ goto out;
+
+ opts.old_fd = fd2;
+ err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+ if (CHECK(err, "remove_ok", "remove valid old_fd failed"))
+ goto out;
+
+ err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ if (CHECK(err || id0 != 0, "unload_check",
+ "loaded prog id %u != 0, err %d", id0, err))
+ goto out_close;
+out:
+ bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+out_close:
+ bpf_object__close(obj3);
+out_2:
+ bpf_object__close(obj2);
+out_1:
+ bpf_object__close(obj1);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 6b56bdc73ebc..2c6c570b21f8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -1,20 +1,55 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
#include <net/if.h>
#include "test_xdp.skel.h"
#include "test_xdp_bpf2bpf.skel.h"
+struct meta {
+ int ifindex;
+ int pkt_len;
+};
+
+static void on_sample(void *ctx, int cpu, void *data, __u32 size)
+{
+ int duration = 0;
+ struct meta *meta = (struct meta *)data;
+ struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+
+ if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
+ "check_size", "size %u < %zu\n",
+ size, sizeof(pkt_v4) + sizeof(*meta)))
+ return;
+
+ if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
+ "meta->ifindex = %d\n", meta->ifindex))
+ return;
+
+ if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
+ "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+ return;
+
+ if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
+ "check_packet_content", "content not the same\n"))
+ return;
+
+ *(bool *)ctx = true;
+}
+
void test_xdp_bpf2bpf(void)
{
__u32 duration = 0, retval, size;
char buf[128];
int err, pkt_fd, map_fd;
+ bool passed = false;
struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
struct iptnl_info value4 = {.family = AF_INET};
struct test_xdp *pkt_skel = NULL;
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
struct vip key4 = {.protocol = 6, .family = AF_INET};
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct bpf_program *prog;
+ struct perf_buffer *pb = NULL;
+ struct perf_buffer_opts pb_opts = {};
/* Load XDP program to introspect */
pkt_skel = test_xdp__open_and_load();
@@ -27,11 +62,21 @@ void test_xdp_bpf2bpf(void)
bpf_map_update_elem(map_fd, &key4, &value4, 0);
/* Load trace program */
- opts.attach_prog_fd = pkt_fd,
- ftrace_skel = test_xdp_bpf2bpf__open_opts(&opts);
+ ftrace_skel = test_xdp_bpf2bpf__open();
if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
goto out;
+ /* Demonstrate the bpf_program__set_attach_target() API rather than
+ * the load with options, i.e. opts.attach_prog_fd.
+ */
+ prog = ftrace_skel->progs.trace_on_entry;
+ bpf_program__set_expected_attach_type(prog, BPF_TRACE_FENTRY);
+ bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
+
+ prog = ftrace_skel->progs.trace_on_exit;
+ bpf_program__set_expected_attach_type(prog, BPF_TRACE_FEXIT);
+ bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
+
err = test_xdp_bpf2bpf__load(ftrace_skel);
if (CHECK(err, "__load", "ftrace skeleton failed\n"))
goto out;
@@ -40,6 +85,14 @@ void test_xdp_bpf2bpf(void)
if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
goto out;
+ /* Set up perf buffer */
+ pb_opts.sample_cb = on_sample;
+ pb_opts.ctx = &passed;
+ pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
+ 1, &pb_opts);
+ if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ goto out;
+
/* Run test program */
err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
buf, &size, &retval, &duration);
@@ -50,6 +103,15 @@ void test_xdp_bpf2bpf(void)
err, errno, retval, size))
goto out;
+ /* Make sure bpf_xdp_output() was triggered and it sent the expected
+ * data to the perf ring buffer.
+ */
+ err = perf_buffer__poll(pb, 100);
+ if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
+ goto out;
+
+ CHECK_FAIL(!passed);
+
/* Verify test results */
if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
"result", "fentry failed err %llu\n",
@@ -60,6 +122,8 @@ void test_xdp_bpf2bpf(void)
"fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
out:
+ if (pb)
+ perf_buffer__free(pb);
test_xdp__destroy(pkt_skel);
test_xdp_bpf2bpf__destroy(ftrace_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
new file mode 100644
index 000000000000..d19dbd668f6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_helpers.skel.h"
+
+#define IFINDEX_LO 1
+
+struct bpf_devmap_val {
+ u32 ifindex; /* device index */
+ union {
+ int fd; /* prog fd on map write */
+ u32 id; /* prog id on map read */
+ } bpf_prog;
+};
+
+void test_xdp_with_devmap_helpers(void)
+{
+ struct test_xdp_with_devmap_helpers *skel;
+ struct bpf_prog_info info = {};
+ struct bpf_devmap_val val = {
+ .ifindex = IFINDEX_LO,
+ };
+ __u32 len = sizeof(info);
+ __u32 duration = 0, idx = 0;
+ int err, dm_fd, map_fd;
+
+
+ skel = test_xdp_with_devmap_helpers__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_xdp_with_devmap_helpers__open_and_load");
+ return;
+ }
+
+ /* can not attach program with DEVMAPs that allow programs
+ * as xdp generic
+ */
+ dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Generic attach of program with 8-byte devmap",
+ "should have failed\n");
+
+ dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+ map_fd = bpf_map__fd(skel->maps.dm_ports);
+ err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
+ if (CHECK_FAIL(err))
+ goto out_close;
+
+ val.bpf_prog.fd = dm_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err, "Add program to devmap entry",
+ "err %d errno %d\n", err, errno);
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
+ CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
+ "expected %u read %u\n", info.id, val.bpf_prog.id);
+
+ /* can not attach BPF_XDP_DEVMAP program to a device */
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+ CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
+ "should have failed\n");
+
+ val.ifindex = 1;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
+ "should have failed\n");
+
+out_close:
+ test_xdp_with_devmap_helpers__destroy(skel);
+}
+
+void test_neg_xdp_devmap_helpers(void)
+{
+ struct test_xdp_devmap_helpers *skel;
+ __u32 duration = 0;
+
+ skel = test_xdp_devmap_helpers__open_and_load();
+ if (CHECK(skel,
+ "Load of XDP program accessing egress ifindex without attach type",
+ "should have failed\n")) {
+ test_xdp_devmap_helpers__destroy(skel);
+ }
+}
+
+
+void test_xdp_devmap_attach(void)
+{
+ if (test__start_subtest("DEVMAP with programs in entries"))
+ test_xdp_with_devmap_helpers();
+
+ if (test__start_subtest("Verifier check of DEVMAP programs"))
+ test_neg_xdp_devmap_helpers();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
new file mode 100644
index 000000000000..d2d7a283d72f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#define IFINDEX_LO 1
+
+void test_xdp_info(void)
+{
+ __u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id;
+ const char *file = "./xdp_dummy.o";
+ struct bpf_prog_info info = {};
+ struct bpf_object *obj;
+ int err, prog_fd;
+
+ /* Get prog_id for XDP_ATTACHED_NONE mode */
+
+ err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+ if (CHECK(err, "get_xdp_none", "errno=%d\n", errno))
+ return;
+ if (CHECK(prog_id, "prog_id_none", "unexpected prog_id=%u\n", prog_id))
+ return;
+
+ err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+ if (CHECK(err, "get_xdp_none_skb", "errno=%d\n", errno))
+ return;
+ if (CHECK(prog_id, "prog_id_none_skb", "unexpected prog_id=%u\n",
+ prog_id))
+ return;
+
+ /* Setup prog */
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
+ if (CHECK(err, "get_prog_info", "errno=%d\n", errno))
+ goto out_close;
+
+ err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ if (CHECK(err, "set_xdp_skb", "errno=%d\n", errno))
+ goto out_close;
+
+ /* Get prog_id for single prog mode */
+
+ err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+ if (CHECK(err, "get_xdp", "errno=%d\n", errno))
+ goto out;
+ if (CHECK(prog_id != info.id, "prog_id", "prog_id not available\n"))
+ goto out;
+
+ err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+ if (CHECK(err, "get_xdp_skb", "errno=%d\n", errno))
+ goto out;
+ if (CHECK(prog_id != info.id, "prog_id_skb", "prog_id not available\n"))
+ goto out;
+
+ err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_DRV_MODE);
+ if (CHECK(err, "get_xdp_drv", "errno=%d\n", errno))
+ goto out;
+ if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id))
+ goto out;
+
+out:
+ bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+out_close:
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index c9404e6b226e..f284f72158ef 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <network_helpers.h>
void test_xdp_noinline(void)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index b631fb5032d2..3fb4260570b1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -6,14 +6,24 @@
* the kernel BPF logic.
*/
+#include <stddef.h>
#include <linux/bpf.h>
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
+int stg_result = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_stg_map SEC(".maps");
+
#define DCTCP_MAX_ALPHA 1024U
struct dctcp {
@@ -43,12 +53,18 @@ void BPF_PROG(dctcp_init, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct dctcp *ca = inet_csk_ca(sk);
+ int *stg;
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
ca->loss_cwnd = 0;
ca->ce_state = 0;
+ stg = bpf_sk_storage_get(&sk_stg_map, (void *)tp, NULL, 0);
+ if (stg) {
+ stg_result = *stg;
+ bpf_sk_storage_delete(&sk_stg_map, (void *)tp);
+ }
dctcp_reset(tp, ca);
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 9941f0ba471e..de6de9221518 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -20,20 +20,20 @@
#include <bpf/bpf_endian.h>
int _version SEC("version") = 1;
-#define PROG(F) SEC(#F) int bpf_func_##F
+#define PROG(F) PROG_(F, _##F)
+#define PROG_(NUM, NAME) SEC("flow_dissector/"#NUM) int bpf_func##NAME
/* These are the identifiers of the BPF programs that will be used in tail
* calls. Name is limited to 16 characters, with the terminating character and
* bpf_func_ above, we have only 6 to work with, anything after will be cropped.
*/
-enum {
- IP,
- IPV6,
- IPV6OP, /* Destination/Hop-by-Hop Options IPv6 Extension header */
- IPV6FR, /* Fragmentation IPv6 Extension Header */
- MPLS,
- VLAN,
-};
+#define IP 0
+#define IPV6 1
+#define IPV6OP 2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */
+#define IPV6FR 3 /* Fragmentation IPv6 Extension Header */
+#define MPLS 4
+#define VLAN 5
+#define MAX_PROG 6
#define IP_MF 0x2000
#define IP_OFFSET 0x1FFF
@@ -59,7 +59,7 @@ struct frag_hdr {
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
- __uint(max_entries, 8);
+ __uint(max_entries, MAX_PROG);
__uint(key_size, sizeof(__u32));
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
new file mode 100644
index 000000000000..b57bd6fef208
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+} __attribute__((preserve_access_index));
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ __u64 seq_num = ctx->meta->seq_num;
+ struct bpf_map *map = ctx->map;
+
+ if (map == (void *)0) {
+ BPF_SEQ_PRINTF(seq, " %%%%%% END %%%%%%\n");
+ return 0;
+ }
+
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " id refcnt usercnt locked_vm\n");
+
+ BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter,
+ map->usercnt.counter,
+ map->memory.user->locked_vm.counter);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
new file mode 100644
index 000000000000..c8e9ca74c87b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__ipv6_route
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__ipv6_route {
+ struct bpf_iter_meta *meta;
+ struct fib6_info *rt;
+} __attribute__((preserve_access_index));
+
+char _license[] SEC("license") = "GPL";
+
+extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
+
+#define RTF_GATEWAY 0x0002
+#define IFNAMSIZ 16
+#define fib_nh_gw_family nh_common.nhc_gw_family
+#define fib_nh_gw6 nh_common.nhc_gw.ipv6
+#define fib_nh_dev nh_common.nhc_dev
+
+SEC("iter/ipv6_route")
+int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct fib6_info *rt = ctx->rt;
+ const struct net_device *dev;
+ struct fib6_nh *fib6_nh;
+ unsigned int flags;
+ struct nexthop *nh;
+
+ if (rt == (void *)0)
+ return 0;
+
+ fib6_nh = &rt->fib6_nh[0];
+ flags = rt->fib6_flags;
+
+ /* FIXME: nexthop_is_multipath is not handled here. */
+ nh = rt->nh;
+ if (rt->nh)
+ fib6_nh = &nh->nh_info->fib6_nh;
+
+ BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
+
+ if (CONFIG_IPV6_SUBTREES)
+ BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_src.addr,
+ rt->fib6_src.plen);
+ else
+ BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 00 ");
+
+ if (fib6_nh->fib_nh_gw_family) {
+ flags |= RTF_GATEWAY;
+ BPF_SEQ_PRINTF(seq, "%pi6 ", &fib6_nh->fib_nh_gw6);
+ } else {
+ BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 ");
+ }
+
+ dev = fib6_nh->fib_nh_dev;
+ if (dev)
+ BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric,
+ rt->fib6_ref.refs.counter, 0, flags, dev->name);
+ else
+ BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x\n", rt->fib6_metric,
+ rt->fib6_ref.refs.counter, 0, flags);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
new file mode 100644
index 000000000000..e7b8753eac0b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__netlink bpf_iter__netlink___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__netlink
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define sk_rmem_alloc sk_backlog.rmem_alloc
+#define sk_refcnt __sk_common.skc_refcnt
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__netlink {
+ struct bpf_iter_meta *meta;
+ struct netlink_sock *sk;
+} __attribute__((preserve_access_index));
+
+static inline struct inode *SOCK_INODE(struct socket *socket)
+{
+ return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
+}
+
+SEC("iter/netlink")
+int dump_netlink(struct bpf_iter__netlink *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct netlink_sock *nlk = ctx->sk;
+ unsigned long group, ino;
+ struct inode *inode;
+ struct socket *sk;
+ struct sock *s;
+
+ if (nlk == (void *)0)
+ return 0;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "sk Eth Pid Groups "
+ "Rmem Wmem Dump Locks Drops "
+ "Inode\n");
+
+ s = &nlk->sk;
+ BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol);
+
+ if (!nlk->groups) {
+ group = 0;
+ } else {
+ /* FIXME: temporary use bpf_probe_read here, needs
+ * verifier support to do direct access.
+ */
+ bpf_probe_read(&group, sizeof(group), &nlk->groups[0]);
+ }
+ BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ",
+ nlk->portid, (u32)group,
+ s->sk_rmem_alloc.counter,
+ s->sk_wmem_alloc.refs.counter - 1,
+ nlk->cb_running, s->sk_refcnt.refs.counter);
+
+ sk = s->sk_socket;
+ if (!sk) {
+ ino = 0;
+ } else {
+ /* FIXME: container_of inside SOCK_INODE has a forced
+ * type conversion, and direct access cannot be used
+ * with current verifier.
+ */
+ inode = SOCK_INODE(sk);
+ bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ }
+ BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
new file mode 100644
index 000000000000..ee754021f98e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+
+ if (task == (void *)0) {
+ BPF_SEQ_PRINTF(seq, " === END ===\n");
+ return 0;
+ }
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " tgid gid\n");
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
new file mode 100644
index 000000000000..0f0ec3db20ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task_file bpf_iter__task_file___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task_file
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task_file {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+ __u32 fd;
+ struct file *file;
+} __attribute__((preserve_access_index));
+
+SEC("iter/task_file")
+int dump_task_file(struct bpf_iter__task_file *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ __u32 fd = ctx->fd;
+ struct file *file = ctx->file;
+
+ if (task == (void *)0 || file == (void *)0)
+ return 0;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " tgid gid fd file\n");
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+ (long)file->f_op);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c
new file mode 100644
index 000000000000..c71a7c283108
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define START_CHAR 'a'
+#include "bpf_iter_test_kern_common.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c
new file mode 100644
index 000000000000..8bdc8dc07444
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define START_CHAR 'A'
+#include "bpf_iter_test_kern_common.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
new file mode 100644
index 000000000000..13c2c90c835f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ int tgid;
+
+ tgid = task->tgid;
+ bpf_seq_write(seq, &tgid, sizeof(tgid));
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
new file mode 100644
index 000000000000..0aa71b333cf3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+} __attribute__((preserve_access_index));
+
+__u32 map1_id = 0, map2_id = 0;
+__u32 map1_accessed = 0, map2_accessed = 0;
+__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
+
+static volatile const __u32 print_len;
+static volatile const __u32 ret1;
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct bpf_map *map = ctx->map;
+ __u64 seq_num;
+ int i, ret = 0;
+
+ if (map == (void *)0)
+ return 0;
+
+ /* only dump map1_id and map2_id */
+ if (map->id != map1_id && map->id != map2_id)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (map->id == map1_id) {
+ map1_seqnum = seq_num;
+ map1_accessed++;
+ }
+
+ if (map->id == map2_id) {
+ if (map2_accessed == 0) {
+ map2_seqnum1 = seq_num;
+ if (ret1)
+ ret = 1;
+ } else {
+ map2_seqnum2 = seq_num;
+ }
+ map2_accessed++;
+ }
+
+ /* fill seq_file buffer */
+ for (i = 0; i < print_len; i++)
+ bpf_seq_write(seq, &seq_num, sizeof(seq_num));
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
new file mode 100644
index 000000000000..dee1339e6905
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+int count = 0;
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ char c;
+
+ if (count < 4) {
+ c = START_CHAR + count;
+ bpf_seq_write(seq, &c, sizeof(c));
+ count++;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index d4a02fe44a12..31975c96e2c9 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -13,7 +13,7 @@ enum e1 {
enum e2 {
C = 100,
- D = -100,
+ D = 4294967295,
E = 0,
};
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
new file mode 100644
index 000000000000..3f757e30d7a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+__u16 g_serv_port = 0;
+
+static inline void set_ip(__u32 *dst, const struct in6_addr *src)
+{
+ dst[0] = src->in6_u.u6_addr32[0];
+ dst[1] = src->in6_u.u6_addr32[1];
+ dst[2] = src->in6_u.u6_addr32[2];
+ dst[3] = src->in6_u.u6_addr32[3];
+}
+
+static inline void set_tuple(struct bpf_sock_tuple *tuple,
+ const struct ipv6hdr *ip6h,
+ const struct tcphdr *tcph)
+{
+ set_ip(tuple->ipv6.saddr, &ip6h->daddr);
+ set_ip(tuple->ipv6.daddr, &ip6h->saddr);
+ tuple->ipv6.sport = tcph->dest;
+ tuple->ipv6.dport = tcph->source;
+}
+
+static inline int is_allowed_peer_cg(struct __sk_buff *skb,
+ const struct ipv6hdr *ip6h,
+ const struct tcphdr *tcph)
+{
+ __u64 cgid, acgid, peer_cgid, peer_acgid;
+ struct bpf_sock_tuple tuple;
+ size_t tuple_len = sizeof(tuple.ipv6);
+ struct bpf_sock *peer_sk;
+
+ set_tuple(&tuple, ip6h, tcph);
+
+ peer_sk = bpf_sk_lookup_tcp(skb, &tuple, tuple_len,
+ BPF_F_CURRENT_NETNS, 0);
+ if (!peer_sk)
+ return 0;
+
+ cgid = bpf_skb_cgroup_id(skb);
+ peer_cgid = bpf_sk_cgroup_id(peer_sk);
+
+ acgid = bpf_skb_ancestor_cgroup_id(skb, 2);
+ peer_acgid = bpf_sk_ancestor_cgroup_id(peer_sk, 2);
+
+ bpf_sk_release(peer_sk);
+
+ return cgid && cgid == peer_cgid && acgid && acgid == peer_acgid;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress_lookup(struct __sk_buff *skb)
+{
+ __u32 serv_port_key = 0;
+ struct ipv6hdr ip6h;
+ struct tcphdr tcph;
+
+ if (skb->protocol != bpf_htons(ETH_P_IPV6))
+ return 1;
+
+ /* For SYN packets coming to listening socket skb->remote_port will be
+ * zero, so IPv6/TCP headers are loaded to identify remote peer
+ * instead.
+ */
+ if (bpf_skb_load_bytes(skb, 0, &ip6h, sizeof(ip6h)))
+ return 1;
+
+ if (ip6h.nexthdr != IPPROTO_TCP)
+ return 1;
+
+ if (bpf_skb_load_bytes(skb, sizeof(ip6h), &tcph, sizeof(tcph)))
+ return 1;
+
+ if (!g_serv_port)
+ return 0;
+
+ if (tcph.dest != g_serv_port)
+ return 1;
+
+ return is_allowed_peer_cg(skb, &ip6h, &tcph);
+}
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 75085119c5bb..1ab2c5eba86c 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -8,6 +8,9 @@
#include <linux/in.h>
#include <linux/in6.h>
#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
@@ -16,13 +19,95 @@
#define DST_REWRITE_IP4 0x7f000001U
#define DST_REWRITE_PORT4 4444
+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX 16
+#endif
+
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
int _version SEC("version") = 1;
+__attribute__ ((noinline))
+int do_bind(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in sa = {};
+
+ sa.sin_family = AF_INET;
+ sa.sin_port = bpf_htons(0);
+ sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+
+ return 1;
+}
+
+static __inline int verify_cc(struct bpf_sock_addr *ctx,
+ char expected[TCP_CA_NAME_MAX])
+{
+ char buf[TCP_CA_NAME_MAX];
+ int i;
+
+ if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
+ return 1;
+
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (buf[i] != expected[i])
+ return 1;
+ if (buf[i] == 0)
+ break;
+ }
+
+ return 0;
+}
+
+static __inline int set_cc(struct bpf_sock_addr *ctx)
+{
+ char reno[TCP_CA_NAME_MAX] = "reno";
+ char cubic[TCP_CA_NAME_MAX] = "cubic";
+
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
+ return 1;
+ if (verify_cc(ctx, reno))
+ return 1;
+
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
+ return 1;
+ if (verify_cc(ctx, cubic))
+ return 1;
+
+ return 0;
+}
+
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+ char veth1[IFNAMSIZ] = "test_sock_addr1";
+ char veth2[IFNAMSIZ] = "test_sock_addr2";
+ char missing[IFNAMSIZ] = "nonexistent_dev";
+ char del_bind[IFNAMSIZ] = "";
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth1, sizeof(veth1)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth2, sizeof(veth2)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &missing, sizeof(missing)) != -ENODEV)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &del_bind, sizeof(del_bind)))
+ return 1;
+
+ return 0;
+}
+
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
struct bpf_sock_tuple tuple = {};
- struct sockaddr_in sa;
struct bpf_sock *sk;
/* Verify that new destination is available. */
@@ -32,6 +117,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
+ /* Bind to device and unbind it. */
+ if (bind_to_device(ctx))
+ return 0;
+
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0;
else if (ctx->type == SOCK_STREAM)
@@ -52,21 +141,15 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
bpf_sk_release(sk);
+ /* Rewrite congestion control. */
+ if (ctx->type == SOCK_STREAM && set_cc(ctx))
+ return 0;
+
/* Rewrite destination. */
ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
- /* Rewrite source. */
- memset(&sa, 0, sizeof(sa));
-
- sa.sin_family = AF_INET;
- sa.sin_port = bpf_htons(0);
- sa.sin_addr.s_addr = bpf_htonl(SRC_REWRITE_IP4);
-
- if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
- return 0;
-
- return 1;
+ return do_bind(ctx) ? 1 : 0;
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c
new file mode 100644
index 000000000000..7396308677a3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
+
+struct svc_addr {
+ __be32 addr;
+ __be16 port;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct svc_addr);
+} service_mapping SEC(".maps");
+
+SEC("cgroup/connect4")
+int connect4(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in sa = {};
+ struct svc_addr *orig;
+
+ /* Force local address to 127.0.0.1:22222. */
+ sa.sin_family = AF_INET;
+ sa.sin_port = bpf_htons(22222);
+ sa.sin_addr.s_addr = bpf_htonl(0x7f000001);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+
+ /* Rewire service 1.2.3.4:60000 to backend 127.0.0.1:60123. */
+ if (ctx->user_port == bpf_htons(60000)) {
+ orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!orig)
+ return 0;
+
+ orig->addr = ctx->user_ip4;
+ orig->port = ctx->user_port;
+
+ ctx->user_ip4 = bpf_htonl(0x7f000001);
+ ctx->user_port = bpf_htons(60123);
+ }
+ return 1;
+}
+
+SEC("cgroup/getsockname4")
+int getsockname4(struct bpf_sock_addr *ctx)
+{
+ /* Expose local server as 1.2.3.4:60000 to client. */
+ if (ctx->user_port == bpf_htons(60123)) {
+ ctx->user_ip4 = bpf_htonl(0x01020304);
+ ctx->user_port = bpf_htons(60000);
+ }
+ return 1;
+}
+
+SEC("cgroup/getpeername4")
+int getpeername4(struct bpf_sock_addr *ctx)
+{
+ struct svc_addr *orig;
+
+ /* Expose service 1.2.3.4:60000 as peer instead of backend. */
+ if (ctx->user_port == bpf_htons(60123)) {
+ orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
+ if (orig) {
+ ctx->user_ip4 = orig->addr;
+ ctx->user_port = orig->port;
+ }
+ }
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c
new file mode 100644
index 000000000000..c1a2b555e9ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
+
+struct svc_addr {
+ __be32 addr[4];
+ __be16 port;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct svc_addr);
+} service_mapping SEC(".maps");
+
+SEC("cgroup/connect6")
+int connect6(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in6 sa = {};
+ struct svc_addr *orig;
+
+ /* Force local address to [::1]:22223. */
+ sa.sin6_family = AF_INET6;
+ sa.sin6_port = bpf_htons(22223);
+ sa.sin6_addr.s6_addr32[3] = bpf_htonl(1);
+
+ if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+ return 0;
+
+ /* Rewire service [fc00::1]:60000 to backend [::1]:60124. */
+ if (ctx->user_port == bpf_htons(60000)) {
+ orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!orig)
+ return 0;
+
+ orig->addr[0] = ctx->user_ip6[0];
+ orig->addr[1] = ctx->user_ip6[1];
+ orig->addr[2] = ctx->user_ip6[2];
+ orig->addr[3] = ctx->user_ip6[3];
+ orig->port = ctx->user_port;
+
+ ctx->user_ip6[0] = 0;
+ ctx->user_ip6[1] = 0;
+ ctx->user_ip6[2] = 0;
+ ctx->user_ip6[3] = bpf_htonl(1);
+ ctx->user_port = bpf_htons(60124);
+ }
+ return 1;
+}
+
+SEC("cgroup/getsockname6")
+int getsockname6(struct bpf_sock_addr *ctx)
+{
+ /* Expose local server as [fc00::1]:60000 to client. */
+ if (ctx->user_port == bpf_htons(60124)) {
+ ctx->user_ip6[0] = bpf_htonl(0xfc000000);
+ ctx->user_ip6[1] = 0;
+ ctx->user_ip6[2] = 0;
+ ctx->user_ip6[3] = bpf_htonl(1);
+ ctx->user_port = bpf_htons(60000);
+ }
+ return 1;
+}
+
+SEC("cgroup/getpeername6")
+int getpeername6(struct bpf_sock_addr *ctx)
+{
+ struct svc_addr *orig;
+
+ /* Expose service [fc00::1]:60000 as peer instead of backend. */
+ if (ctx->user_port == bpf_htons(60124)) {
+ orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
+ if (orig) {
+ ctx->user_ip6[0] = orig->addr[0];
+ ctx->user_ip6[1] = orig->addr[1];
+ ctx->user_ip6[2] = orig->addr[2];
+ ctx->user_ip6[3] = orig->addr[3];
+ ctx->user_port = orig->port;
+ }
+ }
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 6d598cfbdb3e..34d84717c946 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -379,7 +379,7 @@ struct core_reloc_arrays___equiv_zero_sz_arr {
struct core_reloc_arrays_substruct c[3];
struct core_reloc_arrays_substruct d[1][2];
/* equivalent to flexible array */
- struct core_reloc_arrays_substruct f[0][2];
+ struct core_reloc_arrays_substruct f[][2];
};
struct core_reloc_arrays___fixed_arr {
diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
index 38d3a82144ca..9365b686f84b 100644
--- a/tools/testing/selftests/bpf/progs/fentry_test.c
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index c329fccf9842..98e1efe14549 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
struct sk_buff {
unsigned int len;
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
index 92f3fa47cf40..85c0b516d6ee 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf_simple.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
struct sk_buff {
unsigned int len;
diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
index 348109b9ea07..bd1e17d8024c 100644
--- a/tools/testing/selftests/bpf/progs/fexit_test.c
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2019 Facebook */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_connect4.c b/tools/testing/selftests/bpf/progs/freplace_connect4.c
new file mode 100644
index 000000000000..a0ae84230699
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_connect4.c
@@ -0,0 +1,18 @@
+#include <linux/stddef.h>
+#include <linux/ipv6.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+SEC("freplace/do_bind")
+int new_do_bind(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in sa = {};
+
+ bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
index 8f48a909f079..a46a264ce24e 100644
--- a/tools/testing/selftests/bpf/progs/kfree_skb.c
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -4,7 +4,7 @@
#include <stdbool.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct {
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
new file mode 100644
index 000000000000..b4598d4bc4f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+int monitored_pid = 0;
+int mprotect_count = 0;
+int bprm_count = 0;
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
+ unsigned long reqprot, unsigned long prot, int ret)
+{
+ if (ret != 0)
+ return ret;
+
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ int is_stack = 0;
+
+ is_stack = (vma->vm_start <= vma->vm_mm->start_stack &&
+ vma->vm_end >= vma->vm_mm->start_stack);
+
+ if (is_stack && monitored_pid == pid) {
+ mprotect_count++;
+ ret = -EPERM;
+ }
+
+ return ret;
+}
+
+SEC("lsm/bprm_committed_creds")
+int BPF_PROG(test_void_hook, struct linux_binprm *bprm)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (monitored_pid == pid)
+ bprm_count++;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c
new file mode 100644
index 000000000000..8b7466a15c6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/modify_return.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int sequence = 0;
+__s32 input_retval = 0;
+
+__u64 fentry_result = 0;
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(fentry_test, int a, __u64 b)
+{
+ sequence++;
+ fentry_result = (sequence == 1);
+ return 0;
+}
+
+__u64 fmod_ret_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
+{
+ sequence++;
+ /* This is the first fmod_ret program, the ret passed should be 0 */
+ fmod_ret_result = (sequence == 2 && ret == 0);
+ return input_retval;
+}
+
+__u64 fexit_result = 0;
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int a, __u64 b, int ret)
+{
+ sequence++;
+ /* If the input_reval is non-zero a successful modification should have
+ * occurred.
+ */
+ if (input_retval)
+ fexit_result = (sequence == 3 && ret == input_retval);
+ else
+ fexit_result = (sequence == 3 && ret == 4);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
new file mode 100644
index 000000000000..e5ab4836a641
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(value_size, sizeof(int));
+ __uint(key_size, sizeof(int));
+} perfbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+
+long sample_val = 42;
+long dropped __attribute__((aligned(128))) = 0;
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_perfbuf(void *ctx)
+{
+ __u64 *sample;
+ int i;
+
+ for (i = 0; i < batch_cnt; i++) {
+ if (bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU,
+ &sample_val, sizeof(sample_val)))
+ __sync_add_and_fetch(&dropped, 1);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
new file mode 100644
index 000000000000..123607d314d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+const volatile long use_output = 0;
+
+long sample_val = 42;
+long dropped __attribute__((aligned(128))) = 0;
+
+const volatile long wakeup_data_size = 0;
+
+static __always_inline long get_flags()
+{
+ long sz;
+
+ if (!wakeup_data_size)
+ return 0;
+
+ sz = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+ return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_ringbuf(void *ctx)
+{
+ long *sample, flags;
+ int i;
+
+ if (!use_output) {
+ for (i = 0; i < batch_cnt; i++) {
+ sample = bpf_ringbuf_reserve(&ringbuf,
+ sizeof(sample_val), 0);
+ if (!sample) {
+ __sync_add_and_fetch(&dropped, 1);
+ } else {
+ *sample = sample_val;
+ flags = get_flags();
+ bpf_ringbuf_submit(sample, flags);
+ }
+ }
+ } else {
+ for (i = 0; i < batch_cnt; i++) {
+ flags = get_flags();
+ if (bpf_ringbuf_output(&ringbuf, &sample_val,
+ sizeof(sample_val), flags))
+ __sync_add_and_fetch(&dropped, 1);
+ }
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index a5c6d5903b22..ca283af80d4e 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -12,7 +12,6 @@ int bpf_prog1(struct __sk_buff *skb)
__u32 lport = skb->local_port;
__u32 rport = skb->remote_port;
__u8 *d = data;
- __u32 len = (__u32) data_end - (__u32) data;
int err;
if (data + 10 > data_end) {
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index dd8fae6660ab..8056a4c6d918 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -4,6 +4,7 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
int kprobe_res = 0;
int kretprobe_res = 0;
@@ -18,7 +19,7 @@ int handle_kprobe(struct pt_regs *ctx)
}
SEC("kretprobe/sys_nanosleep")
-int handle_kretprobe(struct pt_regs *ctx)
+int BPF_KRETPROBE(handle_kretprobe)
{
kretprobe_res = 2;
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
index 88b0566da13d..31538c9ed193 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
@@ -20,20 +20,12 @@ struct bpf_map_def SEC("maps") btf_map = {
BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
-struct dummy_tracepoint_args {
- unsigned long long pad;
- struct sock *sock;
-};
-
__attribute__((noinline))
-int test_long_fname_2(struct dummy_tracepoint_args *arg)
+int test_long_fname_2(void)
{
struct ipv_counts *counts;
int key = 0;
- if (!arg->sock)
- return 0;
-
counts = bpf_map_lookup_elem(&btf_map, &key);
if (!counts)
return 0;
@@ -44,15 +36,15 @@ int test_long_fname_2(struct dummy_tracepoint_args *arg)
}
__attribute__((noinline))
-int test_long_fname_1(struct dummy_tracepoint_args *arg)
+int test_long_fname_1(void)
{
- return test_long_fname_2(arg);
+ return test_long_fname_2();
}
SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+int _dummy_tracepoint(void *arg)
{
- return test_long_fname_1(arg);
+ return test_long_fname_1();
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
new file mode 100644
index 000000000000..e5093796be97
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} inner_map1 SEC(".maps"),
+ inner_map2 SEC(".maps");
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ /* it's possible to use anonymous struct as inner map definition here */
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ /* changing max_entries to 2 will fail during load
+ * due to incompatibility with inner_map definition */
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ });
+} outer_arr SEC(".maps") = {
+ /* (void *) cast is necessary because we didn't use `struct inner_map`
+ * in __inner(values, ...)
+ * Actually, a conscious effort is required to screw up initialization
+ * of inner map slots, which is a great thing!
+ */
+ .values = { (void *)&inner_map1, 0, (void *)&inner_map2 },
+};
+
+struct outer_hash {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 5);
+ __uint(key_size, sizeof(int));
+ /* Here everything works flawlessly due to reuse of struct inner_map
+ * and compiler will complain at the attempt to use non-inner_map
+ * references below. This is great experience.
+ */
+ __array(values, struct inner_map);
+} outer_hash SEC(".maps") = {
+ .values = {
+ [0] = &inner_map2,
+ [4] = &inner_map1,
+ },
+};
+
+int input = 0;
+
+SEC("raw_tp/sys_enter")
+int handle__sys_enter(void *ctx)
+{
+ struct inner_map *inner_map;
+ int key = 0, val;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &key);
+ if (!inner_map)
+ return 1;
+ val = input;
+ bpf_map_update_elem(inner_map, &key, &val, 0);
+
+ inner_map = bpf_map_lookup_elem(&outer_hash, &key);
+ if (!inner_map)
+ return 1;
+ val = input + 1;
+ bpf_map_update_elem(inner_map, &key, &val, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
index a924e53c8e9d..6c5560162746 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -28,20 +28,12 @@ struct {
__type(value, struct ipv_counts);
} btf_map SEC(".maps");
-struct dummy_tracepoint_args {
- unsigned long long pad;
- struct sock *sock;
-};
-
__attribute__((noinline))
-int test_long_fname_2(struct dummy_tracepoint_args *arg)
+int test_long_fname_2(void)
{
struct ipv_counts *counts;
int key = 0;
- if (!arg->sock)
- return 0;
-
counts = bpf_map_lookup_elem(&btf_map, &key);
if (!counts)
return 0;
@@ -57,15 +49,15 @@ int test_long_fname_2(struct dummy_tracepoint_args *arg)
}
__attribute__((noinline))
-int test_long_fname_1(struct dummy_tracepoint_args *arg)
+int test_long_fname_1(void)
{
- return test_long_fname_2(arg);
+ return test_long_fname_2();
}
SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+int _dummy_tracepoint(void *arg)
{
- return test_long_fname_1(arg);
+ return test_long_fname_1();
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
index 983aedd1c072..506da7fd2da2 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
@@ -17,20 +17,12 @@ struct bpf_map_def SEC("maps") btf_map = {
.max_entries = 4,
};
-struct dummy_tracepoint_args {
- unsigned long long pad;
- struct sock *sock;
-};
-
__attribute__((noinline))
-int test_long_fname_2(struct dummy_tracepoint_args *arg)
+int test_long_fname_2(void)
{
struct ipv_counts *counts;
int key = 0;
- if (!arg->sock)
- return 0;
-
counts = bpf_map_lookup_elem(&btf_map, &key);
if (!counts)
return 0;
@@ -41,15 +33,15 @@ int test_long_fname_2(struct dummy_tracepoint_args *arg)
}
__attribute__((noinline))
-int test_long_fname_1(struct dummy_tracepoint_args *arg)
+int test_long_fname_1(void)
{
- return test_long_fname_2(arg);
+ return test_long_fname_2();
}
SEC("dummy_tracepoint")
-int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
+int _dummy_tracepoint(void *arg)
{
- return test_long_fname_1(arg);
+ return test_long_fname_1();
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup_link.c b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
new file mode 100644
index 000000000000..77e47b9e4446
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int calls = 0;
+int alt_calls = 0;
+
+SEC("cgroup_skb/egress1")
+int egress(struct __sk_buff *skb)
+{
+ __sync_fetch_and_add(&calls, 1);
+ return 1;
+}
+
+SEC("cgroup_skb/egress2")
+int egress_alt(struct __sk_buff *skb)
+{
+ __sync_fetch_and_add(&alt_calls, 1);
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
+
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
new file mode 100644
index 000000000000..f0b72e86bee5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -0,0 +1,1061 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2019, 2020 Cloudflare
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "test_cls_redirect.h"
+
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
+
+#define IP_OFFSET_MASK (0x1FFF)
+#define IP_MF (0x2000)
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+
+/**
+ * Destination port and IP used for UDP encapsulation.
+ */
+static volatile const __be16 ENCAPSULATION_PORT;
+static volatile const __be32 ENCAPSULATION_IP;
+
+typedef struct {
+ uint64_t processed_packets_total;
+ uint64_t l3_protocol_packets_total_ipv4;
+ uint64_t l3_protocol_packets_total_ipv6;
+ uint64_t l4_protocol_packets_total_tcp;
+ uint64_t l4_protocol_packets_total_udp;
+ uint64_t accepted_packets_total_syn;
+ uint64_t accepted_packets_total_syn_cookies;
+ uint64_t accepted_packets_total_last_hop;
+ uint64_t accepted_packets_total_icmp_echo_request;
+ uint64_t accepted_packets_total_established;
+ uint64_t forwarded_packets_total_gue;
+ uint64_t forwarded_packets_total_gre;
+
+ uint64_t errors_total_unknown_l3_proto;
+ uint64_t errors_total_unknown_l4_proto;
+ uint64_t errors_total_malformed_ip;
+ uint64_t errors_total_fragmented_ip;
+ uint64_t errors_total_malformed_icmp;
+ uint64_t errors_total_unwanted_icmp;
+ uint64_t errors_total_malformed_icmp_pkt_too_big;
+ uint64_t errors_total_malformed_tcp;
+ uint64_t errors_total_malformed_udp;
+ uint64_t errors_total_icmp_echo_replies;
+ uint64_t errors_total_malformed_encapsulation;
+ uint64_t errors_total_encap_adjust_failed;
+ uint64_t errors_total_encap_buffer_too_small;
+ uint64_t errors_total_redirect_loop;
+} metrics_t;
+
+typedef enum {
+ INVALID = 0,
+ UNKNOWN,
+ ECHO_REQUEST,
+ SYN,
+ SYN_COOKIE,
+ ESTABLISHED,
+} verdict_t;
+
+typedef struct {
+ uint16_t src, dst;
+} flow_ports_t;
+
+_Static_assert(
+ sizeof(flow_ports_t) !=
+ offsetofend(struct bpf_sock_tuple, ipv4.dport) -
+ offsetof(struct bpf_sock_tuple, ipv4.sport) - 1,
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+_Static_assert(
+ sizeof(flow_ports_t) !=
+ offsetofend(struct bpf_sock_tuple, ipv6.dport) -
+ offsetof(struct bpf_sock_tuple, ipv6.sport) - 1,
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+
+typedef int ret_t;
+
+/* This is a bit of a hack. We need a return value which allows us to
+ * indicate that the regular flow of the program should continue,
+ * while allowing functions to use XDP_PASS and XDP_DROP, etc.
+ */
+static const ret_t CONTINUE_PROCESSING = -1;
+
+/* Convenience macro to call functions which return ret_t.
+ */
+#define MAYBE_RETURN(x) \
+ do { \
+ ret_t __ret = x; \
+ if (__ret != CONTINUE_PROCESSING) \
+ return __ret; \
+ } while (0)
+
+/* Linux packet pointers are either aligned to NET_IP_ALIGN (aka 2 bytes),
+ * or not aligned if the arch supports efficient unaligned access.
+ *
+ * Since the verifier ensures that eBPF packet accesses follow these rules,
+ * we can tell LLVM to emit code as if we always had a larger alignment.
+ * It will yell at us if we end up on a platform where this is not valid.
+ */
+typedef uint8_t *net_ptr __attribute__((align_value(8)));
+
+typedef struct buf {
+ struct __sk_buff *skb;
+ net_ptr head;
+ /* NB: tail musn't have alignment other than 1, otherwise
+ * LLVM will go and eliminate code, e.g. when checking packet lengths.
+ */
+ uint8_t *const tail;
+} buf_t;
+
+static size_t buf_off(const buf_t *buf)
+{
+ /* Clang seems to optimize constructs like
+ * a - b + c
+ * if c is known:
+ * r? = c
+ * r? -= b
+ * r? += a
+ *
+ * This is a problem if a and b are packet pointers,
+ * since the verifier allows subtracting two pointers to
+ * get a scalar, but not a scalar and a pointer.
+ *
+ * Use inline asm to break this optimization.
+ */
+ size_t off = (size_t)buf->head;
+ asm("%0 -= %1" : "+r"(off) : "r"(buf->skb->data));
+ return off;
+}
+
+static bool buf_copy(buf_t *buf, void *dst, size_t len)
+{
+ if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
+ return false;
+ }
+
+ buf->head += len;
+ return true;
+}
+
+static bool buf_skip(buf_t *buf, const size_t len)
+{
+ /* Check whether off + len is valid in the non-linear part. */
+ if (buf_off(buf) + len > buf->skb->len) {
+ return false;
+ }
+
+ buf->head += len;
+ return true;
+}
+
+/* Returns a pointer to the start of buf, or NULL if len is
+ * larger than the remaining data. Consumes len bytes on a successful
+ * call.
+ *
+ * If scratch is not NULL, the function will attempt to load non-linear
+ * data via bpf_skb_load_bytes. On success, scratch is returned.
+ */
+static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
+{
+ if (buf->head + len > buf->tail) {
+ if (scratch == NULL) {
+ return NULL;
+ }
+
+ return buf_copy(buf, scratch, len) ? scratch : NULL;
+ }
+
+ void *ptr = buf->head;
+ buf->head += len;
+ return ptr;
+}
+
+static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
+{
+ if (ipv4->ihl <= 5) {
+ return true;
+ }
+
+ return buf_skip(buf, (ipv4->ihl - 5) * 4);
+}
+
+static bool ipv4_is_fragment(const struct iphdr *ip)
+{
+ uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
+ return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
+}
+
+static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
+{
+ struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
+ if (ipv4 == NULL) {
+ return NULL;
+ }
+
+ if (ipv4->ihl < 5) {
+ return NULL;
+ }
+
+ if (!pkt_skip_ipv4_options(pkt, ipv4)) {
+ return NULL;
+ }
+
+ return ipv4;
+}
+
+/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
+static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
+{
+ if (!buf_copy(pkt, ports, sizeof(*ports))) {
+ return false;
+ }
+
+ /* Ports in the L4 headers are reversed, since we are parsing an ICMP
+ * payload which is going towards the eyeball.
+ */
+ uint16_t dst = ports->src;
+ ports->src = ports->dst;
+ ports->dst = dst;
+ return true;
+}
+
+static uint16_t pkt_checksum_fold(uint32_t csum)
+{
+ /* The highest reasonable value for an IPv4 header
+ * checksum requires two folds, so we just do that always.
+ */
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ return (uint16_t)~csum;
+}
+
+static void pkt_ipv4_checksum(struct iphdr *iph)
+{
+ iph->check = 0;
+
+ /* An IP header without options is 20 bytes. Two of those
+ * are the checksum, which we always set to zero. Hence,
+ * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7,
+ * which fits in 32 bit.
+ */
+ _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes");
+ uint32_t acc = 0;
+ uint16_t *ipw = (uint16_t *)iph;
+
+#pragma clang loop unroll(full)
+ for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) {
+ acc += ipw[i];
+ }
+
+ iph->check = pkt_checksum_fold(acc);
+}
+
+static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
+ const struct ipv6hdr *ipv6,
+ uint8_t *upper_proto,
+ bool *is_fragment)
+{
+ /* We understand five extension headers.
+ * https://tools.ietf.org/html/rfc8200#section-4.1 states that all
+ * headers should occur once, except Destination Options, which may
+ * occur twice. Hence we give up after 6 headers.
+ */
+ struct {
+ uint8_t next;
+ uint8_t len;
+ } exthdr = {
+ .next = ipv6->nexthdr,
+ };
+ *is_fragment = false;
+
+#pragma clang loop unroll(full)
+ for (int i = 0; i < 6; i++) {
+ switch (exthdr.next) {
+ case IPPROTO_FRAGMENT:
+ *is_fragment = true;
+ /* NB: We don't check that hdrlen == 0 as per spec. */
+ /* fallthrough; */
+
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_MH:
+ if (!buf_copy(pkt, &exthdr, sizeof(exthdr))) {
+ return false;
+ }
+
+ /* hdrlen is in 8-octet units, and excludes the first 8 octets. */
+ if (!buf_skip(pkt,
+ (exthdr.len + 1) * 8 - sizeof(exthdr))) {
+ return false;
+ }
+
+ /* Decode next header */
+ break;
+
+ default:
+ /* The next header is not one of the known extension
+ * headers, treat it as the upper layer header.
+ *
+ * This handles IPPROTO_NONE.
+ *
+ * Encapsulating Security Payload (50) and Authentication
+ * Header (51) also end up here (and will trigger an
+ * unknown proto error later). They have a custom header
+ * format and seem too esoteric to care about.
+ */
+ *upper_proto = exthdr.next;
+ return true;
+ }
+ }
+
+ /* We never found an upper layer header. */
+ return false;
+}
+
+/* This function has to be inlined, because the verifier otherwise rejects it
+ * due to returning a pointer to the stack. This is technically correct, since
+ * scratch is allocated on the stack. However, this usage should be safe since
+ * it's the callers stack after all.
+ */
+static inline __attribute__((__always_inline__)) struct ipv6hdr *
+pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
+ bool *is_fragment)
+{
+ struct ipv6hdr *ipv6 = buf_assign(pkt, sizeof(*ipv6), scratch);
+ if (ipv6 == NULL) {
+ return NULL;
+ }
+
+ if (!pkt_skip_ipv6_extension_headers(pkt, ipv6, proto, is_fragment)) {
+ return NULL;
+ }
+
+ return ipv6;
+}
+
+/* Global metrics, per CPU
+ */
+struct bpf_map_def metrics_map SEC("maps") = {
+ .type = BPF_MAP_TYPE_PERCPU_ARRAY,
+ .key_size = sizeof(unsigned int),
+ .value_size = sizeof(metrics_t),
+ .max_entries = 1,
+};
+
+static metrics_t *get_global_metrics(void)
+{
+ uint64_t key = 0;
+ return bpf_map_lookup_elem(&metrics_map, &key);
+}
+
+static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+{
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ int32_t encap_overhead = payload_off - sizeof(struct ethhdr);
+
+ // Changing the ethertype if the encapsulated packet is ipv6
+ if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+ encap->eth.h_proto = bpf_htons(ETH_P_IPV6);
+ }
+
+ if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC))
+ return TC_ACT_SHOT;
+
+ return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
+ struct in_addr *next_hop, metrics_t *metrics)
+{
+ metrics->forwarded_packets_total_gre++;
+
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ int32_t encap_overhead =
+ payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr);
+ int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead;
+ uint16_t proto = ETH_P_IP;
+
+ /* Loop protection: the inner packet's TTL is decremented as a safeguard
+ * against any forwarding loop. As the only interesting field is the TTL
+ * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes
+ * as they handle the split packets if needed (no need for the data to be
+ * in the linear section).
+ */
+ if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+ proto = ETH_P_IPV6;
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1, 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ } else {
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl,
+ 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ /* IPv4 also has a checksum to patch. While the TTL is only one byte,
+ * this function only works for 2 and 4 bytes arguments (the result is
+ * the same).
+ */
+ rc = bpf_l3_csum_replace(
+ skb, payload_off + offsetof(struct iphdr, check), ttl,
+ ttl - 1, 2);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1,
+ 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ }
+
+ if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) {
+ metrics->errors_total_encap_adjust_failed++;
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ buf_t pkt = {
+ .skb = skb,
+ .head = (uint8_t *)(long)skb->data,
+ .tail = (uint8_t *)(long)skb->data_end,
+ };
+
+ encap_gre_t *encap_gre = buf_assign(&pkt, sizeof(encap_gre_t), NULL);
+ if (encap_gre == NULL) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ encap_gre->ip.protocol = IPPROTO_GRE;
+ encap_gre->ip.daddr = next_hop->s_addr;
+ encap_gre->ip.saddr = ENCAPSULATION_IP;
+ encap_gre->ip.tot_len =
+ bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta);
+ encap_gre->gre.flags = 0;
+ encap_gre->gre.protocol = bpf_htons(proto);
+ pkt_ipv4_checksum((void *)&encap_gre->ip);
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
+ struct in_addr *next_hop, metrics_t *metrics)
+{
+ /* swap L2 addresses */
+ /* This assumes that packets are received from a router.
+ * So just swapping the MAC addresses here will make the packet go back to
+ * the router, which will send it to the appropriate machine.
+ */
+ unsigned char temp[ETH_ALEN];
+ memcpy(temp, encap->eth.h_dest, sizeof(temp));
+ memcpy(encap->eth.h_dest, encap->eth.h_source,
+ sizeof(encap->eth.h_dest));
+ memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count - 1 &&
+ encap->unigue.last_hop_gre) {
+ return forward_with_gre(skb, encap, next_hop, metrics);
+ }
+
+ metrics->forwarded_packets_total_gue++;
+ uint32_t old_saddr = encap->ip.saddr;
+ encap->ip.saddr = encap->ip.daddr;
+ encap->ip.daddr = next_hop->s_addr;
+ if (encap->unigue.next_hop < encap->unigue.hop_count) {
+ encap->unigue.next_hop++;
+ }
+
+ /* Remove ip->saddr, add next_hop->s_addr */
+ const uint64_t off = offsetof(typeof(*encap), ip.check);
+ int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4);
+ if (ret < 0) {
+ return TC_ACT_SHOT;
+ }
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t skip_next_hops(buf_t *pkt, int n)
+{
+ switch (n) {
+ case 1:
+ if (!buf_skip(pkt, sizeof(struct in_addr)))
+ return TC_ACT_SHOT;
+ case 0:
+ return CONTINUE_PROCESSING;
+
+ default:
+ return TC_ACT_SHOT;
+ }
+}
+
+/* Get the next hop from the GLB header.
+ *
+ * Sets next_hop->s_addr to 0 if there are no more hops left.
+ * pkt is positioned just after the variable length GLB header
+ * iff the call is successful.
+ */
+static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
+ struct in_addr *next_hop)
+{
+ if (encap->unigue.next_hop > encap->unigue.hop_count) {
+ return TC_ACT_SHOT;
+ }
+
+ /* Skip "used" next hops. */
+ MAYBE_RETURN(skip_next_hops(pkt, encap->unigue.next_hop));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count) {
+ /* No more next hops, we are at the end of the GLB header. */
+ next_hop->s_addr = 0;
+ return CONTINUE_PROCESSING;
+ }
+
+ if (!buf_copy(pkt, next_hop, sizeof(*next_hop))) {
+ return TC_ACT_SHOT;
+ }
+
+ /* Skip the remainig next hops (may be zero). */
+ return skip_next_hops(pkt, encap->unigue.hop_count -
+ encap->unigue.next_hop - 1);
+}
+
+/* Fill a bpf_sock_tuple to be used with the socket lookup functions.
+ * This is a kludge that let's us work around verifier limitations:
+ *
+ * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321)
+ *
+ * clang will substitue a costant for sizeof, which allows the verifier
+ * to track it's value. Based on this, it can figure out the constant
+ * return value, and calling code works while still being "generic" to
+ * IPv4 and IPv6.
+ */
+static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+ uint64_t iphlen, uint16_t sport, uint16_t dport)
+{
+ switch (iphlen) {
+ case sizeof(struct iphdr): {
+ struct iphdr *ipv4 = (struct iphdr *)iph;
+ tuple->ipv4.daddr = ipv4->daddr;
+ tuple->ipv4.saddr = ipv4->saddr;
+ tuple->ipv4.sport = sport;
+ tuple->ipv4.dport = dport;
+ return sizeof(tuple->ipv4);
+ }
+
+ case sizeof(struct ipv6hdr): {
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph;
+ memcpy(&tuple->ipv6.daddr, &ipv6->daddr,
+ sizeof(tuple->ipv6.daddr));
+ memcpy(&tuple->ipv6.saddr, &ipv6->saddr,
+ sizeof(tuple->ipv6.saddr));
+ tuple->ipv6.sport = sport;
+ tuple->ipv6.dport = dport;
+ return sizeof(tuple->ipv6);
+ }
+
+ default:
+ return 0;
+ }
+}
+
+static verdict_t classify_tcp(struct __sk_buff *skb,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen,
+ void *iph, struct tcphdr *tcp)
+{
+ struct bpf_sock *sk =
+ bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+ if (sk == NULL) {
+ return UNKNOWN;
+ }
+
+ if (sk->state != BPF_TCP_LISTEN) {
+ bpf_sk_release(sk);
+ return ESTABLISHED;
+ }
+
+ if (iph != NULL && tcp != NULL) {
+ /* Kludge: we've run out of arguments, but need the length of the ip header. */
+ uint64_t iphlen = sizeof(struct iphdr);
+ if (tuplen == sizeof(tuple->ipv6)) {
+ iphlen = sizeof(struct ipv6hdr);
+ }
+
+ if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp,
+ sizeof(*tcp)) == 0) {
+ bpf_sk_release(sk);
+ return SYN_COOKIE;
+ }
+ }
+
+ bpf_sk_release(sk);
+ return UNKNOWN;
+}
+
+static verdict_t classify_udp(struct __sk_buff *skb,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen)
+{
+ struct bpf_sock *sk =
+ bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+ if (sk == NULL) {
+ return UNKNOWN;
+ }
+
+ if (sk->state == BPF_TCP_ESTABLISHED) {
+ bpf_sk_release(sk);
+ return ESTABLISHED;
+ }
+
+ bpf_sk_release(sk);
+ return UNKNOWN;
+}
+
+static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen,
+ metrics_t *metrics)
+{
+ switch (proto) {
+ case IPPROTO_TCP:
+ return classify_tcp(skb, tuple, tuplen, NULL, NULL);
+
+ case IPPROTO_UDP:
+ return classify_udp(skb, tuple, tuplen);
+
+ default:
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+}
+
+static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
+{
+ struct icmphdr icmp;
+ if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp.type == ICMP_ECHOREPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp.type == ICMP_ECHO) {
+ return ECHO_REQUEST;
+ }
+
+ if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ struct iphdr _ip4;
+ const struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4);
+ if (ipv4 == NULL) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ /* The source address in the outer IP header is from the entity that
+ * originated the ICMP message. Use the original IP header to restore
+ * the correct flow tuple.
+ */
+ struct bpf_sock_tuple tuple;
+ tuple.ipv4.saddr = ipv4->daddr;
+ tuple.ipv4.daddr = ipv4->saddr;
+
+ if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv4.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(pkt->skb, ipv4->protocol, &tuple,
+ sizeof(tuple.ipv4), metrics);
+}
+
+static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
+{
+ struct icmp6hdr icmp6;
+ if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) {
+ return ECHO_REQUEST;
+ }
+
+ if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ bool is_fragment;
+ uint8_t l4_proto;
+ struct ipv6hdr _ipv6;
+ const struct ipv6hdr *ipv6 =
+ pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment);
+ if (ipv6 == NULL) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ /* Swap source and dest addresses. */
+ struct bpf_sock_tuple tuple;
+ memcpy(&tuple.ipv6.saddr, &ipv6->daddr, sizeof(tuple.ipv6.saddr));
+ memcpy(&tuple.ipv6.daddr, &ipv6->saddr, sizeof(tuple.ipv6.daddr));
+
+ if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv6.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(pkt->skb, l4_proto, &tuple, sizeof(tuple.ipv6),
+ metrics);
+}
+
+static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
+ metrics_t *metrics)
+{
+ metrics->l4_protocol_packets_total_tcp++;
+
+ struct tcphdr _tcp;
+ struct tcphdr *tcp = buf_assign(pkt, sizeof(_tcp), &_tcp);
+ if (tcp == NULL) {
+ metrics->errors_total_malformed_tcp++;
+ return INVALID;
+ }
+
+ if (tcp->syn) {
+ return SYN;
+ }
+
+ struct bpf_sock_tuple tuple;
+ uint64_t tuplen =
+ fill_tuple(&tuple, iph, iphlen, tcp->source, tcp->dest);
+ return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
+}
+
+static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
+ metrics_t *metrics)
+{
+ metrics->l4_protocol_packets_total_udp++;
+
+ struct udphdr _udp;
+ struct udphdr *udph = buf_assign(pkt, sizeof(_udp), &_udp);
+ if (udph == NULL) {
+ metrics->errors_total_malformed_udp++;
+ return INVALID;
+ }
+
+ struct bpf_sock_tuple tuple;
+ uint64_t tuplen =
+ fill_tuple(&tuple, iph, iphlen, udph->source, udph->dest);
+ return classify_udp(pkt->skb, &tuple, tuplen);
+}
+
+static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
+{
+ metrics->l3_protocol_packets_total_ipv4++;
+
+ struct iphdr _ip4;
+ struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4);
+ if (ipv4 == NULL) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4->version != 4) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4_is_fragment(ipv4)) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (ipv4->protocol) {
+ case IPPROTO_ICMP:
+ return process_icmpv4(pkt, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(pkt, ipv4, sizeof(*ipv4), metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(pkt, ipv4, sizeof(*ipv4), metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
+{
+ metrics->l3_protocol_packets_total_ipv6++;
+
+ uint8_t l4_proto;
+ bool is_fragment;
+ struct ipv6hdr _ipv6;
+ struct ipv6hdr *ipv6 =
+ pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment);
+ if (ipv6 == NULL) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv6->version != 6) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (l4_proto) {
+ case IPPROTO_ICMPV6:
+ return process_icmpv6(pkt, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(pkt, ipv6, sizeof(*ipv6), metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(pkt, ipv6, sizeof(*ipv6), metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+SEC("classifier/cls_redirect")
+int cls_redirect(struct __sk_buff *skb)
+{
+ metrics_t *metrics = get_global_metrics();
+ if (metrics == NULL) {
+ return TC_ACT_SHOT;
+ }
+
+ metrics->processed_packets_total++;
+
+ /* Pass bogus packets as long as we're not sure they're
+ * destined for us.
+ */
+ if (skb->protocol != bpf_htons(ETH_P_IP)) {
+ return TC_ACT_OK;
+ }
+
+ encap_headers_t *encap;
+
+ /* Make sure that all encapsulation headers are available in
+ * the linear portion of the skb. This makes it easy to manipulate them.
+ */
+ if (bpf_skb_pull_data(skb, sizeof(*encap))) {
+ return TC_ACT_OK;
+ }
+
+ buf_t pkt = {
+ .skb = skb,
+ .head = (uint8_t *)(long)skb->data,
+ .tail = (uint8_t *)(long)skb->data_end,
+ };
+
+ encap = buf_assign(&pkt, sizeof(*encap), NULL);
+ if (encap == NULL) {
+ return TC_ACT_OK;
+ }
+
+ if (encap->ip.ihl != 5) {
+ /* We never have any options. */
+ return TC_ACT_OK;
+ }
+
+ if (encap->ip.daddr != ENCAPSULATION_IP ||
+ encap->ip.protocol != IPPROTO_UDP) {
+ return TC_ACT_OK;
+ }
+
+ /* TODO Check UDP length? */
+ if (encap->udp.dest != ENCAPSULATION_PORT) {
+ return TC_ACT_OK;
+ }
+
+ /* We now know that the packet is destined to us, we can
+ * drop bogus ones.
+ */
+ if (ipv4_is_fragment((void *)&encap->ip)) {
+ metrics->errors_total_fragmented_ip++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.variant != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.control != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.flags != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.hlen !=
+ sizeof(encap->unigue) / 4 + encap->unigue.hop_count) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->unigue.version != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->unigue.reserved != 0) {
+ return TC_ACT_SHOT;
+ }
+
+ struct in_addr next_hop;
+ MAYBE_RETURN(get_next_hop(&pkt, encap, &next_hop));
+
+ if (next_hop.s_addr == 0) {
+ metrics->accepted_packets_total_last_hop++;
+ return accept_locally(skb, encap);
+ }
+
+ verdict_t verdict;
+ switch (encap->gue.proto_ctype) {
+ case IPPROTO_IPIP:
+ verdict = process_ipv4(&pkt, metrics);
+ break;
+
+ case IPPROTO_IPV6:
+ verdict = process_ipv6(&pkt, metrics);
+ break;
+
+ default:
+ metrics->errors_total_unknown_l3_proto++;
+ return TC_ACT_SHOT;
+ }
+
+ switch (verdict) {
+ case INVALID:
+ /* metrics have already been bumped */
+ return TC_ACT_SHOT;
+
+ case UNKNOWN:
+ return forward_to_next_hop(skb, encap, &next_hop, metrics);
+
+ case ECHO_REQUEST:
+ metrics->accepted_packets_total_icmp_echo_request++;
+ break;
+
+ case SYN:
+ if (encap->unigue.forward_syn) {
+ return forward_to_next_hop(skb, encap, &next_hop,
+ metrics);
+ }
+
+ metrics->accepted_packets_total_syn++;
+ break;
+
+ case SYN_COOKIE:
+ metrics->accepted_packets_total_syn_cookies++;
+ break;
+
+ case ESTABLISHED:
+ metrics->accepted_packets_total_established++;
+ break;
+ }
+
+ return accept_locally(skb, encap);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
new file mode 100644
index 000000000000..76eab0aacba0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright 2019, 2020 Cloudflare */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+
+struct gre_base_hdr {
+ uint16_t flags;
+ uint16_t protocol;
+} __attribute__((packed));
+
+struct guehdr {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ uint8_t hlen : 5, control : 1, variant : 2;
+#else
+ uint8_t variant : 2, control : 1, hlen : 5;
+#endif
+ uint8_t proto_ctype;
+ uint16_t flags;
+};
+
+struct unigue {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ uint8_t _r : 2, last_hop_gre : 1, forward_syn : 1, version : 4;
+#else
+ uint8_t version : 4, forward_syn : 1, last_hop_gre : 1, _r : 2;
+#endif
+ uint8_t reserved;
+ uint8_t next_hop;
+ uint8_t hop_count;
+ // Next hops go here
+} __attribute__((packed));
+
+typedef struct {
+ struct ethhdr eth;
+ struct iphdr ip;
+ struct gre_base_hdr gre;
+} __attribute__((packed)) encap_gre_t;
+
+typedef struct {
+ struct ethhdr eth;
+ struct iphdr ip;
+ struct udphdr udp;
+ struct guehdr gue;
+ struct unigue unigue;
+} __attribute__((packed)) encap_headers_t;
diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c
new file mode 100644
index 000000000000..01a002ade529
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 count = 0;
+
+SEC("raw_tracepoint/sys_enter")
+int test_enable_stats(void *ctx)
+{
+ count += 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c
new file mode 100644
index 000000000000..8941a41c2a55
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp_err.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_STACK_RAWTP 10
+
+SEC("raw_tracepoint/sys_enter")
+int bpf_prog2(void *ctx)
+{
+ __u64 stack[MAX_STACK_RAWTP];
+ int error;
+
+ /* set all the flags which should return -EINVAL */
+ error = bpf_get_stack(ctx, stack, 0, -1);
+ if (error < 0)
+ goto loop;
+
+ return error;
+loop:
+ while (1) {
+ error++;
+ }
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index dd7a4d3dbc0d..1319be1c54ba 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -68,7 +68,7 @@ static struct foo struct3 = {
bpf_map_update_elem(&result_##map, &key, var, 0); \
} while (0)
-SEC("static_data_load")
+SEC("classifier/static_data_load")
int load_static_data(struct __sk_buff *skb)
{
static const __u64 bar = ~0;
diff --git a/tools/testing/selftests/bpf/progs/test_link_pinning.c b/tools/testing/selftests/bpf/progs/test_link_pinning.c
new file mode 100644
index 000000000000..bbf2a5264dc0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_link_pinning.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int in = 0;
+int out = 0;
+
+SEC("raw_tp/sys_enter")
+int raw_tp_prog(const void *ctx)
+{
+ out = in;
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int tp_btf_prog(const void *ctx)
+{
+ out = in;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c
index 6239596cd14e..4eb42cff5fe9 100644
--- a/tools/testing/selftests/bpf/progs/test_mmap.c
+++ b/tools/testing/selftests/bpf/progs/test_mmap.c
@@ -9,6 +9,14 @@ char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 4096);
+ __uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG);
+ __type(key, __u32);
+ __type(value, char);
+} rdonly_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 512 * 4); /* at least 4 pages of data */
__uint(map_flags, BPF_F_MMAPABLE);
__type(key, __u32);
diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
new file mode 100644
index 000000000000..1dca70a6de2f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Carlos Neira cneirabustos@gmail.com */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+static volatile struct {
+ __u64 dev;
+ __u64 ino;
+ __u64 pid_tgid;
+ __u64 user_pid_tgid;
+} res;
+
+SEC("raw_tracepoint/sys_enter")
+int trace(void *ctx)
+{
+ __u64 ns_pid_tgid, expected_pid;
+ struct bpf_pidns_info nsdata;
+ __u32 key = 0;
+
+ if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata,
+ sizeof(struct bpf_pidns_info)))
+ return 0;
+
+ ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid;
+ expected_pid = res.user_pid_tgid;
+
+ if (expected_pid != ns_pid_tgid)
+ return 0;
+
+ res.pid_tgid = ns_pid_tgid;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c
index 98b9de2fafd0..ded71b3ff6b4 100644
--- a/tools/testing/selftests/bpf/progs/test_obj_id.c
+++ b/tools/testing/selftests/bpf/progs/test_obj_id.c
@@ -3,16 +3,8 @@
*/
#include <stddef.h>
#include <linux/bpf.h>
-#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
-/* It is a dumb bpf program such that it must have no
- * issue to be loaded since testing the verifier is
- * not the focus here.
- */
-
-int _version SEC("version") = 1;
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
@@ -20,13 +12,13 @@ struct {
__type(value, __u64);
} test_map_id SEC(".maps");
-SEC("test_obj_id_dummy")
-int test_obj_id(struct __sk_buff *skb)
+SEC("raw_tp/sys_enter")
+int test_obj_id(void *ctx)
{
__u32 key = 0;
__u64 *value;
value = bpf_map_lookup_elem(&test_map_id, &key);
- return TC_ACT_OK;
+ return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index bfe9fbcb9684..42403d088abc 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -6,7 +6,6 @@
#include <linux/ptrace.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "bpf_trace_helpers.h"
struct task_struct;
@@ -17,11 +16,9 @@ int BPF_KPROBE(prog1, struct task_struct *tsk, const char *buf, bool exec)
}
SEC("kretprobe/__set_task_comm")
-int BPF_KRETPROBE(prog2,
- struct task_struct *tsk, const char *buf, bool exec,
- int ret)
+int BPF_KRETPROBE(prog2, int ret)
{
- return !PT_REGS_PARM1(ctx) && ret;
+ return ret;
}
SEC("raw_tp/task_rename")
@@ -33,12 +30,18 @@ int prog3(struct bpf_raw_tracepoint_args *ctx)
SEC("fentry/__set_task_comm")
int BPF_PROG(prog4, struct task_struct *tsk, const char *buf, bool exec)
{
- return !tsk;
+ return 0;
}
SEC("fexit/__set_task_comm")
int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
{
+ return 0;
+}
+
+SEC("fmod_ret/__set_task_comm")
+int BPF_PROG(prog6, struct task_struct *tsk, const char *buf, bool exec)
+{
return !tsk;
}
diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c
new file mode 100644
index 000000000000..a1ccc831c882
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+
+#include <stddef.h>
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int valid = 0;
+int required_size_out = 0;
+int written_stack_out = 0;
+int written_global_out = 0;
+
+struct {
+ __u64 _a;
+ __u64 _b;
+ __u64 _c;
+} fpbe[30] = {0};
+
+SEC("perf_event")
+int perf_branches(void *ctx)
+{
+ __u64 entries[4 * 3] = {0};
+ int required_size, written_stack, written_global;
+
+ /* write to stack */
+ written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0);
+ /* ignore spurious events */
+ if (!written_stack)
+ return 1;
+
+ /* get required size */
+ required_size = bpf_read_branch_records(ctx, NULL, 0,
+ BPF_F_GET_BRANCH_RECORDS_SIZE);
+
+ written_global = bpf_read_branch_records(ctx, fpbe, sizeof(fpbe), 0);
+ /* ignore spurious events */
+ if (!written_global)
+ return 1;
+
+ required_size_out = required_size;
+ written_stack_out = written_stack;
+ written_global_out = written_global;
+ valid = 1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
index ebfcc9f50c35..ad59c4c9aba8 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -4,7 +4,7 @@
#include <linux/ptrace.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_tracing.h>
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
index d556b1572cc6..89b3532ccc75 100644
--- a/tools/testing/selftests/bpf/progs/test_probe_user.c
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -7,7 +7,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "bpf_trace_helpers.h"
static struct sockaddr_in old;
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
new file mode 100644
index 000000000000..8ba9959b036b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+/* inputs */
+int pid = 0;
+long value = 0;
+long flags = 0;
+
+/* outputs */
+long total = 0;
+long discarded = 0;
+long dropped = 0;
+
+long avail_data = 0;
+long ring_size = 0;
+long cons_pos = 0;
+long prod_pos = 0;
+
+/* inner state */
+long seq = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int test_ringbuf(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample;
+ int zero = 0;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+ if (!sample) {
+ __sync_fetch_and_add(&dropped, 1);
+ return 1;
+ }
+
+ sample->pid = pid;
+ bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+ sample->value = value;
+
+ sample->seq = seq++;
+ __sync_fetch_and_add(&total, 1);
+
+ if (sample->seq & 1) {
+ /* copy from reserved sample to a new one... */
+ bpf_ringbuf_output(&ringbuf, sample, sizeof(*sample), flags);
+ /* ...and then discard reserved sample */
+ bpf_ringbuf_discard(sample, flags);
+ __sync_fetch_and_add(&discarded, 1);
+ } else {
+ bpf_ringbuf_submit(sample, flags);
+ }
+
+ avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+ ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE);
+ cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS);
+ prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
new file mode 100644
index 000000000000..edf3b6953533
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct ringbuf_map {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf1 SEC(".maps"),
+ ringbuf2 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __array(values, struct ringbuf_map);
+} ringbuf_arr SEC(".maps") = {
+ .values = {
+ [0] = &ringbuf1,
+ [2] = &ringbuf2,
+ },
+};
+
+/* inputs */
+int pid = 0;
+int target_ring = 0;
+long value = 0;
+
+/* outputs */
+long total = 0;
+long dropped = 0;
+long skipped = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int test_ringbuf(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample;
+ void *rb;
+ int zero = 0;
+
+ if (cur_pid != pid)
+ return 0;
+
+ rb = bpf_map_lookup_elem(&ringbuf_arr, &target_ring);
+ if (!rb) {
+ skipped += 1;
+ return 1;
+ }
+
+ sample = bpf_ringbuf_reserve(rb, sizeof(*sample), 0);
+ if (!sample) {
+ dropped += 1;
+ return 1;
+ }
+
+ sample->pid = pid;
+ bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+ sample->value = value;
+
+ sample->seq = total;
+ total += 1;
+
+ bpf_ringbuf_submit(sample, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
new file mode 100644
index 000000000000..1ecd987005d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Cloudflare Ltd.
+// Copyright (c) 2020 Isovalent, Inc.
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* Pin map under /sys/fs/bpf/tc/globals/<map name> */
+#define PIN_GLOBAL_NS 2
+
+/* Must match struct bpf_elf_map layout from iproute2 */
+struct {
+ __u32 type;
+ __u32 size_key;
+ __u32 size_value;
+ __u32 max_elem;
+ __u32 flags;
+ __u32 id;
+ __u32 pinning;
+} server_map SEC("maps") = {
+ .type = BPF_MAP_TYPE_SOCKMAP,
+ .size_key = sizeof(int),
+ .size_value = sizeof(__u64),
+ .max_elem = 1,
+ .pinning = PIN_GLOBAL_NS,
+};
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
+static inline struct bpf_sock_tuple *
+get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct bpf_sock_tuple *result;
+ struct ethhdr *eth;
+ __u64 tuple_len;
+ __u8 proto = 0;
+ __u64 ihl_len;
+
+ eth = (struct ethhdr *)(data);
+ if (eth + 1 > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));
+
+ if (iph + 1 > data_end)
+ return NULL;
+ if (iph->ihl != 5)
+ /* Options are not supported */
+ return NULL;
+ ihl_len = iph->ihl * 4;
+ proto = iph->protocol;
+ *ipv4 = true;
+ result = (struct bpf_sock_tuple *)&iph->saddr;
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));
+
+ if (ip6h + 1 > data_end)
+ return NULL;
+ ihl_len = sizeof(*ip6h);
+ proto = ip6h->nexthdr;
+ *ipv4 = false;
+ result = (struct bpf_sock_tuple *)&ip6h->saddr;
+ } else {
+ return (struct bpf_sock_tuple *)data;
+ }
+
+ if (proto != IPPROTO_TCP && proto != IPPROTO_UDP)
+ return NULL;
+
+ *tcp = (proto == IPPROTO_TCP);
+ return result;
+}
+
+static inline int
+handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
+{
+ struct bpf_sock_tuple ln = {0};
+ struct bpf_sock *sk;
+ const int zero = 0;
+ size_t tuple_len;
+ __be16 dport;
+ int ret;
+
+ tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
+ if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
+ return TC_ACT_SHOT;
+
+ sk = bpf_sk_lookup_udp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+ if (sk)
+ goto assign;
+
+ dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
+ if (dport != bpf_htons(4321))
+ return TC_ACT_OK;
+
+ sk = bpf_map_lookup_elem(&server_map, &zero);
+ if (!sk)
+ return TC_ACT_SHOT;
+
+assign:
+ ret = bpf_sk_assign(skb, sk, 0);
+ bpf_sk_release(sk);
+ return ret;
+}
+
+static inline int
+handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
+{
+ struct bpf_sock_tuple ln = {0};
+ struct bpf_sock *sk;
+ const int zero = 0;
+ size_t tuple_len;
+ __be16 dport;
+ int ret;
+
+ tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
+ if ((void *)tuple + tuple_len > (void *)(long)skb->data_end)
+ return TC_ACT_SHOT;
+
+ sk = bpf_skc_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+ if (sk) {
+ if (sk->state != BPF_TCP_LISTEN)
+ goto assign;
+ bpf_sk_release(sk);
+ }
+
+ dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
+ if (dport != bpf_htons(4321))
+ return TC_ACT_OK;
+
+ sk = bpf_map_lookup_elem(&server_map, &zero);
+ if (!sk)
+ return TC_ACT_SHOT;
+
+ if (sk->state != BPF_TCP_LISTEN) {
+ bpf_sk_release(sk);
+ return TC_ACT_SHOT;
+ }
+
+assign:
+ ret = bpf_sk_assign(skb, sk, 0);
+ bpf_sk_release(sk);
+ return ret;
+}
+
+SEC("classifier/sk_assign_test")
+int bpf_sk_assign_test(struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple *tuple, ln = {0};
+ bool ipv4 = false;
+ bool tcp = false;
+ int tuple_len;
+ int ret = 0;
+
+ tuple = get_tuple(skb, &ipv4, &tcp);
+ if (!tuple)
+ return TC_ACT_SHOT;
+
+ /* Note that the verifier socket return type for bpf_skc_lookup_tcp()
+ * differs from bpf_sk_lookup_udp(), so even though the C-level type is
+ * the same here, if we try to share the implementations they will
+ * fail to verify because we're crossing pointer types.
+ */
+ if (tcp)
+ ret = handle_tcp(skb, tuple, ipv4);
+ else
+ ret = handle_udp(skb, tuple, ipv4);
+
+ return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index d2b38fa6a5b0..e83d0b48d80c 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -73,6 +73,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+ bpf_printk("sk=%d\n", sk ? 1 : 0);
if (sk)
bpf_sk_release(sk);
return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index 202de3938494..b02ea589ce7e 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -23,6 +23,8 @@ int process(struct __sk_buff *skb)
return 1;
if (skb->gso_segs != 8)
return 1;
+ if (skb->gso_size != 10)
+ return 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_skb_helpers.c b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
new file mode 100644
index 000000000000..bb3fbf1a29e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define TEST_COMM_LEN 16
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, u32);
+} cgroup_map SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("classifier/test_skb_helpers")
+int test_skb_helpers(struct __sk_buff *skb)
+{
+ struct task_struct *task;
+ char comm[TEST_COMM_LEN];
+ __u32 tpid;
+
+ task = (struct task_struct *)bpf_get_current_task();
+ bpf_probe_read_kernel(&tpid , sizeof(tpid), &task->tgid);
+ bpf_probe_read_kernel_str(&comm, sizeof(comm), &task->comm);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
new file mode 100644
index 000000000000..45e8fc75a739
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Isovalent, Inc.
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, __u32);
+ __type(value, __u64);
+} socket_storage SEC(".maps");
+
+SEC("sk_msg")
+int prog_msg_verdict(struct sk_msg_md *msg)
+{
+ struct task_struct *task = (struct task_struct *)bpf_get_current_task();
+ int verdict = SK_PASS;
+ __u32 pid, tpid;
+ __u64 *sk_stg;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ sk_stg = bpf_sk_storage_get(&socket_storage, msg->sk, 0, BPF_SK_STORAGE_GET_F_CREATE);
+ if (!sk_stg)
+ return SK_DROP;
+ *sk_stg = pid;
+ bpf_probe_read_kernel(&tpid , sizeof(tpid), &task->tgid);
+ if (pid != tpid)
+ verdict = SK_DROP;
+ bpf_sk_storage_delete(&socket_storage, (void *)msg->sk);
+ return verdict;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 9b4d3a68a91a..057036ca1111 100644
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -79,11 +79,18 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 1);
+ __uint(max_entries, 2);
__type(key, int);
__type(value, int);
} sock_skb_opts SEC(".maps");
+struct {
+ __uint(type, TEST_MAP_TYPE);
+ __uint(max_entries, 20);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} tls_sock_map SEC(".maps");
+
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
@@ -110,8 +117,6 @@ int bpf_prog2(struct __sk_buff *skb)
flags = *f;
}
- bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
- len, flags);
#ifdef SOCKMAP
return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
#else
@@ -120,6 +125,43 @@ int bpf_prog2(struct __sk_buff *skb)
}
+SEC("sk_skb3")
+int bpf_prog3(struct __sk_buff *skb)
+{
+ const int one = 1;
+ int err, *f, ret = SK_PASS;
+ void *data_end;
+ char *c;
+
+ err = bpf_skb_pull_data(skb, 19);
+ if (err)
+ goto tls_out;
+
+ c = (char *)(long)skb->data;
+ data_end = (void *)(long)skb->data_end;
+
+ if (c + 18 < data_end)
+ memcpy(&c[13], "PASS", 4);
+ f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+ if (f && *f) {
+ __u64 flags = 0;
+
+ ret = 0;
+ flags = *f;
+#ifdef SOCKMAP
+ return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
+#else
+ return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
+#endif
+ }
+
+ f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+ if (f && *f)
+ ret = SK_DROP;
+tls_out:
+ return ret;
+}
+
SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
@@ -143,8 +185,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
err = bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
- bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
- lport, bpf_ntohl(rport), err);
}
break;
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
@@ -160,8 +200,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
err = bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
- bpf_printk("active(%i -> %i) map ctx update err: %d\n",
- lport, bpf_ntohl(rport), err);
}
break;
default:
@@ -199,72 +237,6 @@ int bpf_prog4(struct sk_msg_md *msg)
}
SEC("sk_msg2")
-int bpf_prog5(struct sk_msg_md *msg)
-{
- int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
- int *start, *end, *start_push, *end_push, *start_pop, *pop;
- int *bytes, len1, len2 = 0, len3, len4;
- int err1 = -1, err2 = -1;
-
- bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
- if (bytes)
- err1 = bpf_msg_apply_bytes(msg, *bytes);
- bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
- if (bytes)
- err2 = bpf_msg_cork_bytes(msg, *bytes);
- len1 = (__u64)msg->data_end - (__u64)msg->data;
- start = bpf_map_lookup_elem(&sock_bytes, &zero);
- end = bpf_map_lookup_elem(&sock_bytes, &one);
- if (start && end) {
- int err;
-
- bpf_printk("sk_msg2: pull(%i:%i)\n",
- start ? *start : 0, end ? *end : 0);
- err = bpf_msg_pull_data(msg, *start, *end, 0);
- if (err)
- bpf_printk("sk_msg2: pull_data err %i\n",
- err);
- len2 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg2: length update %i->%i\n",
- len1, len2);
- }
-
- start_push = bpf_map_lookup_elem(&sock_bytes, &two);
- end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push) {
- int err;
-
- bpf_printk("sk_msg2: push(%i:%i)\n",
- start_push ? *start_push : 0,
- end_push ? *end_push : 0);
- err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
- if (err)
- bpf_printk("sk_msg2: push_data err %i\n", err);
- len3 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg2: length push_update %i->%i\n",
- len2 ? len2 : len1, len3);
- }
- start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
- pop = bpf_map_lookup_elem(&sock_bytes, &five);
- if (start_pop && pop) {
- int err;
-
- bpf_printk("sk_msg2: pop(%i@%i)\n",
- start_pop, pop);
- err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
- if (err)
- bpf_printk("sk_msg2: pop_data err %i\n", err);
- len4 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg2: length pop_data %i->%i\n",
- len1 ? len1 : 0, len4);
- }
-
- bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
- len1, err1, err2);
- return SK_PASS;
-}
-
-SEC("sk_msg3")
int bpf_prog6(struct sk_msg_md *msg)
{
int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
@@ -305,86 +277,7 @@ int bpf_prog6(struct sk_msg_md *msg)
#endif
}
-SEC("sk_msg4")
-int bpf_prog7(struct sk_msg_md *msg)
-{
- int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
- int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
- int len1, len2 = 0, len3, len4;
- int err1 = 0, err2 = 0, key = 0;
- __u64 flags = 0;
-
- int err;
- bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
- if (bytes)
- err1 = bpf_msg_apply_bytes(msg, *bytes);
- bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
- if (bytes)
- err2 = bpf_msg_cork_bytes(msg, *bytes);
- len1 = (__u64)msg->data_end - (__u64)msg->data;
-
- start = bpf_map_lookup_elem(&sock_bytes, &zero);
- end = bpf_map_lookup_elem(&sock_bytes, &one);
- if (start && end) {
- bpf_printk("sk_msg2: pull(%i:%i)\n",
- start ? *start : 0, end ? *end : 0);
- err = bpf_msg_pull_data(msg, *start, *end, 0);
- if (err)
- bpf_printk("sk_msg2: pull_data err %i\n",
- err);
- len2 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg2: length update %i->%i\n",
- len1, len2);
- }
-
- start_push = bpf_map_lookup_elem(&sock_bytes, &two);
- end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push) {
- bpf_printk("sk_msg4: push(%i:%i)\n",
- start_push ? *start_push : 0,
- end_push ? *end_push : 0);
- err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
- if (err)
- bpf_printk("sk_msg4: push_data err %i\n",
- err);
- len3 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg4: length push_update %i->%i\n",
- len2 ? len2 : len1, len3);
- }
-
- start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
- pop = bpf_map_lookup_elem(&sock_bytes, &five);
- if (start_pop && pop) {
- int err;
-
- bpf_printk("sk_msg4: pop(%i@%i)\n",
- start_pop, pop);
- err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
- if (err)
- bpf_printk("sk_msg4: pop_data err %i\n", err);
- len4 = (__u64)msg->data_end - (__u64)msg->data;
- bpf_printk("sk_msg4: length pop_data %i->%i\n",
- len1 ? len1 : 0, len4);
- }
-
-
- f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
- if (f && *f) {
- key = 2;
- flags = *f;
- }
- bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
- len1, flags, err1 ? err1 : err2);
-#ifdef SOCKMAP
- err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
-#else
- err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
-#endif
- bpf_printk("sk_msg3: err %i\n", err);
- return err;
-}
-
-SEC("sk_msg5")
+SEC("sk_msg3")
int bpf_prog8(struct sk_msg_md *msg)
{
void *data_end = (void *)(long) msg->data_end;
@@ -401,7 +294,7 @@ int bpf_prog8(struct sk_msg_md *msg)
}
return SK_PASS;
}
-SEC("sk_msg6")
+SEC("sk_msg4")
int bpf_prog9(struct sk_msg_md *msg)
{
void *data_end = (void *)(long) msg->data_end;
@@ -419,7 +312,7 @@ int bpf_prog9(struct sk_msg_md *msg)
return SK_PASS;
}
-SEC("sk_msg7")
+SEC("sk_msg5")
int bpf_prog10(struct sk_msg_md *msg)
{
int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
@@ -443,7 +336,6 @@ int bpf_prog10(struct sk_msg_md *msg)
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop)
bpf_msg_pop_data(msg, *start_pop, *pop, 0);
- bpf_printk("return sk drop\n");
return SK_DROP;
}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
new file mode 100644
index 000000000000..a3a366c57ce1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+
+#include <errno.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, unsigned int);
+} verdict_map SEC(".maps");
+
+static volatile bool test_sockmap; /* toggled by user-space */
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ unsigned int *count;
+ __u32 zero = 0;
+ int verdict;
+
+ if (test_sockmap)
+ verdict = bpf_sk_redirect_map(skb, &sock_map, zero, 0);
+ else
+ verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero, 0);
+
+ count = bpf_map_lookup_elem(&verdict_map, &verdict);
+ if (count)
+ (*count)++;
+
+ return verdict;
+}
+
+SEC("sk_msg")
+int prog_msg_verdict(struct sk_msg_md *msg)
+{
+ unsigned int *count;
+ __u32 zero = 0;
+ int verdict;
+
+ if (test_sockmap)
+ verdict = bpf_msg_redirect_map(msg, &sock_map, zero, 0);
+ else
+ verdict = bpf_msg_redirect_hash(msg, &sock_hash, &zero, 0);
+
+ count = bpf_map_lookup_elem(&verdict_map, &verdict);
+ if (count)
+ (*count)++;
+
+ return verdict;
+}
+
+SEC("sk_reuseport")
+int prog_reuseport(struct sk_reuseport_md *reuse)
+{
+ unsigned int *count;
+ int err, verdict;
+ __u32 zero = 0;
+
+ if (test_sockmap)
+ err = bpf_sk_select_reuseport(reuse, &sock_map, &zero, 0);
+ else
+ err = bpf_sk_select_reuseport(reuse, &sock_hash, &zero, 0);
+ verdict = err ? SK_DROP : SK_PASS;
+
+ count = bpf_map_lookup_elem(&verdict_map, &verdict);
+ if (count)
+ (*count)++;
+
+ return verdict;
+}
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 2d0b0b82a78a..50525235380e 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -45,7 +45,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
unsigned long tcp_mem[3] = {0, 0, 0};
char value[MAX_VALUE_STR_LEN];
unsigned char i, off = 0;
- int ret;
+ volatile int ret;
if (ctx->write)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
index e51e6e3a81c2..f030e469d05b 100644
--- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c
+++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
@@ -2,7 +2,8 @@
#include <stdbool.h>
#include <stddef.h>
#include <linux/bpf.h>
-#include "bpf_trace_helpers.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
struct task_struct;
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
new file mode 100644
index 000000000000..5611b564d3b1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <asm/unistd.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MY_TV_NSEC 1337
+
+bool tp_called = false;
+bool raw_tp_called = false;
+bool tp_btf_called = false;
+bool kprobe_called = false;
+bool fentry_called = false;
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle__tp(struct trace_event_raw_sys_enter *args)
+{
+ struct __kernel_timespec *ts;
+
+ if (args->id != __NR_nanosleep)
+ return 0;
+
+ ts = (void *)args->args[0];
+ if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ return 0;
+
+ tp_called = true;
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
+{
+ struct __kernel_timespec *ts;
+
+ if (id != __NR_nanosleep)
+ return 0;
+
+ ts = (void *)PT_REGS_PARM1_CORE(regs);
+ if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ return 0;
+
+ raw_tp_called = true;
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
+{
+ struct __kernel_timespec *ts;
+
+ if (id != __NR_nanosleep)
+ return 0;
+
+ ts = (void *)PT_REGS_PARM1_CORE(regs);
+ if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ return 0;
+
+ tp_btf_called = true;
+ return 0;
+}
+
+SEC("kprobe/hrtimer_nanosleep")
+int BPF_KPROBE(handle__kprobe,
+ ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+{
+ if (rqtp == MY_TV_NSEC)
+ kprobe_called = true;
+ return 0;
+}
+
+SEC("fentry/hrtimer_nanosleep")
+int BPF_PROG(handle__fentry,
+ ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+{
+ if (rqtp == MY_TV_NSEC)
+ fentry_called = true;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
new file mode 100644
index 000000000000..3d66599eee2e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp_adjust_tail_grow")
+int _xdp_adjust_tail_grow(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ unsigned int data_len;
+ int offset = 0;
+
+ /* Data length determine test case */
+ data_len = data_end - data;
+
+ if (data_len == 54) { /* sizeof(pkt_v4) */
+ offset = 4096; /* test too large offset */
+ } else if (data_len == 74) { /* sizeof(pkt_v6) */
+ offset = 40;
+ } else if (data_len == 64) {
+ offset = 128;
+ } else if (data_len == 128) {
+ offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+ } else {
+ return XDP_ABORTED; /* No matching test */
+ }
+
+ if (bpf_xdp_adjust_tail(xdp, offset))
+ return XDP_DROP;
+ return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_adjust_tail.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
index b7fc85769bdc..22065a9cfb25 100644
--- a/tools/testing/selftests/bpf/progs/test_adjust_tail.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
@@ -1,5 +1,5 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2018 Facebook
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of version 2 of the GNU General Public
@@ -11,15 +11,15 @@
int _version SEC("version") = 1;
-SEC("xdp_adjust_tail")
-int _xdp_adjust_tail(struct xdp_md *xdp)
+SEC("xdp_adjust_tail_shrink")
+int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
void *data = (void *)(long)xdp->data;
int offset = 0;
- if (data_end - data == 54)
- offset = 256;
+ if (data_end - data == 54) /* sizeof(pkt_v4) */
+ offset = 256; /* shrink too much */
else
offset = 20;
if (bpf_xdp_adjust_tail(xdp, 0 - offset))
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
index cb8a04ab7a78..a038e827f850 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/bpf.h>
+#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_trace_helpers.h"
+
+char _license[] SEC("license") = "GPL";
struct net_device {
/* Structure does not need to contain all entries,
@@ -27,16 +29,38 @@ struct xdp_buff {
struct xdp_rxq_info *rxq;
} __attribute__((preserve_access_index));
+struct meta {
+ int ifindex;
+ int pkt_len;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} perf_buf_map SEC(".maps");
+
__u64 test_result_fentry = 0;
-SEC("fentry/_xdp_tx_iptunnel")
+SEC("fentry/FUNC")
int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
{
+ struct meta meta;
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+
+ meta.ifindex = xdp->rxq->dev->ifindex;
+ meta.pkt_len = data_end - data;
+ bpf_xdp_output(xdp, &perf_buf_map,
+ ((__u64) meta.pkt_len << 32) |
+ BPF_F_CURRENT_CPU,
+ &meta, sizeof(meta));
+
test_result_fentry = xdp->rxq->dev->ifindex;
return 0;
}
__u64 test_result_fexit = 0;
-SEC("fexit/_xdp_tx_iptunnel")
+SEC("fexit/FUNC")
int BPF_PROG(trace_on_exit, struct xdp_buff *xdp, int ret)
{
test_result_fexit = ret;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
new file mode 100644
index 000000000000..e5c0f131c8a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* fails to load without expected_attach_type = BPF_XDP_DEVMAP
+ * because of access to egress_ifindex
+ */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp_dm_log")
+int xdpdm_devlog(struct xdp_md *ctx)
+{
+ char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ unsigned int len = data_end - data;
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ ctx->ingress_ifindex, ctx->egress_ifindex, len);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
new file mode 100644
index 000000000000..deef0e050863
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+SEC("xdp_redir")
+int xdp_redir_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&dm_ports, 1, 0);
+}
+
+/* invalid program on DEVMAP entry;
+ * SEC name means expected attach type not set
+ */
+SEC("xdp_dummy")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp_devmap")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+ char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ unsigned int len = data_end - data;
+
+ bpf_trace_printk(fmt, sizeof(fmt),
+ ctx->ingress_ifindex, ctx->egress_ifindex, len);
+
+ return XDP_PASS;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
new file mode 100644
index 000000000000..8b36b6640e7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <asm/unistd.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+long hits = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bench_trigger_tp(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
+{
+ if (id == __NR_getpgid)
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("kprobe/__x64_sys_getpgid")
+int bench_trigger_kprobe(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_trigger_fentry(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("fmod_ret/__x64_sys_getpgid")
+int bench_trigger_fmodret(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return -22;
+}
diff --git a/tools/testing/selftests/bpf/test_bpftool.py b/tools/testing/selftests/bpf/test_bpftool.py
new file mode 100644
index 000000000000..4fed2dc25c0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool.py
@@ -0,0 +1,178 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2020 SUSE LLC.
+
+import collections
+import functools
+import json
+import os
+import socket
+import subprocess
+import unittest
+
+
+# Add the source tree of bpftool and /usr/local/sbin to PATH
+cur_dir = os.path.dirname(os.path.realpath(__file__))
+bpftool_dir = os.path.abspath(os.path.join(cur_dir, "..", "..", "..", "..",
+ "tools", "bpf", "bpftool"))
+os.environ["PATH"] = bpftool_dir + ":/usr/local/sbin:" + os.environ["PATH"]
+
+
+class IfaceNotFoundError(Exception):
+ pass
+
+
+class UnprivilegedUserError(Exception):
+ pass
+
+
+def _bpftool(args, json=True):
+ _args = ["bpftool"]
+ if json:
+ _args.append("-j")
+ _args.extend(args)
+
+ return subprocess.check_output(_args)
+
+
+def bpftool(args):
+ return _bpftool(args, json=False).decode("utf-8")
+
+
+def bpftool_json(args):
+ res = _bpftool(args)
+ return json.loads(res)
+
+
+def get_default_iface():
+ for iface in socket.if_nameindex():
+ if iface[1] != "lo":
+ return iface[1]
+ raise IfaceNotFoundError("Could not find any network interface to probe")
+
+
+def default_iface(f):
+ @functools.wraps(f)
+ def wrapper(*args, **kwargs):
+ iface = get_default_iface()
+ return f(*args, iface, **kwargs)
+ return wrapper
+
+
+class TestBpftool(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ if os.getuid() != 0:
+ raise UnprivilegedUserError(
+ "This test suite needs root privileges")
+
+ @default_iface
+ def test_feature_dev_json(self, iface):
+ unexpected_helpers = [
+ "bpf_probe_write_user",
+ "bpf_trace_printk",
+ ]
+ expected_keys = [
+ "syscall_config",
+ "program_types",
+ "map_types",
+ "helpers",
+ "misc",
+ ]
+
+ res = bpftool_json(["feature", "probe", "dev", iface])
+ # Check if the result has all expected keys.
+ self.assertCountEqual(res.keys(), expected_keys)
+ # Check if unexpected helpers are not included in helpers probes
+ # result.
+ for helpers in res["helpers"].values():
+ for unexpected_helper in unexpected_helpers:
+ self.assertNotIn(unexpected_helper, helpers)
+
+ def test_feature_kernel(self):
+ test_cases = [
+ bpftool_json(["feature", "probe", "kernel"]),
+ bpftool_json(["feature", "probe"]),
+ bpftool_json(["feature"]),
+ ]
+ unexpected_helpers = [
+ "bpf_probe_write_user",
+ "bpf_trace_printk",
+ ]
+ expected_keys = [
+ "syscall_config",
+ "system_config",
+ "program_types",
+ "map_types",
+ "helpers",
+ "misc",
+ ]
+
+ for tc in test_cases:
+ # Check if the result has all expected keys.
+ self.assertCountEqual(tc.keys(), expected_keys)
+ # Check if unexpected helpers are not included in helpers probes
+ # result.
+ for helpers in tc["helpers"].values():
+ for unexpected_helper in unexpected_helpers:
+ self.assertNotIn(unexpected_helper, helpers)
+
+ def test_feature_kernel_full(self):
+ test_cases = [
+ bpftool_json(["feature", "probe", "kernel", "full"]),
+ bpftool_json(["feature", "probe", "full"]),
+ ]
+ expected_helpers = [
+ "bpf_probe_write_user",
+ "bpf_trace_printk",
+ ]
+
+ for tc in test_cases:
+ # Check if expected helpers are included at least once in any
+ # helpers list for any program type. Unfortunately we cannot assume
+ # that they will be included in all program types or a specific
+ # subset of programs. It depends on the kernel version and
+ # configuration.
+ found_helpers = False
+
+ for helpers in tc["helpers"].values():
+ if all(expected_helper in helpers
+ for expected_helper in expected_helpers):
+ found_helpers = True
+ break
+
+ self.assertTrue(found_helpers)
+
+ def test_feature_kernel_full_vs_not_full(self):
+ full_res = bpftool_json(["feature", "probe", "full"])
+ not_full_res = bpftool_json(["feature", "probe"])
+ not_full_set = set()
+ full_set = set()
+
+ for helpers in full_res["helpers"].values():
+ for helper in helpers:
+ full_set.add(helper)
+
+ for helpers in not_full_res["helpers"].values():
+ for helper in helpers:
+ not_full_set.add(helper)
+
+ self.assertCountEqual(full_set - not_full_set,
+ {"bpf_probe_write_user", "bpf_trace_printk"})
+ self.assertCountEqual(not_full_set - full_set, set())
+
+ def test_feature_macros(self):
+ expected_patterns = [
+ r"/\*\*\* System call availability \*\*\*/",
+ r"#define HAVE_BPF_SYSCALL",
+ r"/\*\*\* eBPF program types \*\*\*/",
+ r"#define HAVE.*PROG_TYPE",
+ r"/\*\*\* eBPF map types \*\*\*/",
+ r"#define HAVE.*MAP_TYPE",
+ r"/\*\*\* eBPF helper functions \*\*\*/",
+ r"#define HAVE.*HELPER",
+ r"/\*\*\* eBPF misc features \*\*\*/",
+ ]
+
+ res = bpftool(["feature", "probe", "macros"])
+ for pattern in expected_patterns:
+ self.assertRegex(res, pattern)
diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh
new file mode 100755
index 000000000000..66690778e36d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2020 SUSE LLC.
+
+python3 -m unittest -v test_bpftool.TestBpftool
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/test_btf.c
index 8da77cda5f4a..305fae8f80a9 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/test_btf.c
@@ -2854,7 +2854,7 @@ static struct btf_raw_test raw_tests[] = {
.value_type_id = 1,
.max_entries = 4,
.btf_load_err = true,
- .err_str = "vlen != 0",
+ .err_str = "Invalid func linkage",
},
{
diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
new file mode 100644
index 000000000000..ed253f252cd0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
+#define _GNU_SOURCE
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sched.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include "test_progs.h"
+
+#define CHECK_NEWNS(condition, tag, format...) ({ \
+ int __ret = !!(condition); \
+ if (__ret) { \
+ printf("%s:FAIL:%s ", __func__, tag); \
+ printf(format); \
+ } else { \
+ printf("%s:PASS:%s\n", __func__, tag); \
+ } \
+ __ret; \
+})
+
+struct bss {
+ __u64 dev;
+ __u64 ino;
+ __u64 pid_tgid;
+ __u64 user_pid_tgid;
+};
+
+int main(int argc, char **argv)
+{
+ pid_t pid;
+ int exit_code = 1;
+ struct stat st;
+
+ printf("Testing bpf_get_ns_current_pid_tgid helper in new ns\n");
+
+ if (stat("/proc/self/ns/pid", &st)) {
+ perror("stat failed on /proc/self/ns/pid ns\n");
+ printf("%s:FAILED\n", argv[0]);
+ return exit_code;
+ }
+
+ if (CHECK_NEWNS(unshare(CLONE_NEWPID | CLONE_NEWNS),
+ "unshare CLONE_NEWPID | CLONE_NEWNS", "error errno=%d\n", errno))
+ return exit_code;
+
+ pid = fork();
+ if (pid == -1) {
+ perror("Fork() failed\n");
+ printf("%s:FAILED\n", argv[0]);
+ return exit_code;
+ }
+
+ if (pid > 0) {
+ int status;
+
+ usleep(5);
+ waitpid(pid, &status, 0);
+ return 0;
+ } else {
+
+ pid = fork();
+ if (pid == -1) {
+ perror("Fork() failed\n");
+ printf("%s:FAILED\n", argv[0]);
+ return exit_code;
+ }
+
+ if (pid > 0) {
+ int status;
+ waitpid(pid, &status, 0);
+ return 0;
+ } else {
+ if (CHECK_NEWNS(mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL),
+ "Unmounting proc", "Cannot umount proc! errno=%d\n", errno))
+ return exit_code;
+
+ if (CHECK_NEWNS(mount("proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL),
+ "Mounting proc", "Cannot mount proc! errno=%d\n", errno))
+ return exit_code;
+
+ const char *probe_name = "raw_tracepoint/sys_enter";
+ const char *file = "test_ns_current_pid_tgid.o";
+ struct bpf_link *link = NULL;
+ struct bpf_program *prog;
+ struct bpf_map *bss_map;
+ struct bpf_object *obj;
+ int exit_code = 1;
+ int err, key = 0;
+ struct bss bss;
+ struct stat st;
+ __u64 id;
+
+ obj = bpf_object__open_file(file, NULL);
+ if (CHECK_NEWNS(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+ return exit_code;
+
+ err = bpf_object__load(obj);
+ if (CHECK_NEWNS(err, "obj_load", "err %d errno %d\n", err, errno))
+ goto cleanup;
+
+ bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
+ if (CHECK_NEWNS(!bss_map, "find_bss_map", "failed\n"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_title(obj, probe_name);
+ if (CHECK_NEWNS(!prog, "find_prog", "prog '%s' not found\n",
+ probe_name))
+ goto cleanup;
+
+ memset(&bss, 0, sizeof(bss));
+ pid_t tid = syscall(SYS_gettid);
+ pid_t pid = getpid();
+
+ id = (__u64) tid << 32 | pid;
+ bss.user_pid_tgid = id;
+
+ if (CHECK_NEWNS(stat("/proc/self/ns/pid", &st),
+ "stat new ns", "Failed to stat /proc/self/ns/pid errno=%d\n", errno))
+ goto cleanup;
+
+ bss.dev = st.st_dev;
+ bss.ino = st.st_ino;
+
+ err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
+ if (CHECK_NEWNS(err, "setting_bss", "failed to set bss : %d\n", err))
+ goto cleanup;
+
+ link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+ if (CHECK_NEWNS(IS_ERR(link), "attach_raw_tp", "err %ld\n",
+ PTR_ERR(link))) {
+ link = NULL;
+ goto cleanup;
+ }
+
+ /* trigger some syscalls */
+ usleep(1);
+
+ err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
+ if (CHECK_NEWNS(err, "set_bss", "failed to get bss : %d\n", err))
+ goto cleanup;
+
+ if (CHECK_NEWNS(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
+ "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
+ goto cleanup;
+
+ exit_code = 0;
+ printf("%s:PASS\n", argv[0]);
+cleanup:
+ if (!link) {
+ bpf_link__destroy(link);
+ link = NULL;
+ }
+ bpf_object__close(obj);
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 02eae1e864c2..6a12a0e01e07 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -756,11 +756,7 @@ static void test_sockmap(unsigned int tasks, void *data)
/* Test update without programs */
for (i = 0; i < 6; i++) {
err = bpf_map_update_elem(fd, &i, &sfd[i], BPF_ANY);
- if (i < 2 && !err) {
- printf("Allowed update sockmap '%i:%i' not in ESTABLISHED\n",
- i, sfd[i]);
- goto out_sockmap;
- } else if (i >= 2 && err) {
+ if (err) {
printf("Failed noprog update sockmap '%i:%i'\n",
i, sfd[i]);
goto out_sockmap;
@@ -1398,23 +1394,25 @@ static void test_map_rdonly(void)
key = 1;
value = 1234;
- /* Insert key=1 element. */
+ /* Try to insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
errno == EPERM);
- /* Check that key=2 is not found. */
+ /* Check that key=1 is not found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+
+ close(fd);
}
-static void test_map_wronly(void)
+static void test_map_wronly_hash(void)
{
int fd, key = 0, value = 0;
fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
MAP_SIZE, map_flags | BPF_F_WRONLY);
if (fd < 0) {
- printf("Failed to create map for read only test '%s'!\n",
+ printf("Failed to create map for write only test '%s'!\n",
strerror(errno));
exit(1);
}
@@ -1424,9 +1422,49 @@ static void test_map_wronly(void)
/* Insert key=1 element. */
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
- /* Check that key=2 is not found. */
+ /* Check that reading elements and keys from the map is not allowed. */
assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+
+ close(fd);
+}
+
+static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
+{
+ int fd, value = 0;
+
+ assert(map_type == BPF_MAP_TYPE_QUEUE ||
+ map_type == BPF_MAP_TYPE_STACK);
+ fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE,
+ map_flags | BPF_F_WRONLY);
+ /* Stack/Queue maps do not support BPF_F_NO_PREALLOC */
+ if (map_flags & BPF_F_NO_PREALLOC) {
+ assert(fd < 0 && errno == EINVAL);
+ return;
+ }
+ if (fd < 0) {
+ printf("Failed to create map '%s'!\n", strerror(errno));
+ exit(1);
+ }
+
+ value = 1234;
+ assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
+
+ /* Peek element should fail */
+ assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+
+ /* Pop element should fail */
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+ errno == EPERM);
+
+ close(fd);
+}
+
+static void test_map_wronly(void)
+{
+ test_map_wronly_hash();
+ test_map_wronly_stack_or_queue(BPF_MAP_TYPE_STACK);
+ test_map_wronly_stack_or_queue(BPF_MAP_TYPE_QUEUE);
}
static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index bab1e6f1d8f1..54fa5fa688ce 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1,11 +1,16 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2017 Facebook
*/
+#define _GNU_SOURCE
#include "test_progs.h"
#include "cgroup_helpers.h"
#include "bpf_rlimit.h"
#include <argp.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
#include <string.h>
+#include <execinfo.h> /* backtrace */
/* defined in test_progs.h */
struct test_env env = {};
@@ -27,6 +32,20 @@ struct prog_test_def {
int old_error_cnt;
};
+/* Override C runtime library's usleep() implementation to ensure nanosleep()
+ * is always called. Usleep is frequently used in selftests as a way to
+ * trigger kprobe and tracepoints.
+ */
+int usleep(useconds_t usec)
+{
+ struct timespec ts = {
+ .tv_sec = usec / 1000000,
+ .tv_nsec = (usec % 1000000) * 1000,
+ };
+
+ return syscall(__NR_nanosleep, &ts, NULL);
+}
+
static bool should_run(struct test_selector *sel, int num, const char *name)
{
int i;
@@ -74,6 +93,34 @@ static void skip_account(void)
}
}
+static void stdio_restore(void);
+
+/* A bunch of tests set custom affinity per-thread and/or per-process. Reset
+ * it after each test/sub-test.
+ */
+static void reset_affinity() {
+
+ cpu_set_t cpuset;
+ int i, err;
+
+ CPU_ZERO(&cpuset);
+ for (i = 0; i < env.nr_cpus; i++)
+ CPU_SET(i, &cpuset);
+
+ err = sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ if (err < 0) {
+ stdio_restore();
+ fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
+ exit(-1);
+ }
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+ if (err < 0) {
+ stdio_restore();
+ fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
+ exit(-1);
+ }
+}
+
void test__end_subtest()
{
struct prog_test_def *test = env.test;
@@ -91,6 +138,8 @@ void test__end_subtest()
test->test_num, test->subtest_num,
test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
+ reset_affinity();
+
free(test->subtest_name);
test->subtest_name = NULL;
}
@@ -173,30 +222,13 @@ int test__join_cgroup(const char *path)
return fd;
}
-struct ipv4_packet pkt_v4 = {
- .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
- .iph.ihl = 5,
- .iph.protocol = IPPROTO_TCP,
- .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
- .tcp.urg_ptr = 123,
- .tcp.doff = 5,
-};
-
-struct ipv6_packet pkt_v6 = {
- .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
- .iph.nexthdr = IPPROTO_TCP,
- .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
- .tcp.urg_ptr = 123,
- .tcp.doff = 5,
-};
-
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
{
struct bpf_map *map;
map = bpf_object__find_map_by_name(obj, name);
if (!map) {
- printf("%s:FAIL:map '%s' not found\n", test, name);
+ fprintf(stdout, "%s:FAIL:map '%s' not found\n", test, name);
test__fail();
return -1;
}
@@ -302,25 +334,13 @@ int extract_build_id(char *build_id, size_t size)
len = size;
memcpy(build_id, line, len);
build_id[len] = '\0';
+ free(line);
return 0;
err:
fclose(fp);
return -1;
}
-void *spin_lock_thread(void *arg)
-{
- __u32 duration, retval;
- int err, prog_fd = *(u32 *) arg;
-
- err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
- pthread_exit(arg);
-}
-
/* extern declarations for test funcs */
#define DEFINE_TEST(name) extern void test_##name(void);
#include <prog_tests/tests.h>
@@ -367,10 +387,22 @@ static int libbpf_print_fn(enum libbpf_print_level level,
{
if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG)
return 0;
- vprintf(format, args);
+ vfprintf(stdout, format, args);
return 0;
}
+static void free_str_set(const struct str_set *set)
+{
+ int i;
+
+ if (!set)
+ return;
+
+ for (i = 0; i < set->cnt; i++)
+ free((void *)set->strs[i]);
+ free(set->strs);
+}
+
static int parse_str_list(const char *s, struct str_set *set)
{
char *input, *state = NULL, *next, **tmp, **strs = NULL;
@@ -406,66 +438,6 @@ err:
return -ENOMEM;
}
-int parse_num_list(const char *s, struct test_selector *sel)
-{
- int i, set_len = 0, num, start = 0, end = -1;
- bool *set = NULL, *tmp, parsing_end = false;
- char *next;
-
- while (s[0]) {
- errno = 0;
- num = strtol(s, &next, 10);
- if (errno)
- return -errno;
-
- if (parsing_end)
- end = num;
- else
- start = num;
-
- if (!parsing_end && *next == '-') {
- s = next + 1;
- parsing_end = true;
- continue;
- } else if (*next == ',') {
- parsing_end = false;
- s = next + 1;
- end = num;
- } else if (*next == '\0') {
- parsing_end = false;
- s = next;
- end = num;
- } else {
- return -EINVAL;
- }
-
- if (start > end)
- return -EINVAL;
-
- if (end + 1 > set_len) {
- set_len = end + 1;
- tmp = realloc(set, set_len);
- if (!tmp) {
- free(set);
- return -ENOMEM;
- }
- set = tmp;
- }
- for (i = start; i <= end; i++) {
- set[i] = true;
- }
-
- }
-
- if (!set)
- return -EINVAL;
-
- sel->num_set = set;
- sel->num_set_len = set_len;
-
- return 0;
-}
-
extern int extra_prog_load_log_flags;
static error_t parse_arg(int key, char *arg, struct argp_state *state)
@@ -479,13 +451,15 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
if (subtest_str) {
*subtest_str = '\0';
if (parse_num_list(subtest_str + 1,
- &env->subtest_selector)) {
+ &env->subtest_selector.num_set,
+ &env->subtest_selector.num_set_len)) {
fprintf(stderr,
"Failed to parse subtest numbers.\n");
return -EINVAL;
}
}
- if (parse_num_list(arg, &env->test_selector)) {
+ if (parse_num_list(arg, &env->test_selector.num_set,
+ &env->test_selector.num_set_len)) {
fprintf(stderr, "Failed to parse test numbers.\n");
return -EINVAL;
}
@@ -613,10 +587,27 @@ int cd_flavor_subdir(const char *exec_name)
if (!flavor)
return 0;
flavor++;
- printf("Switching to flavor '%s' subdirectory...\n", flavor);
+ fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
return chdir(flavor);
}
+#define MAX_BACKTRACE_SZ 128
+void crash_handler(int signum)
+{
+ void *bt[MAX_BACKTRACE_SZ];
+ size_t sz;
+
+ sz = backtrace(bt, ARRAY_SIZE(bt));
+
+ if (env.test)
+ dump_test_log(env.test, true);
+ if (env.stdout)
+ stdio_restore();
+
+ fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
+ backtrace_symbols_fd(bt, sz, STDERR_FILENO);
+}
+
int main(int argc, char **argv)
{
static const struct argp argp = {
@@ -624,8 +615,14 @@ int main(int argc, char **argv)
.parser = parse_arg,
.doc = argp_program_doc,
};
+ struct sigaction sigact = {
+ .sa_handler = crash_handler,
+ .sa_flags = SA_RESETHAND,
+ };
int err, i;
+ sigaction(SIGSEGV, &sigact, NULL);
+
err = argp_parse(&argp, argc, argv, 0, NULL, &env);
if (err)
return err;
@@ -639,6 +636,12 @@ int main(int argc, char **argv)
srand(time(NULL));
env.jit_enabled = is_jit_enabled();
+ env.nr_cpus = libbpf_num_possible_cpus();
+ if (env.nr_cpus < 0) {
+ fprintf(stderr, "Failed to get number of CPUs: %d!\n",
+ env.nr_cpus);
+ return -1;
+ }
stdio_hijack();
for (i = 0; i < prog_test_cnt; i++) {
@@ -669,18 +672,19 @@ int main(int argc, char **argv)
test->test_num, test->test_name,
test->error_cnt ? "FAIL" : "OK");
+ reset_affinity();
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
}
stdio_restore();
- printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
- env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+ fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+ env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
- free(env.test_selector.blacklist.strs);
- free(env.test_selector.whitelist.strs);
+ free_str_set(&env.test_selector.blacklist);
+ free_str_set(&env.test_selector.whitelist);
free(env.test_selector.num_set);
- free(env.subtest_selector.blacklist.strs);
- free(env.subtest_selector.whitelist.strs);
+ free_str_set(&env.subtest_selector.blacklist);
+ free_str_set(&env.subtest_selector.whitelist);
free(env.subtest_selector.num_set);
return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index bcfa9ef23fda..f4503c926aca 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -37,6 +37,7 @@ typedef __u16 __sum16;
#include "bpf_util.h"
#include <bpf/bpf_endian.h>
#include "trace_helpers.h"
+#include "testing_helpers.h"
#include "flow_dissector_load.h"
enum verbosity {
@@ -71,6 +72,7 @@ struct test_env {
FILE *stderr;
char *log_buf;
size_t log_cnt;
+ int nr_cpus;
int succ_cnt; /* successful tests */
int sub_succ_cnt; /* successful sub-tests */
@@ -86,33 +88,22 @@ extern void test__skip(void);
extern void test__fail(void);
extern int test__join_cgroup(const char *path);
-#define MAGIC_BYTES 123
-
-/* ipv4 test vector */
-struct ipv4_packet {
- struct ethhdr eth;
- struct iphdr iph;
- struct tcphdr tcp;
-} __packed;
-extern struct ipv4_packet pkt_v4;
-
-/* ipv6 test vector */
-struct ipv6_packet {
- struct ethhdr eth;
- struct ipv6hdr iph;
- struct tcphdr tcp;
-} __packed;
-extern struct ipv6_packet pkt_v6;
+#define PRINT_FAIL(format...) \
+ ({ \
+ test__fail(); \
+ fprintf(stdout, "%s:FAIL:%d ", __func__, __LINE__); \
+ fprintf(stdout, ##format); \
+ })
#define _CHECK(condition, tag, duration, format...) ({ \
int __ret = !!(condition); \
int __save_errno = errno; \
if (__ret) { \
test__fail(); \
- printf("%s:FAIL:%s ", __func__, tag); \
- printf(format); \
+ fprintf(stdout, "%s:FAIL:%s ", __func__, tag); \
+ fprintf(stdout, ##format); \
} else { \
- printf("%s:PASS:%s %d nsec\n", \
+ fprintf(stdout, "%s:PASS:%s %d nsec\n", \
__func__, tag, duration); \
} \
errno = __save_errno; \
@@ -124,7 +115,7 @@ extern struct ipv6_packet pkt_v6;
int __save_errno = errno; \
if (__ret) { \
test__fail(); \
- printf("%s:FAIL:%d\n", __func__, __LINE__); \
+ fprintf(stdout, "%s:FAIL:%d\n", __func__, __LINE__); \
} \
errno = __save_errno; \
__ret; \
@@ -135,10 +126,6 @@ extern struct ipv6_packet pkt_v6;
#define CHECK_ATTR(condition, tag, format...) \
_CHECK(condition, tag, tattr.duration, format)
-#define MAGIC_VAL 0x1234
-#define NUM_ITER 100000
-#define VIP_NUM 5
-
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
@@ -148,7 +135,6 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
int extract_build_id(char *build_id, size_t size);
-void *spin_lock_thread(void *arg);
#ifdef __x86_64__
#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 61fd95b89af8..0358814c67dc 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -677,7 +677,7 @@ static int bind4_prog_load(const struct sock_addr_test *test)
uint8_t u4_addr8[4];
uint16_t u4_addr16[2];
uint32_t u4_addr32;
- } ip4;
+ } ip4, port;
struct sockaddr_in addr4_rw;
if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
@@ -685,6 +685,8 @@ static int bind4_prog_load(const struct sock_addr_test *test)
return -1;
}
+ port.u4_addr32 = htons(SERV4_PORT);
+
if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
(struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
return -1;
@@ -696,49 +698,65 @@ static int bind4_prog_load(const struct sock_addr_test *test)
/* if (sk.family == AF_INET && */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 24),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 32),
/* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, type)),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
BPF_JMP_A(1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 20),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 28),
/* 1st_byte_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 18),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 26),
/* 2nd_byte_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4) + 1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 16),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 24),
/* 3rd_byte_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4) + 2),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 14),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 22),
/* 4th_byte_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4) + 3),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 12),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 20),
/* 1st_half_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 10),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 18),
/* 2nd_half_of_user_ip4 == expected && */
BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4) + 2),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 8),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 16),
- /* whole_user_ip4 == expected) { */
+ /* whole_user_ip4 == expected && */
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
offsetof(struct bpf_sock_addr, user_ip4)),
BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
+ BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 12),
+
+ /* 1st_byte_of_user_port == expected && */
+ BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr8[0], 10),
+
+ /* 1st_half_of_user_port == expected && */
+ BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr16[0], 8),
+
+ /* user_port == expected) { */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock_addr, user_port)),
+ BPF_LD_IMM64(BPF_REG_8, port.u4_addr32), /* See [2]. */
BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
/* user_ip4 = addr4_rw.sin_addr */
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 779e11da979c..37695fc8096a 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -54,7 +54,7 @@ static void running_handler(int a);
#define S1_PORT 10000
#define S2_PORT 10001
-#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
+#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
#define CG_PATH "/sockmap"
@@ -63,14 +63,12 @@ int s1, s2, c1, c2, p1, p2;
int test_cnt;
int passed;
int failed;
-int map_fd[8];
-struct bpf_map *maps[8];
+int map_fd[9];
+struct bpf_map *maps[9];
int prog_fd[11];
int txmsg_pass;
-int txmsg_noisy;
int txmsg_redir;
-int txmsg_redir_noisy;
int txmsg_drop;
int txmsg_apply;
int txmsg_cork;
@@ -81,7 +79,10 @@ int txmsg_end_push;
int txmsg_start_pop;
int txmsg_pop;
int txmsg_ingress;
-int txmsg_skb;
+int txmsg_redir_skb;
+int txmsg_ktls_skb;
+int txmsg_ktls_skb_drop;
+int txmsg_ktls_skb_redir;
int ktls;
int peek_flag;
@@ -89,15 +90,13 @@ static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
{"cgroup", required_argument, NULL, 'c' },
{"rate", required_argument, NULL, 'r' },
- {"verbose", no_argument, NULL, 'v' },
+ {"verbose", optional_argument, NULL, 'v' },
{"iov_count", required_argument, NULL, 'i' },
{"length", required_argument, NULL, 'l' },
{"test", required_argument, NULL, 't' },
{"data_test", no_argument, NULL, 'd' },
{"txmsg", no_argument, &txmsg_pass, 1 },
- {"txmsg_noisy", no_argument, &txmsg_noisy, 1 },
{"txmsg_redir", no_argument, &txmsg_redir, 1 },
- {"txmsg_redir_noisy", no_argument, &txmsg_redir_noisy, 1},
{"txmsg_drop", no_argument, &txmsg_drop, 1 },
{"txmsg_apply", required_argument, NULL, 'a'},
{"txmsg_cork", required_argument, NULL, 'k'},
@@ -108,12 +107,108 @@ static const struct option long_options[] = {
{"txmsg_start_pop", required_argument, NULL, 'w'},
{"txmsg_pop", required_argument, NULL, 'x'},
{"txmsg_ingress", no_argument, &txmsg_ingress, 1 },
- {"txmsg_skb", no_argument, &txmsg_skb, 1 },
+ {"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 },
{"ktls", no_argument, &ktls, 1 },
{"peek", no_argument, &peek_flag, 1 },
+ {"whitelist", required_argument, NULL, 'n' },
+ {"blacklist", required_argument, NULL, 'b' },
{0, 0, NULL, 0 }
};
+struct test_env {
+ const char *type;
+ const char *subtest;
+ const char *prepend;
+
+ int test_num;
+ int subtest_num;
+
+ int succ_cnt;
+ int fail_cnt;
+ int fail_last;
+};
+
+struct test_env env;
+
+struct sockmap_options {
+ int verbose;
+ bool base;
+ bool sendpage;
+ bool data_test;
+ bool drop_expected;
+ int iov_count;
+ int iov_length;
+ int rate;
+ char *map;
+ char *whitelist;
+ char *blacklist;
+ char *prepend;
+};
+
+struct _test {
+ char *title;
+ void (*tester)(int cg_fd, struct sockmap_options *opt);
+};
+
+static void test_start(void)
+{
+ env.subtest_num++;
+}
+
+static void test_fail(void)
+{
+ env.fail_cnt++;
+}
+
+static void test_pass(void)
+{
+ env.succ_cnt++;
+}
+
+static void test_reset(void)
+{
+ txmsg_start = txmsg_end = 0;
+ txmsg_start_pop = txmsg_pop = 0;
+ txmsg_start_push = txmsg_end_push = 0;
+ txmsg_pass = txmsg_drop = txmsg_redir = 0;
+ txmsg_apply = txmsg_cork = 0;
+ txmsg_ingress = txmsg_redir_skb = 0;
+ txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
+}
+
+static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
+{
+ env.type = o->map;
+ env.subtest = t->title;
+ env.prepend = o->prepend;
+ env.test_num++;
+ env.subtest_num = 0;
+ env.fail_last = env.fail_cnt;
+ test_reset();
+ return 0;
+}
+
+static void test_end_subtest(void)
+{
+ int error = env.fail_cnt - env.fail_last;
+ int type = strcmp(env.type, BPF_SOCKMAP_FILENAME);
+
+ if (!error)
+ test_pass();
+
+ fprintf(stdout, "#%2d/%2d %8s:%s:%s:%s\n",
+ env.test_num, env.subtest_num,
+ !type ? "sockmap" : "sockhash",
+ env.prepend ? : "",
+ env.subtest, error ? "FAIL" : "OK");
+}
+
+static void test_print_results(void)
+{
+ fprintf(stdout, "Pass: %d Fail: %d\n",
+ env.succ_cnt, env.fail_cnt);
+}
+
static void usage(char *argv[])
{
int i;
@@ -296,7 +391,7 @@ static int sockmap_init_sockets(int verbose)
return errno;
}
- if (verbose) {
+ if (verbose > 1) {
printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
c1, s1, c2, s2);
@@ -311,17 +406,6 @@ struct msg_stats {
struct timespec end;
};
-struct sockmap_options {
- int verbose;
- bool base;
- bool sendpage;
- bool data_test;
- bool drop_expected;
- int iov_count;
- int iov_length;
- int rate;
-};
-
static int msg_loop_sendpage(int fd, int iov_length, int cnt,
struct msg_stats *s,
struct sockmap_options *opt)
@@ -345,14 +429,18 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
clock_gettime(CLOCK_MONOTONIC, &s->start);
for (i = 0; i < cnt; i++) {
- int sent = sendfile(fd, fp, NULL, iov_length);
+ int sent;
+
+ errno = 0;
+ sent = sendfile(fd, fp, NULL, iov_length);
if (!drop && sent < 0) {
- perror("send loop error");
+ perror("sendpage loop error");
fclose(file);
return sent;
} else if (drop && sent >= 0) {
- printf("sendpage loop error expected: %i\n", sent);
+ printf("sendpage loop error expected: %i errno %i\n",
+ sent, errno);
fclose(file);
return -EIO;
}
@@ -418,14 +506,41 @@ unwind_iov:
static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
{
- int i, j, bytes_cnt = 0;
+ int i, j = 0, bytes_cnt = 0;
unsigned char k = 0;
for (i = 0; i < msg->msg_iovlen; i++) {
unsigned char *d = msg->msg_iov[i].iov_base;
- for (j = 0;
- j < msg->msg_iov[i].iov_len && size; j++) {
+ /* Special case test for skb ingress + ktls */
+ if (i == 0 && txmsg_ktls_skb) {
+ if (msg->msg_iov[i].iov_len < 4)
+ return -EIO;
+ if (txmsg_ktls_skb_redir) {
+ if (memcmp(&d[13], "PASS", 4) != 0) {
+ fprintf(stderr,
+ "detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
+ return -EIO;
+ }
+ d[13] = 0;
+ d[14] = 1;
+ d[15] = 2;
+ d[16] = 3;
+ j = 13;
+ } else if (txmsg_ktls_skb) {
+ if (memcmp(d, "PASS", 4) != 0) {
+ fprintf(stderr,
+ "detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
+ return -EIO;
+ }
+ d[0] = 0;
+ d[1] = 1;
+ d[2] = 2;
+ d[3] = 3;
+ }
+ }
+
+ for (; j < msg->msg_iov[i].iov_len && size; j++) {
if (d[j] != k++) {
fprintf(stderr,
"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
@@ -464,13 +579,18 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
if (tx) {
clock_gettime(CLOCK_MONOTONIC, &s->start);
for (i = 0; i < cnt; i++) {
- int sent = sendmsg(fd, &msg, flags);
+ int sent;
+
+ errno = 0;
+ sent = sendmsg(fd, &msg, flags);
if (!drop && sent < 0) {
- perror("send loop error");
+ perror("sendmsg loop error");
goto out_errno;
} else if (drop && sent >= 0) {
- printf("send loop error expected: %i\n", sent);
+ fprintf(stderr,
+ "sendmsg loop error expected: %i errno %i\n",
+ sent, errno);
errno = -EIO;
goto out_errno;
}
@@ -497,9 +617,10 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
* paths.
*/
total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
- txmsg_pop_total = txmsg_pop;
if (txmsg_apply)
- txmsg_pop_total *= (total_bytes / txmsg_apply);
+ txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
+ else
+ txmsg_pop_total = txmsg_pop * cnt;
total_bytes -= txmsg_pop_total;
err = clock_gettime(CLOCK_MONOTONIC, &s->start);
if (err < 0)
@@ -633,14 +754,18 @@ static int sendmsg_test(struct sockmap_options *opt)
rxpid = fork();
if (rxpid == 0) {
- if (opt->drop_expected)
- exit(0);
+ iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
+ if (opt->drop_expected || txmsg_ktls_skb_drop)
+ _exit(0);
+
+ if (!iov_buf) /* zero bytes sent case */
+ _exit(0);
if (opt->sendpage)
iov_count = 1;
err = msg_loop(rx_fd, iov_count, iov_buf,
cnt, &s, false, opt);
- if (opt->verbose)
+ if (opt->verbose > 1)
fprintf(stderr,
"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
iov_count, iov_buf, cnt, err);
@@ -648,7 +773,7 @@ static int sendmsg_test(struct sockmap_options *opt)
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
}
- if (opt->verbose)
+ if (opt->verbose > 1)
fprintf(stdout,
"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
s.bytes_sent, sent_Bps, sent_Bps/giga,
@@ -678,7 +803,7 @@ static int sendmsg_test(struct sockmap_options *opt)
sent_Bps = sentBps(s);
recvd_Bps = recvdBps(s);
}
- if (opt->verbose)
+ if (opt->verbose > 1)
fprintf(stdout,
"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
s.bytes_sent, sent_Bps, sent_Bps/giga,
@@ -694,14 +819,14 @@ static int sendmsg_test(struct sockmap_options *opt)
if (WIFEXITED(rx_status)) {
err = WEXITSTATUS(rx_status);
if (err) {
- fprintf(stderr, "rx thread exited with err %d. ", err);
+ fprintf(stderr, "rx thread exited with err %d.\n", err);
goto out;
}
}
if (WIFEXITED(tx_status)) {
err = WEXITSTATUS(tx_status);
if (err)
- fprintf(stderr, "tx thread exited with err %d. ", err);
+ fprintf(stderr, "tx thread exited with err %d.\n", err);
}
out:
return err;
@@ -783,6 +908,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt)
}
enum {
+ SELFTESTS,
PING_PONG,
SENDMSG,
BASE,
@@ -816,8 +942,28 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
return err;
}
+ /* Attach programs to TLS sockmap */
+ if (txmsg_ktls_skb) {
+ err = bpf_prog_attach(prog_fd[0], map_fd[8],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[8], err, strerror(errno));
+ return err;
+ }
+
+ err = bpf_prog_attach(prog_fd[2], map_fd[8],
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err) {
+ fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
+ err, strerror(errno));
+ return err;
+ }
+ }
+
/* Attach to cgroups */
- err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
if (err) {
fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
err, strerror(errno));
@@ -833,19 +979,14 @@ run:
/* Attach txmsg program to sockmap */
if (txmsg_pass)
- tx_prog_fd = prog_fd[3];
- else if (txmsg_noisy)
tx_prog_fd = prog_fd[4];
else if (txmsg_redir)
tx_prog_fd = prog_fd[5];
- else if (txmsg_redir_noisy)
- tx_prog_fd = prog_fd[6];
- else if (txmsg_drop)
- tx_prog_fd = prog_fd[9];
- /* apply and cork must be last */
else if (txmsg_apply)
- tx_prog_fd = prog_fd[7];
+ tx_prog_fd = prog_fd[6];
else if (txmsg_cork)
+ tx_prog_fd = prog_fd[7];
+ else if (txmsg_drop)
tx_prog_fd = prog_fd[8];
else
tx_prog_fd = 0;
@@ -870,7 +1011,7 @@ run:
goto out;
}
- if (txmsg_redir || txmsg_redir_noisy)
+ if (txmsg_redir)
redir_fd = c2;
else
redir_fd = c1;
@@ -1018,7 +1159,35 @@ run:
}
}
- if (txmsg_skb) {
+ if (txmsg_ktls_skb) {
+ int ingress = BPF_F_INGRESS;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+ err, strerror(errno));
+ }
+
+ if (txmsg_ktls_skb_redir) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[7],
+ &i, &ingress, BPF_ANY);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+ err, strerror(errno));
+ }
+ }
+
+ if (txmsg_ktls_skb_drop) {
+ i = 1;
+ err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
+ }
+ }
+
+ if (txmsg_redir_skb) {
int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
p2 : p1;
int ingress = BPF_F_INGRESS;
@@ -1033,8 +1202,7 @@ run:
}
i = 3;
- err = bpf_map_update_elem(map_fd[0],
- &i, &skb_fd, BPF_ANY);
+ err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
if (err) {
fprintf(stderr,
"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
@@ -1068,9 +1236,12 @@ run:
fprintf(stderr, "unknown test\n");
out:
/* Detatch and zero all the maps */
- bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+ bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+ bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
+ bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
+
if (tx_prog_fd >= 0)
bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
@@ -1112,12 +1283,8 @@ static void test_options(char *options)
if (txmsg_pass)
strncat(options, "pass,", OPTSTRING);
- if (txmsg_noisy)
- strncat(options, "pass_noisy,", OPTSTRING);
if (txmsg_redir)
strncat(options, "redir,", OPTSTRING);
- if (txmsg_redir_noisy)
- strncat(options, "redir_noisy,", OPTSTRING);
if (txmsg_drop)
strncat(options, "drop,", OPTSTRING);
if (txmsg_apply) {
@@ -1143,8 +1310,10 @@ static void test_options(char *options)
}
if (txmsg_ingress)
strncat(options, "ingress,", OPTSTRING);
- if (txmsg_skb)
- strncat(options, "skb,", OPTSTRING);
+ if (txmsg_redir_skb)
+ strncat(options, "redir_skb,", OPTSTRING);
+ if (txmsg_ktls_skb)
+ strncat(options, "ktls_skb,", OPTSTRING);
if (ktls)
strncat(options, "ktls,", OPTSTRING);
if (peek_flag)
@@ -1168,416 +1337,317 @@ static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
test_options(options);
- fprintf(stdout,
- "[TEST %i]: (%i, %i, %i, %s, %s): ",
- test_cnt, opt->rate, opt->iov_count, opt->iov_length,
- test_to_str(test), options);
- fflush(stdout);
+ if (opt->verbose) {
+ fprintf(stdout,
+ " [TEST %i]: (%i, %i, %i, %s, %s): ",
+ test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+ test_to_str(test), options);
+ fflush(stdout);
+ }
err = run_options(opt, cgrp, test);
- fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+ if (opt->verbose)
+ fprintf(stdout, " %s\n", !err ? "PASS" : "FAILED");
test_cnt++;
!err ? passed++ : failed++;
free(options);
return err;
}
-static int test_exec(int cgrp, struct sockmap_options *opt)
-{
- int err = __test_exec(cgrp, SENDMSG, opt);
-
- if (err)
- goto out;
-
- err = __test_exec(cgrp, SENDPAGE, opt);
-out:
- return err;
-}
-
-static int test_loop(int cgrp)
-{
- struct sockmap_options opt;
-
- int err, i, l, r;
-
- opt.verbose = 0;
- opt.base = false;
- opt.sendpage = false;
- opt.data_test = false;
- opt.drop_expected = false;
- opt.iov_count = 0;
- opt.iov_length = 0;
- opt.rate = 0;
-
- r = 1;
- for (i = 1; i < 100; i += 33) {
- for (l = 1; l < 100; l += 33) {
- opt.rate = r;
- opt.iov_count = i;
- opt.iov_length = l;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
- }
- }
- sched_yield();
-out:
- return err;
-}
-
-static int test_txmsg(int cgrp)
+static void test_exec(int cgrp, struct sockmap_options *opt)
{
+ int type = strcmp(opt->map, BPF_SOCKMAP_FILENAME);
int err;
- txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
- txmsg_apply = txmsg_cork = 0;
- txmsg_ingress = txmsg_skb = 0;
-
- txmsg_pass = 1;
- err = test_loop(cgrp);
- txmsg_pass = 0;
- if (err)
- goto out;
-
- txmsg_redir = 1;
- err = test_loop(cgrp);
- txmsg_redir = 0;
- if (err)
- goto out;
-
- txmsg_drop = 1;
- err = test_loop(cgrp);
- txmsg_drop = 0;
- if (err)
- goto out;
-
- txmsg_redir = 1;
- txmsg_ingress = 1;
- err = test_loop(cgrp);
- txmsg_redir = 0;
- txmsg_ingress = 0;
- if (err)
- goto out;
-out:
- txmsg_pass = 0;
- txmsg_redir = 0;
- txmsg_drop = 0;
- return err;
+ if (type == 0) {
+ test_start();
+ err = __test_exec(cgrp, SENDMSG, opt);
+ if (err)
+ test_fail();
+ } else {
+ test_start();
+ err = __test_exec(cgrp, SENDPAGE, opt);
+ if (err)
+ test_fail();
+ }
}
-static int test_send(struct sockmap_options *opt, int cgrp)
+static void test_send_one(struct sockmap_options *opt, int cgrp)
{
- int err;
-
opt->iov_length = 1;
opt->iov_count = 1;
opt->rate = 1;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
+ test_exec(cgrp, opt);
opt->iov_length = 1;
opt->iov_count = 1024;
opt->rate = 1;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
+ test_exec(cgrp, opt);
opt->iov_length = 1024;
opt->iov_count = 1;
opt->rate = 1;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
+ test_exec(cgrp, opt);
- opt->iov_length = 1;
+}
+
+static void test_send_many(struct sockmap_options *opt, int cgrp)
+{
+ opt->iov_length = 3;
opt->iov_count = 1;
opt->rate = 512;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
-
- opt->iov_length = 256;
- opt->iov_count = 1024;
- opt->rate = 2;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
+ test_exec(cgrp, opt);
opt->rate = 100;
opt->iov_count = 1;
opt->iov_length = 5;
- err = test_exec(cgrp, opt);
- if (err)
- goto out;
-out:
- sched_yield();
- return err;
+ test_exec(cgrp, opt);
}
-static int test_mixed(int cgrp)
+static void test_send_large(struct sockmap_options *opt, int cgrp)
{
- struct sockmap_options opt = {0};
- int err;
+ opt->iov_length = 256;
+ opt->iov_count = 1024;
+ opt->rate = 2;
+ test_exec(cgrp, opt);
+}
- txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
- txmsg_apply = txmsg_cork = 0;
- txmsg_start = txmsg_end = 0;
- txmsg_start_push = txmsg_end_push = 0;
- txmsg_start_pop = txmsg_pop = 0;
+static void test_send(struct sockmap_options *opt, int cgrp)
+{
+ test_send_one(opt, cgrp);
+ test_send_many(opt, cgrp);
+ test_send_large(opt, cgrp);
+ sched_yield();
+}
+static void test_txmsg_pass(int cgrp, struct sockmap_options *opt)
+{
/* Test small and large iov_count values with pass/redir/apply/cork */
txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 1;
- txmsg_cork = 0;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ test_send(opt, cgrp);
+}
- txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 0;
- txmsg_cork = 1;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_redir = 1;
+ test_send(opt, cgrp);
+}
- txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 1;
- txmsg_cork = 1;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_drop = 1;
+ test_send(opt, cgrp);
+}
- txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 1024;
- txmsg_cork = 0;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_pass = txmsg_drop = 0;
+ txmsg_ingress = txmsg_redir = 1;
+ test_send(opt, cgrp);
+}
- txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 0;
- txmsg_cork = 1024;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
+{
+ bool data = opt->data_test;
+ int k = ktls;
- txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_apply = 1024;
- txmsg_cork = 1024;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ opt->data_test = true;
+ ktls = 1;
+ txmsg_pass = txmsg_drop = 0;
+ txmsg_ingress = txmsg_redir = 0;
+ txmsg_ktls_skb = 1;
txmsg_pass = 1;
- txmsg_redir = 0;
- txmsg_cork = 4096;
- txmsg_apply = 4096;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
- txmsg_pass = 0;
- txmsg_redir = 1;
- txmsg_apply = 1;
- txmsg_cork = 0;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ /* Using data verification so ensure iov layout is
+ * expected from test receiver side. e.g. has enough
+ * bytes to write test code.
+ */
+ opt->iov_length = 100;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ test_exec(cgrp, opt);
- txmsg_pass = 0;
- txmsg_redir = 1;
- txmsg_apply = 0;
- txmsg_cork = 1;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ txmsg_ktls_skb_drop = 1;
+ test_exec(cgrp, opt);
- txmsg_pass = 0;
- txmsg_redir = 1;
- txmsg_apply = 1024;
- txmsg_cork = 0;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ txmsg_ktls_skb_drop = 0;
+ txmsg_ktls_skb_redir = 1;
+ test_exec(cgrp, opt);
- txmsg_pass = 0;
- txmsg_redir = 1;
- txmsg_apply = 0;
- txmsg_cork = 1024;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ opt->data_test = data;
+ ktls = k;
+}
+
+
+/* Test cork with hung data. This tests poor usage patterns where
+ * cork can leave data on the ring if user program is buggy and
+ * doesn't flush them somehow. They do take some time however
+ * because they wait for a timeout. Test pass, redir and cork with
+ * apply logic. Use cork size of 4097 with send_large to avoid
+ * aligning cork size with send size.
+ */
+static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_cork = 4097;
+ txmsg_apply = 4097;
+ test_send_large(opt, cgrp);
txmsg_pass = 0;
txmsg_redir = 1;
- txmsg_apply = 1024;
- txmsg_cork = 1024;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
+ txmsg_apply = 0;
+ txmsg_cork = 4097;
+ test_send_large(opt, cgrp);
txmsg_pass = 0;
txmsg_redir = 1;
- txmsg_cork = 4096;
- txmsg_apply = 4096;
- err = test_send(&opt, cgrp);
- if (err)
- goto out;
-out:
- return err;
+ txmsg_apply = 4097;
+ txmsg_cork = 4097;
+ test_send_large(opt, cgrp);
}
-static int test_start_end(int cgrp)
+static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
{
- struct sockmap_options opt = {0};
- int err, i;
+ /* Test basic start/end */
+ txmsg_start = 1;
+ txmsg_end = 2;
+ test_send(opt, cgrp);
+
+ /* Test >4k pull */
+ txmsg_start = 4096;
+ txmsg_end = 9182;
+ test_send_large(opt, cgrp);
- /* Test basic start/end with lots of iov_count and iov_lengths */
+ /* Test pull + redirect */
+ txmsg_redir = 0;
txmsg_start = 1;
txmsg_end = 2;
- txmsg_start_push = 1;
- txmsg_end_push = 2;
- txmsg_start_pop = 1;
- txmsg_pop = 1;
- err = test_txmsg(cgrp);
- if (err)
- goto out;
+ test_send(opt, cgrp);
- /* Cut a byte of pushed data but leave reamining in place */
+ /* Test pull + cork */
+ txmsg_redir = 0;
+ txmsg_cork = 512;
txmsg_start = 1;
txmsg_end = 2;
- txmsg_start_push = 1;
- txmsg_end_push = 3;
+ test_send_many(opt, cgrp);
+
+ /* Test pull + cork + redirect */
+ txmsg_redir = 1;
+ txmsg_cork = 512;
+ txmsg_start = 1;
+ txmsg_end = 2;
+ test_send_many(opt, cgrp);
+}
+
+static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
+{
+ /* Test basic pop */
txmsg_start_pop = 1;
- txmsg_pop = 1;
- err = test_txmsg(cgrp);
- if (err)
- goto out;
+ txmsg_pop = 2;
+ test_send_many(opt, cgrp);
- /* Test start/end with cork */
- opt.rate = 16;
- opt.iov_count = 1;
- opt.iov_length = 100;
- txmsg_cork = 1600;
-
- txmsg_start_pop = 0;
- txmsg_pop = 0;
-
- for (i = 99; i <= 1600; i += 500) {
- txmsg_start = 0;
- txmsg_end = i;
- txmsg_start_push = 0;
- txmsg_end_push = i;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
- }
+ /* Test pop with >4k */
+ txmsg_start_pop = 4096;
+ txmsg_pop = 4096;
+ test_send_large(opt, cgrp);
- /* Test pop data in middle of cork */
- for (i = 99; i <= 1600; i += 500) {
- txmsg_start_pop = 10;
- txmsg_pop = i;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
- }
- txmsg_start_pop = 0;
- txmsg_pop = 0;
-
- /* Test start/end with cork but pull data in middle */
- for (i = 199; i <= 1600; i += 500) {
- txmsg_start = 100;
- txmsg_end = i;
- txmsg_start_push = 100;
- txmsg_end_push = i;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
- }
+ /* Test pop + redirect */
+ txmsg_redir = 1;
+ txmsg_start_pop = 1;
+ txmsg_pop = 2;
+ test_send_many(opt, cgrp);
- /* Test start/end with cork pulling last sg entry */
- txmsg_start = 1500;
- txmsg_end = 1600;
- txmsg_start_push = 1500;
- txmsg_end_push = 1600;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+ /* Test pop + cork */
+ txmsg_redir = 0;
+ txmsg_cork = 512;
+ txmsg_start_pop = 1;
+ txmsg_pop = 2;
+ test_send_many(opt, cgrp);
- /* Test pop with cork pulling last sg entry */
- txmsg_start_pop = 1500;
- txmsg_pop = 1600;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
- txmsg_start_pop = 0;
- txmsg_pop = 0;
-
- /* Test start/end pull of single byte in last page */
- txmsg_start = 1111;
- txmsg_end = 1112;
- txmsg_start_push = 1111;
- txmsg_end_push = 1112;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+ /* Test pop + redirect + cork */
+ txmsg_redir = 1;
+ txmsg_cork = 4;
+ txmsg_start_pop = 1;
+ txmsg_pop = 2;
+ test_send_many(opt, cgrp);
+}
- /* Test pop of single byte in last page */
- txmsg_start_pop = 1111;
- txmsg_pop = 1112;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
+{
+ /* Test basic push */
+ txmsg_start_push = 1;
+ txmsg_end_push = 1;
+ test_send(opt, cgrp);
- /* Test start/end with end < start */
- txmsg_start = 1111;
- txmsg_end = 0;
- txmsg_start_push = 1111;
- txmsg_end_push = 0;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+ /* Test push 4kB >4k */
+ txmsg_start_push = 4096;
+ txmsg_end_push = 4096;
+ test_send_large(opt, cgrp);
- /* Test start/end with end > data */
- txmsg_start = 0;
- txmsg_end = 1601;
- txmsg_start_push = 0;
- txmsg_end_push = 1601;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+ /* Test push + redirect */
+ txmsg_redir = 1;
+ txmsg_start_push = 1;
+ txmsg_end_push = 2;
+ test_send_many(opt, cgrp);
- /* Test start/end with start > data */
- txmsg_start = 1601;
- txmsg_end = 1600;
- txmsg_start_push = 1601;
- txmsg_end_push = 1600;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+ /* Test push + cork */
+ txmsg_redir = 0;
+ txmsg_cork = 512;
+ txmsg_start_push = 1;
+ txmsg_end_push = 2;
+ test_send_many(opt, cgrp);
+}
- /* Test pop with start > data */
- txmsg_start_pop = 1601;
- txmsg_pop = 1;
- err = test_exec(cgrp, &opt);
- if (err)
- goto out;
+static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_start_push = 1;
+ txmsg_end_push = 10;
+ txmsg_start_pop = 5;
+ txmsg_pop = 4;
+ test_send_large(opt, cgrp);
+}
- /* Test pop with pop range > data */
- txmsg_start_pop = 1599;
- txmsg_pop = 10;
- err = test_exec(cgrp, &opt);
-out:
- txmsg_start = 0;
- txmsg_end = 0;
- sched_yield();
- return err;
+static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ test_send_one(opt, cgrp);
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ test_send_one(opt, cgrp);
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ test_send_large(opt, cgrp);
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ test_send_large(opt, cgrp);
+}
+
+static void test_txmsg_cork(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 0;
+ txmsg_cork = 1;
+ test_send(opt, cgrp);
+
+ txmsg_pass = 1;
+ txmsg_redir = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 1;
+ test_send(opt, cgrp);
}
char *map_names[] = {
@@ -1589,11 +1659,13 @@ char *map_names[] = {
"sock_bytes",
"sock_redir_flags",
"sock_skb_opts",
+ "tls_sock_map",
};
int prog_attach_type[] = {
BPF_SK_SKB_STREAM_PARSER,
BPF_SK_SKB_STREAM_VERDICT,
+ BPF_SK_SKB_STREAM_VERDICT,
BPF_CGROUP_SOCK_OPS,
BPF_SK_MSG_VERDICT,
BPF_SK_MSG_VERDICT,
@@ -1607,6 +1679,7 @@ int prog_attach_type[] = {
int prog_type[] = {
BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_SK_SKB,
+ BPF_PROG_TYPE_SK_SKB,
BPF_PROG_TYPE_SOCK_OPS,
BPF_PROG_TYPE_SK_MSG,
BPF_PROG_TYPE_SK_MSG,
@@ -1662,73 +1735,117 @@ static int populate_progs(char *bpf_file)
return 0;
}
-static int __test_suite(int cg_fd, char *bpf_file)
+struct _test test[] = {
+ {"txmsg test passthrough", test_txmsg_pass},
+ {"txmsg test redirect", test_txmsg_redir},
+ {"txmsg test drop", test_txmsg_drop},
+ {"txmsg test ingress redirect", test_txmsg_ingress_redir},
+ {"txmsg test skb", test_txmsg_skb},
+ {"txmsg test apply", test_txmsg_apply},
+ {"txmsg test cork", test_txmsg_cork},
+ {"txmsg test hanging corks", test_txmsg_cork_hangs},
+ {"txmsg test push_data", test_txmsg_push},
+ {"txmsg test pull-data", test_txmsg_pull},
+ {"txmsg test pop-data", test_txmsg_pop},
+ {"txmsg test push/pop data", test_txmsg_push_pop},
+};
+
+static int check_whitelist(struct _test *t, struct sockmap_options *opt)
{
- int err, cleanup = cg_fd;
+ char *entry, *ptr;
+
+ if (!opt->whitelist)
+ return 0;
+ ptr = strdup(opt->whitelist);
+ if (!ptr)
+ return -ENOMEM;
+ entry = strtok(ptr, ",");
+ while (entry) {
+ if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
+ strstr(opt->map, entry) != 0 ||
+ strstr(t->title, entry) != 0)
+ return 0;
+ entry = strtok(NULL, ",");
+ }
+ return -EINVAL;
+}
- err = populate_progs(bpf_file);
+static int check_blacklist(struct _test *t, struct sockmap_options *opt)
+{
+ char *entry, *ptr;
+
+ if (!opt->blacklist)
+ return -EINVAL;
+ ptr = strdup(opt->blacklist);
+ if (!ptr)
+ return -ENOMEM;
+ entry = strtok(ptr, ",");
+ while (entry) {
+ if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
+ strstr(opt->map, entry) != 0 ||
+ strstr(t->title, entry) != 0)
+ return 0;
+ entry = strtok(NULL, ",");
+ }
+ return -EINVAL;
+}
+
+static int __test_selftests(int cg_fd, struct sockmap_options *opt)
+{
+ int i, err;
+
+ err = populate_progs(opt->map);
if (err < 0) {
fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
return err;
}
- if (cg_fd < 0) {
- if (setup_cgroup_environment()) {
- fprintf(stderr, "ERROR: cgroup env failed\n");
- return -EINVAL;
- }
+ /* Tests basic commands and APIs */
+ for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
+ struct _test t = test[i];
- cg_fd = create_and_get_cgroup(CG_PATH);
- if (cg_fd < 0) {
- fprintf(stderr,
- "ERROR: (%i) open cg path failed: %s\n",
- cg_fd, optarg);
- return cg_fd;
- }
+ if (check_whitelist(&t, opt) != 0)
+ continue;
+ if (check_blacklist(&t, opt) == 0)
+ continue;
- if (join_cgroup(CG_PATH)) {
- fprintf(stderr, "ERROR: failed to join cgroup\n");
- return -EINVAL;
- }
+ test_start_subtest(&t, opt);
+ t.tester(cg_fd, opt);
+ test_end_subtest();
}
- /* Tests basic commands and APIs with range of iov values */
- txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0;
- err = test_txmsg(cg_fd);
- if (err)
- goto out;
+ return err;
+}
- /* Tests interesting combinations of APIs used together */
- err = test_mixed(cg_fd);
- if (err)
- goto out;
+static void test_selftests_sockmap(int cg_fd, struct sockmap_options *opt)
+{
+ opt->map = BPF_SOCKMAP_FILENAME;
+ __test_selftests(cg_fd, opt);
+}
- /* Tests pull_data API using start/end API */
- err = test_start_end(cg_fd);
- if (err)
- goto out;
+static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt)
+{
+ opt->map = BPF_SOCKHASH_FILENAME;
+ __test_selftests(cg_fd, opt);
+}
-out:
- printf("Summary: %i PASSED %i FAILED\n", passed, failed);
- if (cleanup < 0) {
- cleanup_cgroup_environment();
- close(cg_fd);
- }
- return err;
+static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
+{
+ opt->map = BPF_SOCKHASH_FILENAME;
+ opt->prepend = "ktls";
+ ktls = 1;
+ __test_selftests(cg_fd, opt);
+ ktls = 0;
}
-static int test_suite(int cg_fd)
+static int test_selftest(int cg_fd, struct sockmap_options *opt)
{
- int err;
- err = __test_suite(cg_fd, BPF_SOCKMAP_FILENAME);
- if (err)
- goto out;
- err = __test_suite(cg_fd, BPF_SOCKHASH_FILENAME);
-out:
- if (cg_fd > -1)
- close(cg_fd);
- return err;
+ test_selftests_sockmap(cg_fd, opt);
+ test_selftests_sockhash(cg_fd, opt);
+ test_selftests_ktls(cg_fd, opt);
+ test_print_results();
+ return 0;
}
int main(int argc, char **argv)
@@ -1737,12 +1854,10 @@ int main(int argc, char **argv)
struct sockmap_options options = {0};
int opt, longindex, err, cg_fd = 0;
char *bpf_file = BPF_SOCKMAP_FILENAME;
- int test = PING_PONG;
-
- if (argc < 2)
- return test_suite(-1);
+ int test = SELFTESTS;
+ bool cg_created = 0;
- while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
+ while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:b:",
long_options, &longindex)) != -1) {
switch (opt) {
case 's':
@@ -1783,6 +1898,8 @@ int main(int argc, char **argv)
break;
case 'v':
options.verbose = 1;
+ if (optarg)
+ options.verbose = atoi(optarg);
break;
case 'i':
iov_count = atoi(optarg);
@@ -1809,6 +1926,15 @@ int main(int argc, char **argv)
return -1;
}
break;
+ case 'n':
+ options.whitelist = strdup(optarg);
+ if (!options.whitelist)
+ return -ENOMEM;
+ break;
+ case 'b':
+ options.blacklist = strdup(optarg);
+ if (!options.blacklist)
+ return -ENOMEM;
case 0:
break;
case 'h':
@@ -1818,13 +1944,30 @@ int main(int argc, char **argv)
}
}
- if (argc <= 3 && cg_fd)
- return test_suite(cg_fd);
-
if (!cg_fd) {
- fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
- argv[0]);
- return -1;
+ if (setup_cgroup_environment()) {
+ fprintf(stderr, "ERROR: cgroup env failed\n");
+ return -EINVAL;
+ }
+
+ cg_fd = create_and_get_cgroup(CG_PATH);
+ if (cg_fd < 0) {
+ fprintf(stderr,
+ "ERROR: (%i) open cg path failed: %s\n",
+ cg_fd, strerror(errno));
+ return cg_fd;
+ }
+
+ if (join_cgroup(CG_PATH)) {
+ fprintf(stderr, "ERROR: failed to join cgroup\n");
+ return -EINVAL;
+ }
+ cg_created = 1;
+ }
+
+ if (test == SELFTESTS) {
+ err = test_selftest(cg_fd, &options);
+ goto out;
}
err = populate_progs(bpf_file);
@@ -1843,6 +1986,13 @@ int main(int argc, char **argv)
options.rate = rate;
err = run_options(&options, cg_fd, test);
+out:
+ if (options.whitelist)
+ free(options.whitelist);
+ if (options.blacklist)
+ free(options.blacklist);
+ if (cg_created)
+ cleanup_cgroup_environment();
close(cg_fd);
return err;
}
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 87eaa49609a0..78a6bae56ea6 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -50,7 +50,7 @@
#define MAX_INSNS BPF_MAXINSNS
#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 19
+#define MAX_NR_MAPS 20
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
@@ -86,6 +86,7 @@ struct bpf_test {
int fixup_map_array_small[MAX_FIXUPS];
int fixup_sk_storage_map[MAX_FIXUPS];
int fixup_map_event_output[MAX_FIXUPS];
+ int fixup_map_reuseport_array[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
uint32_t insn_processed;
@@ -637,6 +638,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
int *fixup_map_array_small = test->fixup_map_array_small;
int *fixup_sk_storage_map = test->fixup_sk_storage_map;
int *fixup_map_event_output = test->fixup_map_event_output;
+ int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
if (test->fill_helper) {
test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -806,12 +808,28 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
fixup_map_event_output++;
} while (*fixup_map_event_output);
}
+ if (*fixup_map_reuseport_array) {
+ map_fds[19] = __create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ sizeof(u32), sizeof(u64), 1, 0);
+ do {
+ prog[*fixup_map_reuseport_array].imm = map_fds[19];
+ fixup_map_reuseport_array++;
+ } while (*fixup_map_reuseport_array);
+ }
}
+struct libcap {
+ struct __user_cap_header_struct hdr;
+ struct __user_cap_data_struct data[2];
+};
+
static int set_admin(bool admin)
{
cap_t caps;
- const cap_value_t cap_val = CAP_SYS_ADMIN;
+ /* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
+ const cap_value_t cap_net_admin = CAP_NET_ADMIN;
+ const cap_value_t cap_sys_admin = CAP_SYS_ADMIN;
+ struct libcap *cap;
int ret = -1;
caps = cap_get_proc();
@@ -819,11 +837,26 @@ static int set_admin(bool admin)
perror("cap_get_proc");
return -1;
}
- if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+ cap = (struct libcap *)caps;
+ if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_sys_admin, CAP_CLEAR)) {
+ perror("cap_set_flag clear admin");
+ goto out;
+ }
+ if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_admin,
admin ? CAP_SET : CAP_CLEAR)) {
- perror("cap_set_flag");
+ perror("cap_set_flag set_or_clear net");
goto out;
}
+ /* libcap is likely old and simply ignores CAP_BPF and CAP_PERFMON,
+ * so update effective bits manually
+ */
+ if (admin) {
+ cap->data[1].effective |= 1 << (38 /* CAP_PERFMON */ - 32);
+ cap->data[1].effective |= 1 << (39 /* CAP_BPF */ - 32);
+ } else {
+ cap->data[1].effective &= ~(1 << (38 - 32));
+ cap->data[1].effective &= ~(1 << (39 - 32));
+ }
if (cap_set_proc(caps)) {
perror("cap_set_proc");
goto out;
@@ -943,7 +976,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
attr.insns = prog;
attr.insns_cnt = prog_len;
attr.license = "GPL";
- attr.log_level = verbose || expected_ret == VERBOSE_ACCEPT ? 1 : 4;
+ if (verbose)
+ attr.log_level = 1;
+ else if (expected_ret == VERBOSE_ACCEPT)
+ attr.log_level = 2;
+ else
+ attr.log_level = 4;
attr.prog_flags = pflags;
fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
@@ -1052,9 +1090,11 @@ fail_log:
static bool is_admin(void)
{
+ cap_flag_value_t net_priv = CAP_CLEAR;
+ bool perfmon_priv = false;
+ bool bpf_priv = false;
+ struct libcap *cap;
cap_t caps;
- cap_flag_value_t sysadmin = CAP_CLEAR;
- const cap_value_t cap_val = CAP_SYS_ADMIN;
#ifdef CAP_IS_SUPPORTED
if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
@@ -1067,11 +1107,14 @@ static bool is_admin(void)
perror("cap_get_proc");
return false;
}
- if (cap_get_flag(caps, cap_val, CAP_EFFECTIVE, &sysadmin))
- perror("cap_get_flag");
+ cap = (struct libcap *)caps;
+ bpf_priv = cap->data[1].effective & (1 << (39/* CAP_BPF */ - 32));
+ perfmon_priv = cap->data[1].effective & (1 << (38/* CAP_PERFMON */ - 32));
+ if (cap_get_flag(caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &net_priv))
+ perror("cap_get_flag NET");
if (cap_free(caps))
perror("cap_free");
- return (sysadmin == CAP_SET);
+ return bpf_priv && perfmon_priv && net_priv == CAP_SET;
}
static void get_unpriv_disabled()
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
new file mode 100644
index 000000000000..0af6337a8962
--- /dev/null
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook, Inc. */
+#include <stdlib.h>
+#include <errno.h>
+#include "testing_helpers.h"
+
+int parse_num_list(const char *s, bool **num_set, int *num_set_len)
+{
+ int i, set_len = 0, new_len, num, start = 0, end = -1;
+ bool *set = NULL, *tmp, parsing_end = false;
+ char *next;
+
+ while (s[0]) {
+ errno = 0;
+ num = strtol(s, &next, 10);
+ if (errno)
+ return -errno;
+
+ if (parsing_end)
+ end = num;
+ else
+ start = num;
+
+ if (!parsing_end && *next == '-') {
+ s = next + 1;
+ parsing_end = true;
+ continue;
+ } else if (*next == ',') {
+ parsing_end = false;
+ s = next + 1;
+ end = num;
+ } else if (*next == '\0') {
+ parsing_end = false;
+ s = next;
+ end = num;
+ } else {
+ return -EINVAL;
+ }
+
+ if (start > end)
+ return -EINVAL;
+
+ if (end + 1 > set_len) {
+ new_len = end + 1;
+ tmp = realloc(set, new_len);
+ if (!tmp) {
+ free(set);
+ return -ENOMEM;
+ }
+ for (i = set_len; i < start; i++)
+ tmp[i] = false;
+ set = tmp;
+ set_len = new_len;
+ }
+ for (i = start; i <= end; i++)
+ set[i] = true;
+ }
+
+ if (!set)
+ return -EINVAL;
+
+ *num_set = set;
+ *num_set_len = set_len;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
new file mode 100644
index 000000000000..923b51762759
--- /dev/null
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (C) 2020 Facebook, Inc. */
+#include <stdbool.h>
+
+int parse_num_list(const char *s, bool **set, int *set_len);
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 7f989b3e4e22..4d0e913bbb22 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -4,12 +4,15 @@
#include <string.h>
#include <assert.h>
#include <errno.h>
+#include <fcntl.h>
#include <poll.h>
#include <unistd.h>
#include <linux/perf_event.h>
#include <sys/mman.h>
#include "trace_helpers.h"
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
#define MAX_SYMS 300000
static struct ksym syms[MAX_SYMS];
static int sym_cnt;
@@ -86,3 +89,23 @@ long ksym_get_addr(const char *name)
return 0;
}
+
+void read_trace_pipe(void)
+{
+ int trace_fd;
+
+ trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+ if (trace_fd < 0)
+ return;
+
+ while (1) {
+ static char buf[4096];
+ ssize_t sz;
+
+ sz = read(trace_fd, buf, sizeof(buf) - 1);
+ if (sz > 0) {
+ buf[sz] = 0;
+ puts(buf);
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 0383c9b8adc1..25ef597dd03f 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -12,5 +12,6 @@ struct ksym {
int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
+void read_trace_pipe(void);
#endif
diff --git a/tools/testing/selftests/bpf/verifier/.gitignore b/tools/testing/selftests/bpf/verifier/.gitignore
index 45984a364647..89c4a3d37544 100644
--- a/tools/testing/selftests/bpf/verifier/.gitignore
+++ b/tools/testing/selftests/bpf/verifier/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
tests.h
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
index e0fad1548737..d781bc86e100 100644
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ b/tools/testing/selftests/bpf/verifier/and.c
@@ -15,7 +15,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -44,7 +44,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index f3c33e128709..1c4b1939f5a8 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -117,7 +117,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -137,7 +137,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
+ .errstr = "R0 unbounded memory access, make sure to bounds check any such access",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index d55f476f2237..4d6645f2874c 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -20,7 +20,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT,
},
{
@@ -146,7 +146,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
.result = REJECT
},
{
@@ -238,7 +238,7 @@
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
/* r1 = [0x00, 0xff] */
BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
@@ -253,22 +253,18 @@
* [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
*/
BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = 0 or
- * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
- */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
- /* no-op or OOB pointer computation */
+ /* error on OOB pointer computation */
BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* potentially OOB access */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
/* exit */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
/* not actually fully unbounded, but the bound is very high */
- .errstr = "R0 unbounded memory access",
- .result = REJECT
+ .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root",
+ .result_unpriv = REJECT,
+ .errstr = "value -4294967168 makes map_value pointer be out of bounds",
+ .result = REJECT,
},
{
"bounds check after truncation of boundary-crossing range (2)",
@@ -278,7 +274,7 @@
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
/* r1 = [0x00, 0xff] */
BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
@@ -295,22 +291,18 @@
* [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
*/
BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = 0 or
- * [0x00ff'ffff'ff00'0000, 0x00ff'ffff'ffff'ffff]
- */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
- /* no-op or OOB pointer computation */
+ /* error on OOB pointer computation */
BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* potentially OOB access */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
/* exit */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
/* not actually fully unbounded, but the bound is very high */
- .errstr = "R0 unbounded memory access",
- .result = REJECT
+ .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root",
+ .result_unpriv = REJECT,
+ .errstr = "value -4294967168 makes map_value pointer be out of bounds",
+ .result = REJECT,
},
{
"bounds check after wrapping 32-bit addition",
@@ -362,7 +354,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "R0 max value is outside of the array range",
+ .errstr = "R0 max value is outside of the allowed memory range",
.result = REJECT
},
{
@@ -411,16 +403,14 @@
BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31),
/* r1 = 0xffff'fffe (NOT 0!) */
BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2),
- /* computes OOB pointer */
+ /* error on computing OOB pointer */
BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* OOB access */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
/* exit */
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- .errstr = "R0 invalid mem access",
+ .errstr = "math between map_value pointer and 4294967294 is not allowed",
.result = REJECT,
},
{
@@ -506,3 +496,64 @@
.errstr = "map_value pointer and 1000000000000",
.result = REJECT
},
+{
+ "bounds check mixed 32bit and 64bit arithmetic. test1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ /* r1 = 0xffffFFFF00000001 */
+ BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 1, 3),
+ /* check ALU64 op keeps 32bit bounds */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 2, 1),
+ BPF_JMP_A(1),
+ /* invalid ldx if bounds are lost above */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT
+},
+{
+ "bounds check mixed 32bit and 64bit arithmetic. test2",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_IMM(BPF_REG_1, -1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
+ /* r1 = 0xffffFFFF00000001 */
+ BPF_MOV64_IMM(BPF_REG_2, 3),
+ /* r1 = 0x2 */
+ BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
+ /* check ALU32 op zero extends 64bit bounds */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 1),
+ BPF_JMP_A(1),
+ /* invalid ldx if bounds are lost above */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT
+},
+{
+ "assigning 32bit bounds to 64bit for wA = 0, wB = wA",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV32_IMM(BPF_REG_9, 0),
+ BPF_MOV32_REG(BPF_REG_2, BPF_REG_9),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_8, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_6, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
index f24d50f09dbe..69b048cf46d9 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
@@ -9,17 +9,17 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)),
+ BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)/2),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)),
+ BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)/2),
BPF_MOV64_IMM(BPF_REG_4, 256),
BPF_EMIT_CALL(BPF_FUNC_get_stack),
BPF_MOV64_IMM(BPF_REG_1, 0),
BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
- BPF_JMP_REG(BPF_JSLT, BPF_REG_1, BPF_REG_8, 16),
+ BPF_JMP_REG(BPF_JSGT, BPF_REG_1, BPF_REG_8, 16),
BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
@@ -29,7 +29,7 @@
BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)),
+ BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)/2),
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 2d752c4f8d9d..94258c6b5235 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -19,7 +19,7 @@
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 1,
@@ -105,7 +105,7 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_map_hash_8b = { 16 },
.result = REJECT,
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
},
{
"calls: overlapping caller/callee",
@@ -315,7 +315,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "allowed for root only",
+ .errstr_unpriv = "allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = POINTER_VALUE,
@@ -346,7 +346,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "allowed for root only",
+ .errstr_unpriv = "allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN,
@@ -397,7 +397,7 @@
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.fixup_map_hash_48b = { 3 },
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -1064,7 +1064,7 @@
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "allowed for root only",
+ .errstr_unpriv = "allowed for",
.result_unpriv = REJECT,
.errstr = "R0 !read_ok",
.result = REJECT,
@@ -1977,7 +1977,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
},
@@ -2003,7 +2003,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
@@ -2028,7 +2028,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
diff --git a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c
index 84446dfc7c1d..6c214c58e8d4 100644
--- a/tools/testing/selftests/bpf/verifier/const_or.c
+++ b/tools/testing/selftests/bpf/verifier/const_or.c
@@ -6,7 +6,7 @@
BPF_MOV64_IMM(BPF_REG_2, 34),
BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
@@ -20,7 +20,7 @@
BPF_MOV64_IMM(BPF_REG_2, 34),
BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.errstr = "invalid stack type R1 off=-48 access_size=58",
@@ -36,7 +36,7 @@
BPF_MOV64_IMM(BPF_REG_4, 13),
BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
@@ -51,7 +51,7 @@
BPF_MOV64_IMM(BPF_REG_4, 24),
BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.errstr = "invalid stack type R1 off=-48 access_size=58",
diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c
index 92762c08f5e3..93d6b1641481 100644
--- a/tools/testing/selftests/bpf/verifier/ctx.c
+++ b/tools/testing/selftests/bpf/verifier/ctx.c
@@ -91,3 +91,108 @@
.result = REJECT,
.errstr = "variable ctx access var_off=(0x0; 0x4)",
},
+{
+ "pass ctx or null check, 1: ctx",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+ .result = ACCEPT,
+},
+{
+ "pass ctx or null check, 2: null",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+ .result = ACCEPT,
+},
+{
+ "pass ctx or null check, 3: 1",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+ .result = REJECT,
+ .errstr = "R1 type=inv expected=ctx",
+},
+{
+ "pass ctx or null check, 4: ctx - const",
+ .insns = {
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
+ .result = REJECT,
+ .errstr = "dereference of modified ctx ptr",
+},
+{
+ "pass ctx or null check, 5: null (connect)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+ .expected_attach_type = BPF_CGROUP_INET4_CONNECT,
+ .result = ACCEPT,
+},
+{
+ "pass ctx or null check, 6: null (bind)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_netns_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ACCEPT,
+},
+{
+ "pass ctx or null check, 7: ctx (bind)",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_socket_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ACCEPT,
+},
+{
+ "pass ctx or null check, 8: null (bind)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_get_socket_cookie),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = REJECT,
+ .errstr = "R1 type=inv expected=ctx",
+},
diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index d438193804b2..2e16b8e268f2 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -1011,6 +1011,53 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ "read gso_size from CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, gso_size)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "read gso_size from CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, gso_size)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "write gso_size from CGROUP_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, gso_size)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access off=176 size=4",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "read gso_size from CLS",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, gso_size)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
"check wire_len is not readable by sockets",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 50a8a63be4ac..5cf361d8eb1c 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -85,7 +85,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -103,7 +103,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -121,7 +121,7 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -137,7 +137,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
@@ -152,7 +152,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+ .errstr_unpriv = "function calls to other bpf functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
index b9fb28e8e224..988f46a1a4c7 100644
--- a/tools/testing/selftests/bpf/verifier/direct_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -68,7 +68,7 @@
},
.fixup_map_array_48b = { 1 },
.result = REJECT,
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
},
{
"direct map access, write test 7",
@@ -220,7 +220,7 @@
},
.fixup_map_array_small = { 1 },
.result = REJECT,
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
},
{
"direct map access, write test 19",
diff --git a/tools/testing/selftests/bpf/verifier/event_output.c b/tools/testing/selftests/bpf/verifier/event_output.c
index 130553e19eca..99f8f582c02b 100644
--- a/tools/testing/selftests/bpf/verifier/event_output.c
+++ b/tools/testing/selftests/bpf/verifier/event_output.c
@@ -92,3 +92,27 @@
.result = ACCEPT,
.retval = 1,
},
+{
+ "perfevent for cgroup dev",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_DEVICE,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "perfevent for cgroup sysctl",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "perfevent for cgroup sockopt",
+ .insns = { __PERF_EVENT_INSNS__ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ .fixup_map_event_output = { 4 },
+ .result = ACCEPT,
+ .retval = 1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index 67ab12410050..87c4e7900083 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -19,7 +19,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -36,7 +36,7 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.errstr = "invalid indirect read from stack off -64+0 size 64",
@@ -55,7 +55,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -84,7 +84,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -112,7 +112,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -132,7 +132,7 @@
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -152,7 +152,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -171,7 +171,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -190,7 +190,7 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -208,7 +208,7 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -233,7 +233,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -259,7 +259,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -286,7 +286,7 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -313,12 +313,12 @@
BPF_MOV64_IMM(BPF_REG_4, 0),
BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 4 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -468,7 +468,7 @@
BPF_MOV64_IMM(BPF_REG_1, 0),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.errstr = "R1 type=inv expected=fp",
@@ -481,7 +481,7 @@
BPF_MOV64_IMM(BPF_REG_1, 0),
BPF_MOV64_IMM(BPF_REG_2, 1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.errstr = "R1 type=inv expected=fp",
@@ -495,7 +495,7 @@
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.result = ACCEPT,
@@ -513,7 +513,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
@@ -534,7 +534,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
@@ -554,7 +554,7 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
@@ -580,7 +580,7 @@
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
BPF_EXIT_INSN(),
},
@@ -607,7 +607,7 @@
BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
BPF_EXIT_INSN(),
},
diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c
index 7572e403ddb9..1c7882ddfa63 100644
--- a/tools/testing/selftests/bpf/verifier/helper_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/helper_value_access.c
@@ -10,7 +10,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -29,7 +29,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_2, 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -67,7 +67,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -87,7 +87,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
BPF_MOV64_IMM(BPF_REG_2, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -109,7 +109,7 @@
BPF_MOV64_IMM(BPF_REG_2,
sizeof(struct test_val) - offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -129,7 +129,7 @@
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_2, 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -170,7 +170,7 @@
BPF_MOV64_IMM(BPF_REG_2,
sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -191,7 +191,7 @@
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_2, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -212,7 +212,7 @@
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_2, -1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -235,7 +235,7 @@
BPF_MOV64_IMM(BPF_REG_2,
sizeof(struct test_val) - offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -256,7 +256,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
BPF_MOV64_IMM(BPF_REG_2, 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -280,7 +280,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -300,7 +300,7 @@
sizeof(struct test_val) -
offsetof(struct test_val, foo) + 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -322,7 +322,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
BPF_MOV64_IMM(BPF_REG_2, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -344,7 +344,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
BPF_MOV64_IMM(BPF_REG_2, -1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -368,7 +368,7 @@
BPF_MOV64_IMM(BPF_REG_2,
sizeof(struct test_val) - offsetof(struct test_val, foo)),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -390,7 +390,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
BPF_MOV64_IMM(BPF_REG_2, 8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -415,7 +415,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the array range",
+ .errstr = "R1 min value is outside of the allowed memory range",
.result = REJECT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
@@ -433,7 +433,7 @@
BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
BPF_MOV64_IMM(BPF_REG_2, 1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -458,7 +458,7 @@
sizeof(struct test_val) -
offsetof(struct test_val, foo) + 1),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
@@ -926,7 +926,7 @@
},
.fixup_map_hash_16b = { 3, 10 },
.result = REJECT,
- .errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+ .errstr = "R2 unbounded memory access, make sure to bounds check any such access",
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 02151f8c940f..6dc8003ffc70 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -31,14 +31,14 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
.fixup_map_array_48b = { 1 },
.result = VERBOSE_ACCEPT,
.errstr =
- "26: (85) call bpf_probe_read#4\
+ "26: (85) call bpf_probe_read_kernel#113\
last_idx 26 first_idx 20\
regs=4 stack=0 before 25\
regs=4 stack=0 before 24\
@@ -91,7 +91,7 @@
BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read),
+ BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -99,7 +99,7 @@
.result = VERBOSE_ACCEPT,
.flags = BPF_F_TEST_STATE_FREQ,
.errstr =
- "26: (85) call bpf_probe_read#4\
+ "26: (85) call bpf_probe_read_kernel#113\
last_idx 26 first_idx 22\
regs=4 stack=0 before 25\
regs=4 stack=0 before 24\
diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
index da7a4b37cb98..fc4e301260f6 100644
--- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
@@ -1,34 +1,4 @@
{
- "prevent map lookup in sockmap",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockmap = { 3 },
- .result = REJECT,
- .errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem",
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
- "prevent map lookup in sockhash",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockhash = { 3 },
- .result = REJECT,
- .errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem",
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
"prevent map lookup in stack trace",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 604b46151736..056e0273bf12 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -821,3 +821,36 @@
.result = REJECT,
.errstr = "invalid mem access",
},
+{
+ "reference tracking: branch tracking valid pointer null comparison",
+ .insns = {
+ BPF_SK_LOOKUP(sk_lookup_tcp),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+},
+{
+ "reference tracking: branch tracking valid pointer value comparison",
+ .insns = {
+ BPF_SK_LOOKUP(sk_lookup_tcp),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 1234, 2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .errstr = "Unreleased reference",
+ .result = REJECT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 9ed192e14f5f..b1aac2641498 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -222,7 +222,7 @@
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, state)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, rx_queue_mapping)),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
@@ -516,3 +516,118 @@
.prog_type = BPF_PROG_TYPE_XDP,
.result = ACCEPT,
},
+{
+ "bpf_map_lookup_elem(sockmap, &key)",
+ .insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockmap = { 3 },
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .result = REJECT,
+ .errstr = "Unreleased reference id=2 alloc_insn=5",
+},
+{
+ "bpf_map_lookup_elem(sockhash, &key)",
+ .insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockhash = { 3 },
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .result = REJECT,
+ .errstr = "Unreleased reference id=2 alloc_insn=5",
+},
+{
+ "bpf_map_lookup_elem(sockmap, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
+ .insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockmap = { 3 },
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .result = ACCEPT,
+},
+{
+ "bpf_map_lookup_elem(sockhash, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
+ .insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
+ BPF_EMIT_CALL(BPF_FUNC_sk_release),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockhash = { 3 },
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .result = ACCEPT,
+},
+{
+ "bpf_sk_select_reuseport(ctx, reuseport_array, &key, flags)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_reuseport_array = { 4 },
+ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+ .result = ACCEPT,
+},
+{
+ "bpf_sk_select_reuseport(ctx, sockmap, &key, flags)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockmap = { 4 },
+ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+ .result = ACCEPT,
+},
+{
+ "bpf_sk_select_reuseport(ctx, sockhash, &key, flags)",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_sockmap = { 4 },
+ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c
index 7276620ef242..8bfeb77c60bd 100644
--- a/tools/testing/selftests/bpf/verifier/stack_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/stack_ptr.c
@@ -315,3 +315,43 @@
},
.result = ACCEPT,
},
+{
+ "store PTR_TO_STACK in R10 to array map using BPF_B",
+ .insns = {
+ /* Load pointer to map. */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ /* Copy R10 to R9. */
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_10),
+ /* Pollute other registers with unaligned values. */
+ BPF_MOV64_IMM(BPF_REG_2, -1),
+ BPF_MOV64_IMM(BPF_REG_3, -1),
+ BPF_MOV64_IMM(BPF_REG_4, -1),
+ BPF_MOV64_IMM(BPF_REG_5, -1),
+ BPF_MOV64_IMM(BPF_REG_6, -1),
+ BPF_MOV64_IMM(BPF_REG_7, -1),
+ BPF_MOV64_IMM(BPF_REG_8, -1),
+ /* Store both R9 and R10 with BPF_B and read back. */
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_9, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_1, 0),
+ /* Should read back as same value. */
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 42),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = ACCEPT,
+ .retval = 42,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
index 7f6c232cd842..ed1c2cea1dea 100644
--- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
+++ b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
@@ -88,6 +88,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 3 },
+ .errstr_unpriv = "leaking pointer from stack off -8",
.errstr = "R0 invalid mem access 'inv'",
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c
index 860d4a71cd83..3ecb70a3d939 100644
--- a/tools/testing/selftests/bpf/verifier/value_or_null.c
+++ b/tools/testing/selftests/bpf/verifier/value_or_null.c
@@ -150,3 +150,22 @@
.result_unpriv = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "map lookup and null branch prediction",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_1, 10),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_10, 10),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_hash_8b = { 4 },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index a53d99cebd9f..97ee658e1242 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -50,7 +50,7 @@
.fixup_map_array_48b = { 8 },
.result = ACCEPT,
.result_unpriv = REJECT,
- .errstr_unpriv = "R0 min value is outside of the array range",
+ .errstr_unpriv = "R0 min value is outside of the allowed memory range",
.retval = 1,
},
{
@@ -325,7 +325,7 @@
},
.fixup_map_array_48b = { 3 },
.result = REJECT,
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
.result_unpriv = REJECT,
.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
},
@@ -601,7 +601,7 @@
},
.fixup_map_array_48b = { 3 },
.result = REJECT,
- .errstr = "R1 max value is outside of the array range",
+ .errstr = "R1 max value is outside of the allowed memory range",
.errstr_unpriv = "R1 pointer arithmetic of map value goes out of range",
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -726,7 +726,7 @@
},
.fixup_map_array_48b = { 3 },
.result = REJECT,
- .errstr = "R0 min value is outside of the array range",
+ .errstr = "R0 min value is outside of the allowed memory range",
},
{
"map access: value_ptr -= known scalar, 2",
diff --git a/tools/testing/selftests/breakpoints/.gitignore b/tools/testing/selftests/breakpoints/.gitignore
index a23bb4a6f06c..def2e97dab9a 100644
--- a/tools/testing/selftests/breakpoints/.gitignore
+++ b/tools/testing/selftests/breakpoints/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
breakpoint_test
step_after_suspend_test
diff --git a/tools/testing/selftests/capabilities/.gitignore b/tools/testing/selftests/capabilities/.gitignore
index b732dd0d4738..426d9adca67c 100644
--- a/tools/testing/selftests/capabilities/.gitignore
+++ b/tools/testing/selftests/capabilities/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
test_execve
validate_cap
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index 7f9835624793..aa6de65b0838 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
test_memcontrol
test_core
test_freezer
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 66aafe1f5746..967f268fde74 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -11,6 +11,6 @@ TEST_GEN_PROGS += test_freezer
include ../lib.mk
-$(OUTPUT)/test_memcontrol: cgroup_util.c
-$(OUTPUT)/test_core: cgroup_util.c
-$(OUTPUT)/test_freezer: cgroup_util.c
+$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 8f7131dcf1ff..8a637ca7d73a 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -15,6 +15,7 @@
#include <unistd.h>
#include "cgroup_util.h"
+#include "../clone3/clone3_selftests.h"
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
@@ -331,12 +332,112 @@ int cg_run(const char *cgroup,
}
}
+pid_t clone_into_cgroup(int cgroup_fd)
+{
+#ifdef CLONE_ARGS_SIZE_VER2
+ pid_t pid;
+
+ struct clone_args args = {
+ .flags = CLONE_INTO_CGROUP,
+ .exit_signal = SIGCHLD,
+ .cgroup = cgroup_fd,
+ };
+
+ pid = sys_clone3(&args, sizeof(struct clone_args));
+ /*
+ * Verify that this is a genuine test failure:
+ * ENOSYS -> clone3() not available
+ * E2BIG -> CLONE_INTO_CGROUP not available
+ */
+ if (pid < 0 && (errno == ENOSYS || errno == E2BIG))
+ goto pretend_enosys;
+
+ return pid;
+
+pretend_enosys:
+#endif
+ errno = ENOSYS;
+ return -ENOSYS;
+}
+
+int clone_reap(pid_t pid, int options)
+{
+ int ret;
+ siginfo_t info = {
+ .si_signo = 0,
+ };
+
+again:
+ ret = waitid(P_PID, pid, &info, options | __WALL | __WNOTHREAD);
+ if (ret < 0) {
+ if (errno == EINTR)
+ goto again;
+ return -1;
+ }
+
+ if (options & WEXITED) {
+ if (WIFEXITED(info.si_status))
+ return WEXITSTATUS(info.si_status);
+ }
+
+ if (options & WSTOPPED) {
+ if (WIFSTOPPED(info.si_status))
+ return WSTOPSIG(info.si_status);
+ }
+
+ if (options & WCONTINUED) {
+ if (WIFCONTINUED(info.si_status))
+ return 0;
+ }
+
+ return -1;
+}
+
+int dirfd_open_opath(const char *dir)
+{
+ return open(dir, O_DIRECTORY | O_CLOEXEC | O_NOFOLLOW | O_PATH);
+}
+
+#define close_prot_errno(fd) \
+ if (fd >= 0) { \
+ int _e_ = errno; \
+ close(fd); \
+ errno = _e_; \
+ }
+
+static int clone_into_cgroup_run_nowait(const char *cgroup,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg)
+{
+ int cgroup_fd;
+ pid_t pid;
+
+ cgroup_fd = dirfd_open_opath(cgroup);
+ if (cgroup_fd < 0)
+ return -1;
+
+ pid = clone_into_cgroup(cgroup_fd);
+ close_prot_errno(cgroup_fd);
+ if (pid == 0)
+ exit(fn(cgroup, arg));
+
+ return pid;
+}
+
int cg_run_nowait(const char *cgroup,
int (*fn)(const char *cgroup, void *arg),
void *arg)
{
int pid;
+ pid = clone_into_cgroup_run_nowait(cgroup, fn, arg);
+ if (pid > 0)
+ return pid;
+
+ /* Genuine test failure. */
+ if (pid < 0 && errno != ENOSYS)
+ return -1;
+
pid = fork();
if (pid == 0) {
char buf[64];
@@ -450,3 +551,28 @@ int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
return strstr(buf, needle) ? 0 : -1;
}
+
+int clone_into_cgroup_run_wait(const char *cgroup)
+{
+ int cgroup_fd;
+ pid_t pid;
+
+ cgroup_fd = dirfd_open_opath(cgroup);
+ if (cgroup_fd < 0)
+ return -1;
+
+ pid = clone_into_cgroup(cgroup_fd);
+ close_prot_errno(cgroup_fd);
+ if (pid < 0)
+ return -1;
+
+ if (pid == 0)
+ exit(EXIT_SUCCESS);
+
+ /*
+ * We don't care whether this fails. We only care whether the initial
+ * clone succeeded.
+ */
+ (void)clone_reap(pid, WEXITED);
+ return 0;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 49c54fbdb229..5a1305dd1f0b 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -50,3 +50,7 @@ extern int cg_wait_for_proc_count(const char *cgroup, int count);
extern int cg_killall(const char *cgroup);
extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size);
extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle);
+extern pid_t clone_into_cgroup(int cgroup_fd);
+extern int clone_reap(pid_t pid, int options);
+extern int clone_into_cgroup_run_wait(const char *cgroup);
+extern int dirfd_open_opath(const char *dir);
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index e19ce940cd6a..3df648c37876 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -2,7 +2,10 @@
#include <linux/limits.h>
#include <sys/types.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
#include <unistd.h>
+#include <fcntl.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
@@ -12,6 +15,115 @@
#include "../kselftest.h"
#include "cgroup_util.h"
+static int touch_anon(char *buf, size_t size)
+{
+ int fd;
+ char *pos = buf;
+
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ while (size > 0) {
+ ssize_t ret = read(fd, pos, size);
+
+ if (ret < 0) {
+ if (errno != EINTR) {
+ close(fd);
+ return -1;
+ }
+ } else {
+ pos += ret;
+ size -= ret;
+ }
+ }
+ close(fd);
+
+ return 0;
+}
+
+static int alloc_and_touch_anon_noexit(const char *cgroup, void *arg)
+{
+ int ppid = getppid();
+ size_t size = (size_t)arg;
+ void *buf;
+
+ buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+ 0, 0);
+ if (buf == MAP_FAILED)
+ return -1;
+
+ if (touch_anon((char *)buf, size)) {
+ munmap(buf, size);
+ return -1;
+ }
+
+ while (getppid() == ppid)
+ sleep(1);
+
+ munmap(buf, size);
+ return 0;
+}
+
+/*
+ * Create a child process that allocates and touches 100MB, then waits to be
+ * killed. Wait until the child is attached to the cgroup, kill all processes
+ * in that cgroup and wait until "cgroup.procs" is empty. At this point try to
+ * destroy the empty cgroup. The test helps detect race conditions between
+ * dying processes leaving the cgroup and cgroup destruction path.
+ */
+static int test_cgcore_destroy(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg_test = NULL;
+ int child_pid;
+ char buf[PAGE_SIZE];
+
+ cg_test = cg_name(root, "cg_test");
+
+ if (!cg_test)
+ goto cleanup;
+
+ for (int i = 0; i < 10; i++) {
+ if (cg_create(cg_test))
+ goto cleanup;
+
+ child_pid = cg_run_nowait(cg_test, alloc_and_touch_anon_noexit,
+ (void *) MB(100));
+
+ if (child_pid < 0)
+ goto cleanup;
+
+ /* wait for the child to enter cgroup */
+ if (cg_wait_for_proc_count(cg_test, 1))
+ goto cleanup;
+
+ if (cg_killall(cg_test))
+ goto cleanup;
+
+ /* wait for cgroup to be empty */
+ while (1) {
+ if (cg_read(cg_test, "cgroup.procs", buf, sizeof(buf)))
+ goto cleanup;
+ if (buf[0] == '\0')
+ break;
+ usleep(1000);
+ }
+
+ if (rmdir(cg_test))
+ goto cleanup;
+
+ if (waitpid(child_pid, NULL, 0) < 0)
+ goto cleanup;
+ }
+ ret = KSFT_PASS;
+cleanup:
+ if (cg_test)
+ cg_destroy(cg_test);
+ free(cg_test);
+ return ret;
+}
+
/*
* A(0) - B(0) - C(1)
* \ D(0)
@@ -25,8 +137,11 @@
static int test_cgcore_populated(const char *root)
{
int ret = KSFT_FAIL;
+ int err;
char *cg_test_a = NULL, *cg_test_b = NULL;
char *cg_test_c = NULL, *cg_test_d = NULL;
+ int cgroup_fd = -EBADF;
+ pid_t pid;
cg_test_a = cg_name(root, "cg_test_a");
cg_test_b = cg_name(root, "cg_test_a/cg_test_b");
@@ -78,6 +193,52 @@ static int test_cgcore_populated(const char *root)
if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
goto cleanup;
+ /* Test that we can directly clone into a new cgroup. */
+ cgroup_fd = dirfd_open_opath(cg_test_d);
+ if (cgroup_fd < 0)
+ goto cleanup;
+
+ pid = clone_into_cgroup(cgroup_fd);
+ if (pid < 0) {
+ if (errno == ENOSYS)
+ goto cleanup_pass;
+ goto cleanup;
+ }
+
+ if (pid == 0) {
+ if (raise(SIGSTOP))
+ exit(EXIT_FAILURE);
+ exit(EXIT_SUCCESS);
+ }
+
+ err = cg_read_strcmp(cg_test_d, "cgroup.events", "populated 1\n");
+
+ (void)clone_reap(pid, WSTOPPED);
+ (void)kill(pid, SIGCONT);
+ (void)clone_reap(pid, WEXITED);
+
+ if (err)
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_test_d, "cgroup.events", "populated 0\n"))
+ goto cleanup;
+
+ /* Remove cgroup. */
+ if (cg_test_d) {
+ cg_destroy(cg_test_d);
+ free(cg_test_d);
+ cg_test_d = NULL;
+ }
+
+ pid = clone_into_cgroup(cgroup_fd);
+ if (pid < 0)
+ goto cleanup_pass;
+ if (pid == 0)
+ exit(EXIT_SUCCESS);
+ (void)clone_reap(pid, WEXITED);
+ goto cleanup;
+
+cleanup_pass:
ret = KSFT_PASS;
cleanup:
@@ -93,6 +254,8 @@ cleanup:
free(cg_test_c);
free(cg_test_b);
free(cg_test_a);
+ if (cgroup_fd >= 0)
+ close(cgroup_fd);
return ret;
}
@@ -136,6 +299,16 @@ static int test_cgcore_invalid_domain(const char *root)
if (errno != EOPNOTSUPP)
goto cleanup;
+ if (!clone_into_cgroup_run_wait(child))
+ goto cleanup;
+
+ if (errno == ENOSYS)
+ goto cleanup_pass;
+
+ if (errno != EOPNOTSUPP)
+ goto cleanup;
+
+cleanup_pass:
ret = KSFT_PASS;
cleanup:
@@ -345,6 +518,9 @@ static int test_cgcore_internal_process_constraint(const char *root)
if (!cg_enter_current(parent))
goto cleanup;
+ if (!clone_into_cgroup_run_wait(parent))
+ goto cleanup;
+
ret = KSFT_PASS;
cleanup:
@@ -512,6 +688,7 @@ struct corecg_test {
T(test_cgcore_populated),
T(test_cgcore_proc_migration),
T(test_cgcore_thread_migration),
+ T(test_cgcore_destroy),
};
#undef T
diff --git a/tools/testing/selftests/clone3/.gitignore b/tools/testing/selftests/clone3/.gitignore
index 0dc4f32c6cb8..a81085742d40 100644
--- a/tools/testing/selftests/clone3/.gitignore
+++ b/tools/testing/selftests/clone3/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
clone3
clone3_clear_sighand
clone3_set_tid
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index a3f2c8ad8bcc..91c1a78ddb39 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -5,12 +5,24 @@
#define _GNU_SOURCE
#include <sched.h>
+#include <linux/sched.h>
+#include <linux/types.h>
#include <stdint.h>
#include <syscall.h>
-#include <linux/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
+#ifndef CLONE_INTO_CGROUP
+#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
+#endif
+
+#ifndef CLONE_ARGS_SIZE_VER0
+#define CLONE_ARGS_SIZE_VER0 64
+#endif
+
#ifndef __NR_clone3
#define __NR_clone3 -1
struct clone_args {
@@ -22,10 +34,13 @@ struct clone_args {
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
+#define CLONE_ARGS_SIZE_VER1 80
__aligned_u64 set_tid;
__aligned_u64 set_tid_size;
+#define CLONE_ARGS_SIZE_VER2 88
+ __aligned_u64 cgroup;
};
-#endif
+#endif /* __NR_clone3 */
static pid_t sys_clone3(struct clone_args *args, size_t size)
{
diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
index cd5e1f602ac9..909da9cdda97 100644
--- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
+++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
@@ -351,6 +351,7 @@ static int test_alloc_errors(char *heap_name)
}
printf("Expected error checking passed\n");
+ ret = 0;
out:
if (dmabuf_fd >= 0)
close(dmabuf_fd);
diff --git a/tools/testing/selftests/drivers/.gitignore b/tools/testing/selftests/drivers/.gitignore
index f6aebcc27b76..ca74f2e1c719 100644
--- a/tools/testing/selftests/drivers/.gitignore
+++ b/tools/testing/selftests/drivers/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
/dma-buf/udmabuf
diff --git a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
index 5ba5bef44d5b..bdffe698e1d1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/blackhole_routes.sh
@@ -45,6 +45,7 @@ ALL_TESTS="
blackhole_ipv6
"
NUM_NETIFS=4
+: ${TIMEOUT:=20000} # ms
source $lib_dir/tc_common.sh
source $lib_dir/lib.sh
@@ -123,7 +124,7 @@ blackhole_ipv4()
skip_hw dst_ip 198.51.100.1 src_ip 192.0.2.1 ip_proto icmp \
action pass
- ip -4 route show 198.51.100.0/30 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload ip -4 route show 198.51.100.0/30
check_err $? "route not marked as offloaded when should"
ping_do $h1 198.51.100.1
@@ -147,7 +148,7 @@ blackhole_ipv6()
skip_hw dst_ip 2001:db8:2::1 src_ip 2001:db8:1::1 \
ip_proto icmpv6 action pass
- ip -6 route show 2001:db8:2::/120 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload ip -6 route show 2001:db8:2::/120
check_err $? "route not marked as offloaded when should"
ping6_do $h1 2001:db8:2::1
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
new file mode 100755
index 000000000000..b32ba5fec59d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap ACL drops functionality over mlxsw.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ ingress_flow_action_drop_test
+ egress_flow_action_drop_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ingress_flow_action_drop_test()
+{
+ local mz_pid
+
+ tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+ flower src_mac $h1mac action pass
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+ flower dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -d 1msec -q &
+ mz_pid=$!
+
+ RET=0
+
+ devlink_trap_drop_test ingress_flow_action_drop $swp2 101
+
+ log_test "ingress_flow_action_drop"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
+}
+
+egress_flow_action_drop_test()
+{
+ local mz_pid
+
+ tc filter add dev $swp2 egress protocol ip pref 2 handle 102 \
+ flower src_mac $h1mac action pass
+
+ tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
+ flower dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 0 -p 100 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -d 1msec -q &
+ mz_pid=$!
+
+ RET=0
+
+ devlink_trap_drop_test egress_flow_action_drop $swp2 102
+
+ log_test "egress_flow_action_drop"
+
+ tc filter del dev $swp2 egress protocol ip pref 1 handle 101 flower
+
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 2 102
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
new file mode 100755
index 000000000000..a37273473c1b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
@@ -0,0 +1,688 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap control trap functionality over mlxsw. Each registered
+# control packet trap is tested to make sure it is triggered under the right
+# conditions.
+#
+# +---------------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/24 |
+# | | 2001:db8:1::1/64 |
+# | | |
+# | | default via 192.0.2.2 |
+# | | default via 2001:db8:1::2 |
+# +----|----------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | + $rp1 |
+# | 192.0.2.2/24 |
+# | 2001:db8:1::2/64 |
+# | |
+# | 2001:db8:2::2/64 |
+# | 198.51.100.2/24 |
+# | + $rp2 |
+# | | |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------+
+# | | default via 198.51.100.2 |
+# | | default via 2001:db8:2::2 |
+# | | |
+# | | 2001:db8:2::1/64 |
+# | | 198.51.100.1/24 |
+# | + $h2 |
+# | H2 (vrf) |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ stp_test
+ lacp_test
+ lldp_test
+ igmp_query_test
+ igmp_v1_report_test
+ igmp_v2_report_test
+ igmp_v3_report_test
+ igmp_v2_leave_test
+ mld_query_test
+ mld_v1_report_test
+ mld_v2_report_test
+ mld_v1_done_test
+ ipv4_dhcp_test
+ ipv6_dhcp_test
+ arp_request_test
+ arp_response_test
+ ipv6_neigh_solicit_test
+ ipv6_neigh_advert_test
+ ipv4_bfd_test
+ ipv6_bfd_test
+ ipv4_ospf_test
+ ipv6_ospf_test
+ ipv4_bgp_test
+ ipv6_bgp_test
+ ipv4_vrrp_test
+ ipv6_vrrp_test
+ ipv4_pim_test
+ ipv6_pim_test
+ uc_loopback_test
+ local_route_test
+ external_route_test
+ ipv6_uc_dip_link_local_scope_test
+ ipv4_router_alert_test
+ ipv6_router_alert_test
+ ipv6_dip_all_nodes_test
+ ipv6_dip_all_routers_test
+ ipv6_router_solicit_test
+ ipv6_router_advert_test
+ ipv6_redirect_test
+ ptp_event_test
+ ptp_general_test
+ flow_action_sample_test
+ flow_action_trap_test
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+ ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+ ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ __addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+ __addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+ __addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+ __addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ router_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+stp_test()
+{
+ devlink_trap_stats_test "STP" "stp" $MZ $h1 -c 1 -t bpdu -q
+}
+
+lacp_payload_get()
+{
+ local source_mac=$1; shift
+ local p
+
+ p=$(:
+ )"01:80:C2:00:00:02:"$( : ETH daddr
+ )"$source_mac:"$( : ETH saddr
+ )"88:09:"$( : ETH type
+ )
+ echo $p
+}
+
+lacp_test()
+{
+ local h1mac=$(mac_get $h1)
+
+ devlink_trap_stats_test "LACP" "lacp" $MZ $h1 -c 1 \
+ $(lacp_payload_get $h1mac) -p 100 -q
+}
+
+lldp_payload_get()
+{
+ local source_mac=$1; shift
+ local p
+
+ p=$(:
+ )"01:80:C2:00:00:0E:"$( : ETH daddr
+ )"$source_mac:"$( : ETH saddr
+ )"88:CC:"$( : ETH type
+ )
+ echo $p
+}
+
+lldp_test()
+{
+ local h1mac=$(mac_get $h1)
+
+ devlink_trap_stats_test "LLDP" "lldp" $MZ $h1 -c 1 \
+ $(lldp_payload_get $h1mac) -p 100 -q
+}
+
+igmp_query_test()
+{
+ # IGMP (IP Protocol 2) Membership Query (Type 0x11)
+ devlink_trap_stats_test "IGMP Membership Query" "igmp_query" \
+ $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+ -A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=11 -p 100 -q
+}
+
+igmp_v1_report_test()
+{
+ # IGMP (IP Protocol 2) Version 1 Membership Report (Type 0x12)
+ devlink_trap_stats_test "IGMP Version 1 Membership Report" \
+ "igmp_v1_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+ -A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=12 -p 100 -q
+}
+
+igmp_v2_report_test()
+{
+ # IGMP (IP Protocol 2) Version 2 Membership Report (Type 0x16)
+ devlink_trap_stats_test "IGMP Version 2 Membership Report" \
+ "igmp_v2_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+ -A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=16 -p 100 -q
+}
+
+igmp_v3_report_test()
+{
+ # IGMP (IP Protocol 2) Version 3 Membership Report (Type 0x22)
+ devlink_trap_stats_test "IGMP Version 3 Membership Report" \
+ "igmp_v3_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+ -A 192.0.2.1 -B 244.0.0.1 -t ip proto=2,p=22 -p 100 -q
+}
+
+igmp_v2_leave_test()
+{
+ # IGMP (IP Protocol 2) Version 2 Leave Group (Type 0x17)
+ devlink_trap_stats_test "IGMP Version 2 Leave Group" \
+ "igmp_v2_leave" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:02 \
+ -A 192.0.2.1 -B 224.0.0.2 -t ip proto=2,p=17 -p 100 -q
+}
+
+mld_payload_get()
+{
+ local type=$1; shift
+ local p
+
+ type=$(printf "%x" $type)
+ p=$(:
+ )"3A:"$( : Next Header - ICMPv6
+ )"00:"$( : Hdr Ext Len
+ )"00:00:00:00:00:00:"$( : Options and Padding
+ )"$type:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
+
+mld_query_test()
+{
+ # MLD Multicast Listener Query (Type 130)
+ devlink_trap_stats_test "MLD Multicast Listener Query" "mld_query" \
+ $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1 \
+ -t ip hop=1,next=0,payload=$(mld_payload_get 130) -p 100 -q
+}
+
+mld_v1_report_test()
+{
+ # MLD Version 1 Multicast Listener Report (Type 131)
+ devlink_trap_stats_test "MLD Version 1 Multicast Listener Report" \
+ "mld_v1_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+ -t ip hop=1,next=0,payload=$(mld_payload_get 131) -p 100 -q
+}
+
+mld_v2_report_test()
+{
+ # MLD Version 2 Multicast Listener Report (Type 143)
+ devlink_trap_stats_test "MLD Version 2 Multicast Listener Report" \
+ "mld_v2_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+ -t ip hop=1,next=0,payload=$(mld_payload_get 143) -p 100 -q
+}
+
+mld_v1_done_test()
+{
+ # MLD Version 1 Multicast Listener Done (Type 132)
+ devlink_trap_stats_test "MLD Version 1 Multicast Listener Done" \
+ "mld_v1_done" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+ -t ip hop=1,next=0,payload=$(mld_payload_get 132) -p 100 -q
+}
+
+ipv4_dhcp_test()
+{
+ devlink_trap_stats_test "IPv4 DHCP Port 67" "ipv4_dhcp" \
+ $MZ $h1 -c 1 -a own -b bcast -A 0.0.0.0 -B 255.255.255.255 \
+ -t udp sp=68,dp=67 -p 100 -q
+
+ devlink_trap_stats_test "IPv4 DHCP Port 68" "ipv4_dhcp" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) -A 192.0.2.1 \
+ -B 255.255.255.255 -t udp sp=67,dp=68 -p 100 -q
+}
+
+ipv6_dhcp_test()
+{
+ devlink_trap_stats_test "IPv6 DHCP Port 547" "ipv6_dhcp" \
+ $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=546,dp=547 \
+ -p 100 -q
+
+ devlink_trap_stats_test "IPv6 DHCP Port 546" "ipv6_dhcp" \
+ $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=547,dp=546 \
+ -p 100 -q
+}
+
+arp_request_test()
+{
+ devlink_trap_stats_test "ARP Request" "arp_request" \
+ $MZ $h1 -c 1 -a own -b bcast -t arp request -p 100 -q
+}
+
+arp_response_test()
+{
+ devlink_trap_stats_test "ARP Response" "arp_response" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) -t arp reply -p 100 -q
+}
+
+icmpv6_header_get()
+{
+ local type=$1; shift
+ local p
+
+ type=$(printf "%x" $type)
+ p=$(:
+ )"$type:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
+
+ipv6_neigh_solicit_test()
+{
+ devlink_trap_stats_test "IPv6 Neighbour Solicitation" \
+ "ipv6_neigh_solicit" $MZ $h1 -6 -c 1 \
+ -A fe80::1 -B ff02::1:ff00:02 \
+ -t ip hop=1,next=58,payload=$(icmpv6_header_get 135) -p 100 -q
+}
+
+ipv6_neigh_advert_test()
+{
+ devlink_trap_stats_test "IPv6 Neighbour Advertisement" \
+ "ipv6_neigh_advert" $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A fe80::1 -B 2001:db8:1::2 \
+ -t ip hop=1,next=58,payload=$(icmpv6_header_get 136) -p 100 -q
+}
+
+ipv4_bfd_test()
+{
+ devlink_trap_stats_test "IPv4 BFD Control - Port 3784" "ipv4_bfd" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3784 -p 100 -q
+
+ devlink_trap_stats_test "IPv4 BFD Echo - Port 3785" "ipv4_bfd" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv6_bfd_test()
+{
+ devlink_trap_stats_test "IPv6 BFD Control - Port 3784" "ipv6_bfd" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 \
+ -t udp sp=49153,dp=3784 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 BFD Echo - Port 3785" "ipv6_bfd" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 \
+ -t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv4_ospf_test()
+{
+ devlink_trap_stats_test "IPv4 OSPF - Multicast" "ipv4_ospf" \
+ $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:05 \
+ -A 192.0.2.1 -B 224.0.0.5 -t ip proto=89 -p 100 -q
+
+ devlink_trap_stats_test "IPv4 OSPF - Unicast" "ipv4_ospf" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.2 -t ip proto=89 -p 100 -q
+}
+
+ipv6_ospf_test()
+{
+ devlink_trap_stats_test "IPv6 OSPF - Multicast" "ipv6_ospf" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:05 \
+ -A fe80::1 -B ff02::5 -t ip next=89 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 OSPF - Unicast" "ipv6_ospf" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 -t ip next=89 -p 100 -q
+}
+
+ipv4_bgp_test()
+{
+ devlink_trap_stats_test "IPv4 BGP" "ipv4_bgp" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.2 -t tcp sp=54321,dp=179,flags=rst \
+ -p 100 -q
+}
+
+ipv6_bgp_test()
+{
+ devlink_trap_stats_test "IPv6 BGP" "ipv6_bgp" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 \
+ -t tcp sp=54321,dp=179,flags=rst -p 100 -q
+}
+
+ipv4_vrrp_test()
+{
+ devlink_trap_stats_test "IPv4 VRRP" "ipv4_vrrp" \
+ $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:12 \
+ -A 192.0.2.1 -B 224.0.0.18 -t ip proto=112 -p 100 -q
+}
+
+ipv6_vrrp_test()
+{
+ devlink_trap_stats_test "IPv6 VRRP" "ipv6_vrrp" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:12 \
+ -A fe80::1 -B ff02::12 -t ip next=112 -p 100 -q
+}
+
+ipv4_pim_test()
+{
+ devlink_trap_stats_test "IPv4 PIM - Multicast" "ipv4_pim" \
+ $MZ $h1 -c 1 -a own -b 01:00:5e:00:00:0d \
+ -A 192.0.2.1 -B 224.0.0.13 -t ip proto=103 -p 100 -q
+
+ devlink_trap_stats_test "IPv4 PIM - Unicast" "ipv4_pim" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.2 -t ip proto=103 -p 100 -q
+}
+
+ipv6_pim_test()
+{
+ devlink_trap_stats_test "IPv6 PIM - Multicast" "ipv6_pim" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:0d \
+ -A fe80::1 -B ff02::d -t ip next=103 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 PIM - Unicast" "ipv6_pim" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A fe80::1 -B 2001:db8:1::2 -t ip next=103 -p 100 -q
+}
+
+uc_loopback_test()
+{
+ # Add neighbours to the fake destination IPs, so that the packets are
+ # routed in the device and not trapped due to an unresolved neighbour
+ # exception.
+ ip -4 neigh add 192.0.2.3 lladdr 00:11:22:33:44:55 nud permanent \
+ dev $rp1
+ ip -6 neigh add 2001:db8:1::3 lladdr 00:11:22:33:44:55 nud permanent \
+ dev $rp1
+
+ devlink_trap_stats_test "IPv4 Unicast Loopback" "uc_loopback" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 192.0.2.3 -t udp sp=54321,dp=12345 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 Unicast Loopback" "uc_loopback" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::3 -t udp sp=54321,dp=12345 \
+ -p 100 -q
+
+ ip -6 neigh del 2001:db8:1::3 dev $rp1
+ ip -4 neigh del 192.0.2.3 dev $rp1
+}
+
+local_route_test()
+{
+ # Use a fake source IP to prevent the trap from being triggered twice
+ # when the router sends back a port unreachable message.
+ devlink_trap_stats_test "IPv4 Local Route" "local_route" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.3 -B 192.0.2.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 Local Route" "local_route" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::3 -B 2001:db8:1::2 -t udp sp=54321,sp=12345 \
+ -p 100 -q
+}
+
+external_route_test()
+{
+ # Add a dummy device through which the incoming packets should be
+ # routed.
+ ip link add name dummy10 up type dummy
+ ip address add 203.0.113.1/24 dev dummy10
+ ip -6 address add 2001:db8:10::1/64 dev dummy10
+
+ devlink_trap_stats_test "IPv4 External Route" "external_route" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 203.0.113.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+ devlink_trap_stats_test "IPv6 External Route" "external_route" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:10::2 -t udp sp=54321,sp=12345 \
+ -p 100 -q
+
+ ip -6 address del 2001:db8:10::1/64 dev dummy10
+ ip address del 203.0.113.1/24 dev dummy10
+ ip link del dev dummy10
+}
+
+ipv6_uc_dip_link_local_scope_test()
+{
+ # Add a dummy link-local prefix route to allow the packet to be routed.
+ ip -6 route add fe80:1::/64 dev $rp2
+
+ devlink_trap_stats_test \
+ "IPv6 Unicast Destination IP With Link-Local Scope" \
+ "ipv6_uc_dip_link_local_scope" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A fe80::1 -B fe80:1::2 -t udp sp=54321,sp=12345 \
+ -p 100 -q
+
+ ip -6 route del fe80:1::/64 dev $rp2
+}
+
+ipv4_router_alert_get()
+{
+ local p
+
+ # https://en.wikipedia.org/wiki/IPv4#Options
+ p=$(:
+ )"94:"$( : Option Number
+ )"04:"$( : Option Length
+ )"00:00:"$( : Option Data
+ )
+ echo $p
+}
+
+ipv4_router_alert_test()
+{
+ devlink_trap_stats_test "IPv4 Router Alert" "ipv4_router_alert" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 198.51.100.3 \
+ -t ip option=$(ipv4_router_alert_get) -p 100 -q
+}
+
+ipv6_router_alert_get()
+{
+ local p
+
+ # https://en.wikipedia.org/wiki/IPv6_packet#Hop-by-hop_options_and_destination_options
+ # https://tools.ietf.org/html/rfc2711#section-2.1
+ p=$(:
+ )"11:"$( : Next Header - UDP
+ )"00:"$( : Hdr Ext Len
+ )"05:02:00:00:00:00:"$( : Option Data
+ )
+ echo $p
+}
+
+ipv6_router_alert_test()
+{
+ devlink_trap_stats_test "IPv6 Router Alert" "ipv6_router_alert" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A 2001:db8:1::1 -B 2001:db8:1::3 \
+ -t ip next=0,payload=$(ipv6_router_alert_get) -p 100 -q
+}
+
+ipv6_dip_all_nodes_test()
+{
+ devlink_trap_stats_test "IPv6 Destination IP \"All Nodes Address\"" \
+ "ipv6_dip_all_nodes" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+ -A 2001:db8:1::1 -B ff02::1 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_dip_all_routers_test()
+{
+ devlink_trap_stats_test "IPv6 Destination IP \"All Routers Address\"" \
+ "ipv6_dip_all_routers" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+ -A 2001:db8:1::1 -B ff02::2 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_router_solicit_test()
+{
+ devlink_trap_stats_test "IPv6 Router Solicitation" \
+ "ipv6_router_solicit" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+ -A fe80::1 -B ff02::2 \
+ -t ip hop=1,next=58,payload=$(icmpv6_header_get 133) -p 100 -q
+}
+
+ipv6_router_advert_test()
+{
+ devlink_trap_stats_test "IPv6 Router Advertisement" \
+ "ipv6_router_advert" \
+ $MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+ -A fe80::1 -B ff02::1 \
+ -t ip hop=1,next=58,payload=$(icmpv6_header_get 134) -p 100 -q
+}
+
+ipv6_redirect_test()
+{
+ devlink_trap_stats_test "IPv6 Redirect Message" \
+ "ipv6_redirect" \
+ $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+ -A fe80::1 -B 2001:db8:1::2 \
+ -t ip hop=1,next=58,payload=$(icmpv6_header_get 137) -p 100 -q
+}
+
+ptp_event_test()
+{
+ # PTP is only supported on Spectrum-1, for now.
+ [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+
+ # PTP Sync (0)
+ devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \
+ $MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+ -A 192.0.2.1 -B 224.0.1.129 \
+ -t udp sp=12345,dp=319,payload=10 -p 100 -q
+}
+
+ptp_general_test()
+{
+ # PTP is only supported on Spectrum-1, for now.
+ [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+
+ # PTP Announce (b)
+ devlink_trap_stats_test "PTP General Message" "ptp_general" \
+ $MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+ -A 192.0.2.1 -B 224.0.1.129 \
+ -t udp sp=12345,dp=320,payload=1b -p 100 -q
+}
+
+flow_action_sample_test()
+{
+ # Install a filter that samples every incoming packet.
+ tc qdisc add dev $rp1 clsact
+ tc filter add dev $rp1 ingress proto all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1 group 1
+
+ devlink_trap_stats_test "Flow Sampling" "flow_action_sample" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+ tc filter del dev $rp1 ingress proto all pref 1 handle 101 matchall
+ tc qdisc del dev $rp1 clsact
+}
+
+flow_action_trap_test()
+{
+ # Install a filter that traps a specific flow.
+ tc qdisc add dev $rp1 clsact
+ tc filter add dev $rp1 ingress proto ip pref 1 handle 101 flower \
+ skip_sw ip_proto udp src_port 12345 dst_port 54321 action trap
+
+ devlink_trap_stats_test "Flow Trapping (Logging)" "flow_action_trap" \
+ $MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+ -A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+ tc filter del dev $rp1 ingress proto ip pref 1 handle 101 flower
+ tc qdisc del dev $rp1 clsact
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
index 58cdbfb608e9..a4c2812e9807 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -96,7 +96,6 @@ source_mac_is_multicast_test()
{
local trap_name="source_mac_is_multicast"
local smac=01:02:03:04:05:06
- local group_name="l2_drops"
local mz_pid
tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
@@ -107,18 +106,17 @@ source_mac_is_multicast_test()
RET=0
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $swp2 101
log_test "Source MAC is multicast"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
}
__vlan_tag_mismatch_test()
{
local trap_name="vlan_tag_mismatch"
local dmac=de:ad:be:ef:13:37
- local group_name="l2_drops"
local opt=$1; shift
local mz_pid
@@ -132,7 +130,7 @@ __vlan_tag_mismatch_test()
$MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $swp2 101
# Add PVID and make sure packets are no longer dropped.
bridge vlan add vid 1 dev $swp1 pvid untagged master
@@ -140,7 +138,7 @@ __vlan_tag_mismatch_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
@@ -148,7 +146,7 @@ __vlan_tag_mismatch_test()
devlink_trap_action_set $trap_name "drop"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
}
vlan_tag_mismatch_untagged_test()
@@ -179,7 +177,6 @@ ingress_vlan_filter_test()
{
local trap_name="ingress_vlan_filter"
local dmac=de:ad:be:ef:13:37
- local group_name="l2_drops"
local mz_pid
local vid=10
@@ -193,7 +190,7 @@ ingress_vlan_filter_test()
$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $swp2 101
# Add the VLAN on the bridge port and make sure packets are no longer
# dropped.
@@ -202,7 +199,7 @@ ingress_vlan_filter_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
@@ -212,7 +209,7 @@ ingress_vlan_filter_test()
log_test "Ingress VLAN filter"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
bridge vlan del vid $vid dev $swp1 master
bridge vlan del vid $vid dev $swp2 master
@@ -222,7 +219,6 @@ __ingress_stp_filter_test()
{
local trap_name="ingress_spanning_tree_filter"
local dmac=de:ad:be:ef:13:37
- local group_name="l2_drops"
local state=$1; shift
local mz_pid
local vid=20
@@ -237,7 +233,7 @@ __ingress_stp_filter_test()
$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $swp2 101
# Change STP state to forwarding and make sure packets are no longer
# dropped.
@@ -246,7 +242,7 @@ __ingress_stp_filter_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
@@ -254,7 +250,7 @@ __ingress_stp_filter_test()
devlink_trap_action_set $trap_name "drop"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
bridge vlan del vid $vid dev $swp1 master
bridge vlan del vid $vid dev $swp2 master
@@ -292,7 +288,6 @@ port_list_is_empty_uc_test()
{
local trap_name="port_list_is_empty"
local dmac=de:ad:be:ef:13:37
- local group_name="l2_drops"
local mz_pid
# Disable unicast flooding on both ports, so that packets cannot egress
@@ -308,7 +303,7 @@ port_list_is_empty_uc_test()
$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $swp2 101
# Allow packets to be flooded to one port.
ip link set dev $swp2 type bridge_slave flood on
@@ -316,7 +311,7 @@ port_list_is_empty_uc_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
@@ -326,7 +321,7 @@ port_list_is_empty_uc_test()
log_test "Port list is empty - unicast"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
ip link set dev $swp1 type bridge_slave flood on
}
@@ -335,7 +330,6 @@ port_list_is_empty_mc_test()
{
local trap_name="port_list_is_empty"
local dmac=01:00:5e:00:00:01
- local group_name="l2_drops"
local dip=239.0.0.1
local mz_pid
@@ -354,7 +348,7 @@ port_list_is_empty_mc_test()
$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $swp2 101
# Allow packets to be flooded to one port.
ip link set dev $swp2 type bridge_slave mcast_flood on
@@ -362,7 +356,7 @@ port_list_is_empty_mc_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
@@ -372,7 +366,7 @@ port_list_is_empty_mc_test()
log_test "Port list is empty - multicast"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
ip link set dev $swp1 type bridge_slave mcast_flood on
}
@@ -387,7 +381,6 @@ port_loopback_filter_uc_test()
{
local trap_name="port_loopback_filter"
local dmac=de:ad:be:ef:13:37
- local group_name="l2_drops"
local mz_pid
# Make sure packets can only egress the input port.
@@ -401,7 +394,7 @@ port_loopback_filter_uc_test()
$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp2
+ devlink_trap_drop_test $trap_name $swp2 101
# Allow packets to be flooded.
ip link set dev $swp2 type bridge_slave flood on
@@ -409,7 +402,7 @@ port_loopback_filter_uc_test()
devlink_trap_stats_idle_test $trap_name
check_err $? "Trap stats not idle when packets should not be dropped"
- devlink_trap_group_stats_idle_test $group_name
+ devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
check_err $? "Trap group stats not idle with when packets should not be dropped"
tc_check_packets "dev $swp2 egress" 101 0
@@ -419,7 +412,7 @@ port_loopback_filter_uc_test()
log_test "Port loopback filter - unicast"
- devlink_trap_drop_cleanup $mz_pid $swp2 ip
+ devlink_trap_drop_cleanup $mz_pid $swp2 ip 1 101
}
port_loopback_filter_test()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index d88d8e47d11b..f5abb1ebd392 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -161,7 +161,6 @@ ping_check()
non_ip_test()
{
local trap_name="non_ip"
- local group_name="l3_drops"
local mz_pid
RET=0
@@ -176,11 +175,11 @@ non_ip_test()
00:00 de:ad:be:ef" &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Non IP"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
}
__uc_dip_over_mc_dmac_test()
@@ -190,7 +189,6 @@ __uc_dip_over_mc_dmac_test()
local dip=$1; shift
local flags=${1:-""}; shift
local trap_name="uc_dip_over_mc_dmac"
- local group_name="l3_drops"
local dmac=01:02:03:04:05:06
local mz_pid
@@ -206,11 +204,11 @@ __uc_dip_over_mc_dmac_test()
-B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Unicast destination IP over multicast destination MAC: $desc"
- devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
}
uc_dip_over_mc_dmac_test()
@@ -227,7 +225,6 @@ __sip_is_loopback_test()
local dip=$1; shift
local flags=${1:-""}; shift
local trap_name="sip_is_loopback_address"
- local group_name="l3_drops"
local mz_pid
RET=0
@@ -242,11 +239,11 @@ __sip_is_loopback_test()
-b $rp1mac -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Source IP is loopback address: $desc"
- devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
}
sip_is_loopback_test()
@@ -262,7 +259,6 @@ __dip_is_loopback_test()
local dip=$1; shift
local flags=${1:-""}; shift
local trap_name="dip_is_loopback_address"
- local group_name="l3_drops"
local mz_pid
RET=0
@@ -277,11 +273,11 @@ __dip_is_loopback_test()
-B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Destination IP is loopback address: $desc"
- devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
}
dip_is_loopback_test()
@@ -298,7 +294,6 @@ __sip_is_mc_test()
local dip=$1; shift
local flags=${1:-""}; shift
local trap_name="sip_is_mc"
- local group_name="l3_drops"
local mz_pid
RET=0
@@ -313,11 +308,11 @@ __sip_is_mc_test()
-b $rp1mac -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Source IP is multicast: $desc"
- devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
}
sip_is_mc_test()
@@ -329,7 +324,6 @@ sip_is_mc_test()
ipv4_sip_is_limited_bc_test()
{
local trap_name="ipv4_sip_is_limited_bc"
- local group_name="l3_drops"
local sip=255.255.255.255
local mz_pid
@@ -345,11 +339,11 @@ ipv4_sip_is_limited_bc_test()
-B $h2_ipv4 -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "IPv4 source IP is limited broadcast"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
}
ipv4_payload_get()
@@ -382,7 +376,6 @@ __ipv4_header_corrupted_test()
local ihl=$1; shift
local checksum=$1; shift
local trap_name="ip_header_corrupted"
- local group_name="l3_drops"
local payload
local mz_pid
@@ -399,11 +392,11 @@ __ipv4_header_corrupted_test()
$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "IP header corrupted: $desc: IPv4"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
}
ipv6_payload_get()
@@ -429,7 +422,6 @@ __ipv6_header_corrupted_test()
local desc=$1; shift
local ipver=$1; shift
local trap_name="ip_header_corrupted"
- local group_name="l3_drops"
local payload
local mz_pid
@@ -446,11 +438,11 @@ __ipv6_header_corrupted_test()
$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "IP header corrupted: $desc: IPv6"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ip"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ip" 1 101
}
ip_header_corrupted_test()
@@ -469,7 +461,6 @@ ip_header_corrupted_test()
ipv6_mc_dip_reserved_scope_test()
{
local trap_name="ipv6_mc_dip_reserved_scope"
- local group_name="l3_drops"
local dip=FF00::
local mz_pid
@@ -485,17 +476,16 @@ ipv6_mc_dip_reserved_scope_test()
"33:33:00:00:00:00" -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "IPv6 multicast destination IP reserved scope"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101
}
ipv6_mc_dip_interface_local_scope_test()
{
local trap_name="ipv6_mc_dip_interface_local_scope"
- local group_name="l3_drops"
local dip=FF01::
local mz_pid
@@ -511,11 +501,11 @@ ipv6_mc_dip_interface_local_scope_test()
"33:33:00:00:00:00" -B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "IPv6 multicast destination IP interface-local scope"
- devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6"
+ devlink_trap_drop_cleanup $mz_pid $rp2 "ipv6" 1 101
}
__blackhole_route_test()
@@ -526,7 +516,6 @@ __blackhole_route_test()
local dip=$1; shift
local ip_proto=${1:-"icmp"}; shift
local trap_name="blackhole_route"
- local group_name="l3_drops"
local mz_pid
RET=0
@@ -542,10 +531,10 @@ __blackhole_route_test()
-B $dip -d 1msec -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $rp2
+ devlink_trap_drop_test $trap_name $rp2 101
log_test "Blackhole route: IPv$flags"
- devlink_trap_drop_cleanup $mz_pid $rp2 $proto
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
ip -$flags route del blackhole $subnet
}
@@ -558,7 +547,6 @@ blackhole_route_test()
irif_disabled_test()
{
local trap_name="irif_disabled"
- local group_name="l3_drops"
local t0_packets t0_bytes
local t1_packets t1_bytes
local mz_pid
@@ -613,7 +601,6 @@ irif_disabled_test()
erif_disabled_test()
{
local trap_name="erif_disabled"
- local group_name="l3_drops"
local t0_packets t0_bytes
local t1_packets t1_bytes
local mz_pid
@@ -641,13 +628,9 @@ erif_disabled_test()
mz_pid=$!
sleep 5
- # In order to see this trap we need a route that points to disabled RIF.
- # When ipv6 address is flushed, there is a delay and the routes are
- # deleted before the RIF and we cannot get state that we have route
- # to disabled RIF.
- # Delete IPv6 address first and then check this trap with flushing IPv4.
- ip -6 add flush dev br0
- ip -4 add flush dev br0
+ # Unlinking the port from the bridge will disable the RIF associated
+ # with br0 as it is no longer an upper of any mlxsw port.
+ ip link set dev $rp1 nomaster
t1_packets=$(devlink_trap_rx_packets_get $trap_name)
t1_bytes=$(devlink_trap_rx_bytes_get $trap_name)
@@ -659,7 +642,6 @@ erif_disabled_test()
log_test "Egress RIF disabled"
kill $mz_pid && wait $mz_pid &> /dev/null
- ip link set dev $rp1 nomaster
__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
ip link del dev br0 type bridge
devlink_trap_action_set $trap_name "drop"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 2bc6df42d597..1fedfc9da434 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -169,7 +169,6 @@ trap_action_check()
mtu_value_is_too_small_test()
{
local trap_name="mtu_value_is_too_small"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -191,7 +190,7 @@ mtu_value_is_too_small_test()
-B 198.51.100.1 -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets_hitting "dev $h1 ingress" 101
check_err $? "Packets were not received to h1"
@@ -208,7 +207,6 @@ __ttl_value_is_too_small_test()
{
local ttl_val=$1; shift
local trap_name="ttl_value_is_too_small"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -227,7 +225,7 @@ __ttl_value_is_too_small_test()
-b $rp1mac -B 198.51.100.1 -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets_hitting "dev $h1 ingress" 101
check_err $? "Packets were not received to h1"
@@ -271,7 +269,6 @@ __mc_reverse_path_forwarding_test()
local proto=$1; shift
local flags=${1:-""}; shift
local trap_name="mc_reverse_path_forwarding"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -292,7 +289,7 @@ __mc_reverse_path_forwarding_test()
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets "dev $rp2 egress" 101 0
check_err $? "Packets were not dropped"
@@ -322,7 +319,6 @@ __reject_route_test()
local unreachable=$1; shift
local flags=${1:-""}; shift
local trap_name="reject_route"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -341,7 +337,7 @@ __reject_route_test()
-B $dst_ip -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets_hitting "dev $h1 ingress" 101
check_err $? "ICMP packet was not received to h1"
@@ -370,7 +366,6 @@ __host_miss_test()
local desc=$1; shift
local dip=$1; shift
local trap_name="unresolved_neigh"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -405,7 +400,6 @@ __invalid_nexthop_test()
local subnet=$1; shift
local via_add=$1; shift
local trap_name="unresolved_neigh"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -494,7 +488,6 @@ vrf_without_routes_destroy()
ipv4_lpm_miss_test()
{
local trap_name="ipv4_lpm_miss"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -511,7 +504,7 @@ ipv4_lpm_miss_test()
-B 203.0.113.1 -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
log_test "LPM miss: IPv4"
@@ -522,7 +515,6 @@ ipv4_lpm_miss_test()
ipv6_lpm_miss_test()
{
local trap_name="ipv6_lpm_miss"
- local group_name="l3_drops"
local expected_action="trap"
local mz_pid
@@ -539,7 +531,7 @@ ipv6_lpm_miss_test()
-B 2001:db8::1 -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
log_test "LPM miss: IPv6"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
new file mode 100755
index 000000000000..47edf099a17e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -0,0 +1,384 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap policer functionality over mlxsw.
+
+# +---------------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/24 |
+# | | |
+# | | default via 192.0.2.2 |
+# +----|----------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | + $rp1 |
+# | 192.0.2.2/24 |
+# | |
+# | 198.51.100.2/24 |
+# | + $rp2 |
+# | | |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------+
+# | | default via 198.51.100.2 |
+# | | |
+# | | 198.51.100.1/24 |
+# | + $h2 |
+# | H2 (vrf) |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ rate_limits_test
+ burst_limits_test
+ rate_test
+ burst_test
+"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+ mtu_set $h1 10000
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ mtu_restore $h1
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/24
+ mtu_set $h2 10000
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+}
+
+h2_destroy()
+{
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ mtu_restore $h2
+ simple_if_fini $h2 198.51.100.1/24
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ __addr_add_del $rp1 add 192.0.2.2/24
+ __addr_add_del $rp2 add 198.51.100.2/24
+ mtu_set $rp1 10000
+ mtu_set $rp2 10000
+
+ ip -4 route add blackhole 198.51.100.100
+
+ devlink trap set $DEVLINK_DEV trap blackhole_route action trap
+}
+
+router_destroy()
+{
+ devlink trap set $DEVLINK_DEV trap blackhole_route action drop
+
+ ip -4 route del blackhole 198.51.100.100
+
+ mtu_restore $rp2
+ mtu_restore $rp1
+ __addr_add_del $rp2 del 198.51.100.2/24
+ __addr_add_del $rp1 del 192.0.2.2/24
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1_mac=$(mac_get $rp1)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ # Reload to ensure devlink-trap settings are back to default.
+ devlink_reload
+}
+
+rate_limits_test()
+{
+ RET=0
+
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null
+ check_fail $? "Policer rate was changed to rate lower than limit"
+ devlink trap policer set $DEVLINK_DEV policer 1 \
+ rate 2000000001 &> /dev/null
+ check_fail $? "Policer rate was changed to rate higher than limit"
+
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 1
+ check_err $? "Failed to set policer rate to minimum"
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 2000000000
+ check_err $? "Failed to set policer rate to maximum"
+
+ log_test "Trap policer rate limits"
+}
+
+burst_limits_test()
+{
+ RET=0
+
+ devlink trap policer set $DEVLINK_DEV policer 1 burst 0 &> /dev/null
+ check_fail $? "Policer burst size was changed to 0"
+ devlink trap policer set $DEVLINK_DEV policer 1 burst 17 &> /dev/null
+ check_fail $? "Policer burst size was changed to burst size that is not power of 2"
+ devlink trap policer set $DEVLINK_DEV policer 1 burst 8 &> /dev/null
+ check_fail $? "Policer burst size was changed to burst size lower than limit"
+ devlink trap policer set $DEVLINK_DEV policer 1 \
+ burst $((2**25)) &> /dev/null
+ check_fail $? "Policer burst size was changed to burst size higher than limit"
+
+ devlink trap policer set $DEVLINK_DEV policer 1 burst 16
+ check_err $? "Failed to set policer burst size to minimum"
+ devlink trap policer set $DEVLINK_DEV policer 1 burst $((2**24))
+ check_err $? "Failed to set policer burst size to maximum"
+
+ log_test "Trap policer burst size limits"
+}
+
+trap_rate_get()
+{
+ local t0 t1
+
+ t0=$(devlink_trap_rx_packets_get blackhole_route)
+ sleep 10
+ t1=$(devlink_trap_rx_packets_get blackhole_route)
+
+ echo $(((t1 - t0) / 10))
+}
+
+policer_drop_rate_get()
+{
+ local id=$1; shift
+ local t0 t1
+
+ t0=$(devlink_trap_policer_rx_dropped_get $id)
+ sleep 10
+ t1=$(devlink_trap_policer_rx_dropped_get $id)
+
+ echo $(((t1 - t0) / 10))
+}
+
+__rate_test()
+{
+ local rate pct drop_rate
+ local id=$1; shift
+
+ RET=0
+
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16
+ devlink trap group set $DEVLINK_DEV group l3_drops policer $id
+
+ # Send packets at highest possible rate and make sure they are dropped
+ # by the policer. Make sure measured received rate is about 1000 pps
+ log_info "=== Tx rate: Highest, Policer rate: 1000 pps ==="
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac
+
+ sleep 5 # Take measurements when rate is stable
+
+ rate=$(trap_rate_get)
+ pct=$((100 * (rate - 1000) / 1000))
+ ((-5 <= pct && pct <= 5))
+ check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%"
+ log_info "Expected rate 1000 pps, measured rate $rate pps"
+
+ drop_rate=$(policer_drop_rate_get $id)
+ (( drop_rate > 0 ))
+ check_err $? "Expected non-zero policer drop rate, got 0"
+ log_info "Measured policer drop rate of $drop_rate pps"
+
+ stop_traffic
+
+ # Send packets at a rate of 1000 pps and make sure they are not dropped
+ # by the policer
+ log_info "=== Tx rate: 1000 pps, Policer rate: 1000 pps ==="
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -d 1msec
+
+ sleep 5 # Take measurements when rate is stable
+
+ drop_rate=$(policer_drop_rate_get $id)
+ (( drop_rate == 0 ))
+ check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
+ log_info "Measured policer drop rate of $drop_rate pps"
+
+ stop_traffic
+
+ # Unbind the policer and send packets at highest possible rate. Make
+ # sure they are not dropped by the policer and that the measured
+ # received rate is higher than 1000 pps
+ log_info "=== Tx rate: Highest, Policer rate: No policer ==="
+
+ devlink trap group set $DEVLINK_DEV group l3_drops nopolicer
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac
+
+ rate=$(trap_rate_get)
+ (( rate > 1000 ))
+ check_err $? "Expected rate higher than 1000 pps, got $rate pps"
+ log_info "Measured rate $rate pps"
+
+ drop_rate=$(policer_drop_rate_get $id)
+ (( drop_rate == 0 ))
+ check_err $? "Expected zero policer drop rate, got a drop rate of $drop_rate pps"
+ log_info "Measured policer drop rate of $drop_rate pps"
+
+ stop_traffic
+
+ log_test "Trap policer rate"
+}
+
+rate_test()
+{
+ local id
+
+ for id in $(devlink_trap_policer_ids_get); do
+ echo
+ log_info "Running rate test for policer $id"
+ __rate_test $id
+ done
+}
+
+__burst_test()
+{
+ local t0_rx t0_drop t1_rx t1_drop rx drop
+ local id=$1; shift
+
+ RET=0
+
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32
+ devlink trap group set $DEVLINK_DEV group l3_drops policer $id
+
+ # Send a burst of 64 packets and make sure that about 32 are received
+ # and the rest are dropped by the policer
+ log_info "=== Tx burst size: 64, Policer burst size: 32 pps ==="
+
+ t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
+
+ t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ rx=$((t1_rx - t0_rx))
+ pct=$((100 * (rx - 32) / 32))
+ ((-20 <= pct && pct <= 20))
+ check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%"
+ log_info "Expected burst size of 32 packets, measured burst size of $rx packets"
+
+ drop=$((t1_drop - t0_drop))
+ (( drop > 0 ))
+ check_err $? "Expected non-zero policer drops, got 0"
+ log_info "Measured policer drops of $drop packets"
+
+ # Send a burst of 16 packets and make sure that 16 are received
+ # and that none are dropped by the policer
+ log_info "=== Tx burst size: 16, Policer burst size: 32 pps ==="
+
+ t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 16
+
+ t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ rx=$((t1_rx - t0_rx))
+ (( rx == 16 ))
+ check_err $? "Expected burst size of 16 packets, got $rx packets"
+ log_info "Expected burst size of 16 packets, measured burst size of $rx packets"
+
+ drop=$((t1_drop - t0_drop))
+ (( drop == 0 ))
+ check_err $? "Expected zero policer drops, got $drop"
+ log_info "Measured policer drops of $drop packets"
+
+ # Unbind the policer and send a burst of 64 packets. Make sure that
+ # 64 packets are received and that none are dropped by the policer
+ log_info "=== Tx burst size: 64, Policer burst size: No policer ==="
+
+ devlink trap group set $DEVLINK_DEV group l3_drops nopolicer
+
+ t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
+
+ t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
+ t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
+
+ rx=$((t1_rx - t0_rx))
+ (( rx == 64 ))
+ check_err $? "Expected burst size of 64 packets, got $rx packets"
+ log_info "Expected burst size of 64 packets, measured burst size of $rx packets"
+
+ drop=$((t1_drop - t0_drop))
+ (( drop == 0 ))
+ check_err $? "Expected zero policer drops, got $drop"
+ log_info "Measured policer drops of $drop packets"
+
+ log_test "Trap policer burst size"
+}
+
+burst_test()
+{
+ local id
+
+ for id in $(devlink_trap_policer_ids_get); do
+ echo
+ log_info "Running burst size test for policer $id"
+ __burst_test $id
+ done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
index 039629bb92a3..8817851da7a9 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
@@ -140,7 +140,6 @@ ecn_payload_get()
ecn_decap_test()
{
local trap_name="decap_error"
- local group_name="tunnel_drops"
local desc=$1; shift
local ecn_desc=$1; shift
local outer_tos=$1; shift
@@ -161,7 +160,7 @@ ecn_decap_test()
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets "dev $swp1 egress" 101 0
check_err $? "Packets were not dropped"
@@ -200,7 +199,6 @@ ipip_payload_get()
no_matching_tunnel_test()
{
local trap_name="decap_error"
- local group_name="tunnel_drops"
local desc=$1; shift
local sip=$1; shift
local mz_pid
@@ -218,7 +216,7 @@ no_matching_tunnel_test()
-A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets "dev $swp1 egress" 101 0
check_err $? "Packets were not dropped"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
index fd19161dd4ec..10e0f3dbc930 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -159,7 +159,6 @@ ecn_payload_get()
ecn_decap_test()
{
local trap_name="decap_error"
- local group_name="tunnel_drops"
local desc=$1; shift
local ecn_desc=$1; shift
local outer_tos=$1; shift
@@ -177,7 +176,7 @@ ecn_decap_test()
-t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets "dev $swp1 egress" 101 0
check_err $? "Packets were not dropped"
@@ -228,7 +227,6 @@ short_payload_get()
corrupted_packet_test()
{
local trap_name="decap_error"
- local group_name="tunnel_drops"
local desc=$1; shift
local payload_get=$1; shift
local mz_pid
@@ -246,7 +244,7 @@ corrupted_packet_test()
-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
mz_pid=$!
- devlink_trap_exception_test $trap_name $group_name
+ devlink_trap_exception_test $trap_name
tc_check_packets "dev $swp1 egress" 101 0
check_err $? "Packets were not dropped"
@@ -297,7 +295,6 @@ mc_smac_payload_get()
overlay_smac_is_mc_test()
{
local trap_name="overlay_smac_is_mc"
- local group_name="tunnel_drops"
local mz_pid
RET=0
@@ -314,11 +311,11 @@ overlay_smac_is_mc_test()
-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
mz_pid=$!
- devlink_trap_drop_test $trap_name $group_name $swp1
+ devlink_trap_drop_test $trap_name $swp1 101
log_test "Overlay source MAC is multicast"
- devlink_trap_drop_cleanup $mz_pid $swp1 "ip"
+ devlink_trap_drop_cleanup $mz_pid $swp1 "ip" 1 101
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
index d72d8488a3b2..7a0a99c1d22f 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
@@ -8,7 +8,8 @@ lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
netdev_pre_up_test
vxlan_vlan_add_test
- port_vlan_add_test
+ vxlan_bridge_create_test
+ bridge_create_test
"
NUM_NETIFS=2
source $lib_dir/lib.sh
@@ -106,32 +107,56 @@ vxlan_vlan_add_test()
ip link del dev br1
}
-port_vlan_add_test()
+vxlan_bridge_create_test()
{
RET=0
- ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
-
# Unsupported configuration: mlxsw demands VXLAN with "noudpcsum".
ip link add name vx1 up type vxlan id 1000 \
local 192.0.2.17 remote 192.0.2.18 \
dstport 4789 tos inherit ttl 100
- ip link set dev $swp1 master br1
- check_err $?
-
- bridge vlan del dev $swp1 vid 1
+ # Test with VLAN-aware bridge.
+ ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
ip link set dev vx1 master br1
+
+ ip link set dev $swp1 master br1 2>&1 > /dev/null \
+ | grep -q mlxsw_spectrum
check_err $?
- bridge vlan add dev $swp1 vid 1 pvid untagged 2>&1 >/dev/null \
+ # Test with VLAN-unaware bridge.
+ ip link set dev br1 type bridge vlan_filtering 0
+
+ ip link set dev $swp1 master br1 2>&1 > /dev/null \
| grep -q mlxsw_spectrum
check_err $?
- log_test "extack - map VLAN at port"
+ log_test "extack - bridge creation with VXLAN"
+ ip link del dev br1
ip link del dev vx1
+}
+
+bridge_create_test()
+{
+ RET=0
+
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link add name br2 up type bridge vlan_filtering 1
+
+ ip link set dev $swp1 master br1
+ check_err $?
+
+ # Only one VLAN-aware bridge is supported, so this should fail with
+ # an extack.
+ ip link set dev $swp2 master br2 2>&1 > /dev/null \
+ | grep -q mlxsw_spectrum
+ check_err $?
+
+ log_test "extack - multiple VLAN-aware bridges creation"
+
+ ip link del dev br2
ip link del dev br1
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
new file mode 100644
index 000000000000..cbe50f260a40
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+if [[ ! -v MLXSW_CHIP ]]; then
+ MLXSW_CHIP=$(devlink -j dev info $DEVLINK_DEV | jq -r '.[][]["driver"]')
+ if [ -z "$MLXSW_CHIP" ]; then
+ echo "SKIP: Device $DEVLINK_DEV doesn't support devlink info command"
+ exit 1
+ fi
+fi
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
index eff6393ce974..71066bc4b886 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
@@ -114,23 +114,12 @@ ping_ipv4()
ping_test $h1 192.0.2.2
}
-wait_for_packets()
-{
- local t0=$1; shift
- local prio_observe=$1; shift
-
- local t1=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
- local delta=$((t1 - t0))
- echo $delta
- ((delta >= 10))
-}
-
__test_defprio()
{
local prio_install=$1; shift
local prio_observe=$1; shift
- local delta
local key
+ local t1
local i
RET=0
@@ -139,9 +128,10 @@ __test_defprio()
local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
mausezahn -q $h1 -d 100m -c 10 -t arp reply
- delta=$(busywait "$HIT_TIMEOUT" wait_for_packets $t0 $prio_observe)
+ t1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((t0 + 10))" \
+ ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
- check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $delta."
+ check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))."
log_test "Default priority $prio_install/$prio_observe"
defprio_uninstall $swp1 $prio_install
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
index c745ce3befee..4cb2aa65278a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -31,6 +31,7 @@ ALL_TESTS="
ping_ipv4
test_update
test_no_update
+ test_pedit_norewrite
test_dscp_leftover
"
@@ -56,6 +57,11 @@ zero()
echo 0
}
+three()
+{
+ echo 3
+}
+
h1_create()
{
simple_if_init $h1 192.0.2.1/28
@@ -103,6 +109,9 @@ switch_create()
simple_if_init $swp1 192.0.2.2/28
__simple_if_init $swp2 v$swp1 192.0.2.17/28
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+
lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
lldpad_app_wait_set $swp1
@@ -115,6 +124,9 @@ switch_destroy()
lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
lldpad_app_wait_del
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
__simple_if_fini $swp2 192.0.2.17/28
simple_if_fini $swp1 192.0.2.2/28
}
@@ -223,18 +235,36 @@ __test_update()
test_update()
{
+ echo "Test net.ipv4.ip_forward_update_priority=1"
__test_update 1 reprioritize
}
test_no_update()
{
+ echo "Test net.ipv4.ip_forward_update_priority=0"
__test_update 0 echo
}
+# Test that when DSCP is updated in pedit, the DSCP rewrite is turned off.
+test_pedit_norewrite()
+{
+ echo "Test no DSCP rewrite after DSCP is updated by pedit"
+
+ tc filter add dev $swp1 ingress handle 101 pref 1 prot ip flower \
+ action pedit ex munge ip dsfield set $((3 << 2)) retain 0xfc \
+ action skbedit priority 3
+
+ __test_update 0 three
+
+ tc filter del dev $swp1 ingress pref 1
+}
+
# Test that when the last APP rule is removed, the prio->DSCP map is properly
# set to zeroes, and that the last APP rule does not stay active in the ASIC.
test_dscp_leftover()
{
+ echo "Test that last removed DSCP rule is deconfigured correctly"
+
lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
lldpad_app_wait_del
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index 24dd8ed48580..b025daea062d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -300,7 +300,7 @@ test_uc_aware()
local i
for ((i = 0; i < attempts; ++i)); do
- if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 0.1; then
+ if $ARPING -c 1 -I $h1 -b 192.0.2.66 -q -w 1; then
((passes++))
fi
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
index d231649b4f01..e93878d42596 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -2,16 +2,15 @@
# SPDX-License-Identifier: GPL-2.0
ROUTER_NUM_NETIFS=4
+: ${TIMEOUT:=20000} # ms
router_h1_create()
{
simple_if_init $h1 192.0.1.1/24
- ip route add 193.0.0.0/8 via 192.0.1.2 dev $h1
}
router_h1_destroy()
{
- ip route del 193.0.0.0/8 via 192.0.1.2 dev $h1
simple_if_fini $h1 192.0.1.1/24
}
@@ -64,13 +63,15 @@ router_setup_prepare()
router_create
}
-router_offload_validate()
+wait_for_routes()
{
- local route_count=$1
- local offloaded_count
+ local t0=$1; shift
+ local route_count=$1; shift
- offloaded_count=$(ip route | grep -o 'offload' | wc -l)
- [[ $offloaded_count -ge $route_count ]]
+ local t1=$(ip route | grep -o 'offload' | wc -l)
+ local delta=$((t1 - t0))
+ echo $delta
+ [[ $delta -ge $route_count ]]
}
router_routes_create()
@@ -90,8 +91,8 @@ router_routes_create()
break 3
fi
- echo route add 193.${i}.${j}.${k}/32 via \
- 192.0.2.1 dev $rp2 >> $ROUTE_FILE
+ echo route add 193.${i}.${j}.${k}/32 dev $rp2 \
+ >> $ROUTE_FILE
((count++))
done
done
@@ -111,45 +112,19 @@ router_test()
{
local route_count=$1
local should_fail=$2
- local count=0
+ local delta
RET=0
+ local t0=$(ip route | grep -o 'offload' | wc -l)
router_routes_create $route_count
+ delta=$(busywait "$TIMEOUT" wait_for_routes $t0 $route_count)
- router_offload_validate $route_count
- check_err_fail $should_fail $? "Offload of $route_count routes"
+ check_err_fail $should_fail $? "Offload routes: Expected $route_count, got $delta."
if [[ $RET -ne 0 ]] || [[ $should_fail -eq 1 ]]; then
return
fi
- tc filter add dev $h2 ingress protocol ip pref 1 flower \
- skip_sw dst_ip 193.0.0.0/8 action drop
-
- for i in {0..255}
- do
- for j in {0..255}
- do
- for k in {0..255}
- do
- if [[ $count -eq $route_count ]]; then
- break 3
- fi
-
- $MZ $h1 -c 1 -p 64 -a $h1mac -b $rp1mac \
- -A 192.0.1.1 -B 193.${i}.${j}.${k} \
- -t ip -q
- ((count++))
- done
- done
- done
-
- tc_check_packets "dev $h2 ingress" 1 $route_count
- check_err $? "Offload mismatch"
-
- tc filter del dev $h2 ingress protocol ip pref 1 flower \
- skip_sw dst_ip 193.0.0.0/8 action drop
-
router_routes_destroy
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index 5c39e5f6a480..f4031002d5e9 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -32,6 +32,7 @@ ALL_TESTS="
devlink_reload_test
"
NUM_NETIFS=2
+: ${TIMEOUT:=20000} # ms
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
@@ -360,20 +361,24 @@ vlan_rif_refcount_test()
ip link add link br0 name br0.10 up type vlan id 10
ip -6 address add 2001:db8:1::1/64 dev br0.10
- ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
check_err $? "vlan rif was not created before adding port to vlan"
bridge vlan add vid 10 dev $swp1
- ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
check_err $? "vlan rif was destroyed after adding port to vlan"
bridge vlan del vid 10 dev $swp1
- ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
check_err $? "vlan rif was destroyed after removing port from vlan"
ip link set dev $swp1 nomaster
- ip -6 route get fibmatch 2001:db8:1::2 dev br0.10 | grep -q offload
- check_fail $? "vlan rif was not destroyed after unlinking port from bridge"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev br0.10
+ check_err $? "vlan rif was not destroyed after unlinking port from bridge"
log_test "vlan rif refcount"
@@ -401,22 +406,28 @@ subport_rif_refcount_test()
ip -6 address add 2001:db8:1::1/64 dev bond1
ip -6 address add 2001:db8:2::1/64 dev bond1.10
- ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev bond1
check_err $? "subport rif was not created on lag device"
- ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
check_err $? "subport rif was not created on vlan device"
ip link set dev $swp1 nomaster
- ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev bond1
check_err $? "subport rif of lag device was destroyed when should not"
- ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
check_err $? "subport rif of vlan device was destroyed when should not"
ip link set dev $swp2 nomaster
- ip -6 route get fibmatch 2001:db8:1::2 dev bond1 | grep -q offload
- check_fail $? "subport rif of lag device was not destroyed when should"
- ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10 | grep -q offload
- check_fail $? "subport rif of vlan device was not destroyed when should"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+ check_err $? "subport rif of lag device was not destroyed when should"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:2::2 dev bond1.10
+ check_err $? "subport rif of vlan device was not destroyed when should"
log_test "subport rif refcount"
@@ -575,7 +586,8 @@ bridge_extern_learn_test()
bridge fdb add de:ad:be:ef:13:37 dev $swp1 master extern_learn
- bridge fdb show brport $swp1 | grep de:ad:be:ef:13:37 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ bridge fdb show brport $swp1 de:ad:be:ef:13:37
check_err $? "fdb entry not marked as offloaded when should"
log_test "externally learned fdb entry"
@@ -595,9 +607,11 @@ neigh_offload_test()
ip -6 neigh add 2001:db8:1::2 lladdr de:ad:be:ef:13:37 nud perm \
dev $swp1
- ip -4 neigh show dev $swp1 | grep 192.0.2.2 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -4 neigh show dev $swp1 192.0.2.2
check_err $? "ipv4 neigh entry not marked as offloaded when should"
- ip -6 neigh show dev $swp1 | grep 2001:db8:1::2 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 neigh show dev $swp1 2001:db8:1::2
check_err $? "ipv6 neigh entry not marked as offloaded when should"
log_test "neighbour offload indication"
@@ -623,25 +637,31 @@ nexthop_offload_test()
ip -6 route add 2001:db8:2::/64 vrf v$swp1 \
nexthop via 2001:db8:1::2 dev $swp1
- ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -4 route show 198.51.100.0/24 vrf v$swp1
check_err $? "ipv4 nexthop not marked as offloaded when should"
- ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route show 2001:db8:2::/64 vrf v$swp1
check_err $? "ipv6 nexthop not marked as offloaded when should"
ip link set dev $swp2 down
sleep 1
- ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload
- check_fail $? "ipv4 nexthop marked as offloaded when should not"
- ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload
- check_fail $? "ipv6 nexthop marked as offloaded when should not"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip -4 route show 198.51.100.0/24 vrf v$swp1
+ check_err $? "ipv4 nexthop marked as offloaded when should not"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip -6 route show 2001:db8:2::/64 vrf v$swp1
+ check_err $? "ipv6 nexthop marked as offloaded when should not"
ip link set dev $swp2 up
setup_wait
- ip -4 route show 198.51.100.0/24 vrf v$swp1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -4 route show 198.51.100.0/24 vrf v$swp1
check_err $? "ipv4 nexthop not marked as offloaded after neigh add"
- ip -6 route show 2001:db8:2::/64 vrf v$swp1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route show 2001:db8:2::/64 vrf v$swp1
check_err $? "ipv6 nexthop not marked as offloaded after neigh add"
log_test "nexthop offload indication"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
index c9fc4d4885c1..94c37124a840 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -56,11 +56,19 @@ switch_destroy()
}
# Callback from sch_ets_tests.sh
-get_stats()
+collect_stats()
{
- local band=$1; shift
+ local -a streams=("$@")
+ local stream
- ethtool_stats_get "$h2" rx_octets_prio_$band
+ # Wait for qdisc counter update so that we don't get it mid-way through.
+ busywait_for_counter 1000 +1 \
+ qdisc_parent_stats_get $swp2 10:$((${streams[0]} + 1)) .bytes \
+ > /dev/null
+
+ for stream in ${streams[@]}; do
+ qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes
+ done
}
bail_on_lldpad
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
new file mode 100644
index 000000000000..0d347d48c112
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -0,0 +1,533 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends a >1Gbps stream of traffic from H1, to the switch, which
+# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the
+# switch and forwarded to the port under test $swp3, which is also 1Gbps.
+#
+# This way, $swp3 should be 100% filled with traffic without any of it spilling
+# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That
+# is what H2 is used for--it sends the extra traffic to create backlog.
+#
+# A RED Qdisc is installed on $swp3. The configuration is such that the minimum
+# and maximum size are 1 byte apart, so there is a very clear border under which
+# no marking or dropping takes place, and above which everything is marked or
+# dropped.
+#
+# The test uses the buffer build-up behavior to test the installed RED.
+#
+# In order to test WRED, $swp3 actually contains RED under PRIO, with two
+# different configurations. Traffic is prioritized using 802.1p and relies on
+# the implicit mlxsw configuration, where packet priority is taken 1:1 from the
+# 802.1p marking.
+#
+# +--------------------------+ +--------------------------+
+# | H1 | | H2 |
+# | + $h1.10 | | + $h2.10 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | | | | |
+# | | $h1.11 + | | | $h2.11 + |
+# | | 192.0.2.17/28 | | | | 192.0.2.18/28 | |
+# | | | | | | | |
+# | \______ ______/ | | \______ ______/ |
+# | \ / | | \ / |
+# | + $h1 | | + $h2 |
+# +-------------|------------+ +-------------|------------+
+# | >1Gbps |
+# +-------------|------------------------------------------------|------------+
+# | SW + $swp1 + $swp2 |
+# | _______/ \___________ ___________/ \_______ |
+# | / \ / \ |
+# | +-|-----------------+ | +-|-----------------+ | |
+# | | + $swp1.10 | | | + $swp2.10 | | |
+# | | | | .-------------+ $swp5.10 | | |
+# | | BR1_10 | | | | | | |
+# | | | | | | BR2_10 | | |
+# | | + $swp2.10 | | | | | | |
+# | +-|-----------------+ | | | + $swp3.10 | | |
+# | | | | +-|-----------------+ | |
+# | | +-----------------|-+ | | +-----------------|-+ |
+# | | | $swp1.11 + | | | | $swp2.11 + | |
+# | | | | | .-----------------+ $swp5.11 | |
+# | | | BR1_11 | | | | | | |
+# | | | | | | | | BR2_11 | |
+# | | | $swp2.11 + | | | | | | |
+# | | +-----------------|-+ | | | | $swp3.11 + | |
+# | | | | | | +-----------------|-+ |
+# | \_______ ___________/ | | \___________ _______/ |
+# | \ / \ / \ / |
+# | + $swp4 + $swp5 + $swp3 |
+# +-------------|----------------------|-------------------------|------------+
+# | | | 1Gbps
+# \________1Gbps_________/ |
+# +----------------------------|------------+
+# | H3 + $h3 |
+# | _____________________/ \_______ |
+# | / \ |
+# | | | |
+# | + $h3.10 $h3.11 + |
+# | 192.0.2.3/28 192.0.2.19/28 |
+# +-----------------------------------------+
+
+NUM_NETIFS=8
+CHECK_TC="yes"
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+ipaddr()
+{
+ local host=$1; shift
+ local vlan=$1; shift
+
+ echo 192.0.2.$((16 * (vlan - 10) + host))
+}
+
+host_create()
+{
+ local dev=$1; shift
+ local host=$1; shift
+
+ simple_if_init $dev
+ mtu_set $dev 10000
+
+ vlan_create $dev 10 v$dev $(ipaddr $host 10)/28
+ ip link set dev $dev.10 type vlan egress 0:0
+
+ vlan_create $dev 11 v$dev $(ipaddr $host 11)/28
+ ip link set dev $dev.11 type vlan egress 0:1
+}
+
+host_destroy()
+{
+ local dev=$1; shift
+
+ vlan_destroy $dev 11
+ vlan_destroy $dev 10
+ mtu_restore $dev
+ simple_if_fini $dev
+}
+
+h1_create()
+{
+ host_create $h1 1
+}
+
+h1_destroy()
+{
+ host_destroy $h1
+}
+
+h2_create()
+{
+ host_create $h2 2
+
+ # Some of the tests in this suite use multicast traffic. As this traffic
+ # enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
+ # e.g. traffic ingressing through $swp2 is flooded to $swp3 (the
+ # intended destination) and $swp5 (which is intended as ingress for
+ # another stream of traffic).
+ #
+ # This is generally not a problem, but if the $swp5 throughput is lower
+ # than $swp2 throughput, there will be a build-up at $swp5. That may
+ # cause packets to fail to queue up at $swp3 due to shared buffer
+ # quotas, and the test to spuriously fail.
+ #
+ # Prevent this by setting the speed of $h2 to 1Gbps.
+
+ ethtool -s $h2 speed 1000 autoneg off
+}
+
+h2_destroy()
+{
+ ethtool -s $h2 autoneg on
+ host_destroy $h2
+}
+
+h3_create()
+{
+ host_create $h3 3
+ ethtool -s $h3 speed 1000 autoneg off
+}
+
+h3_destroy()
+{
+ ethtool -s $h3 autoneg on
+ host_destroy $h3
+}
+
+switch_create()
+{
+ local intf
+ local vlan
+
+ ip link add dev br1_10 type bridge
+ ip link add dev br1_11 type bridge
+
+ ip link add dev br2_10 type bridge
+ ip link add dev br2_11 type bridge
+
+ for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do
+ ip link set dev $intf up
+ mtu_set $intf 10000
+ done
+
+ for intf in $swp1 $swp4; do
+ for vlan in 10 11; do
+ vlan_create $intf $vlan
+ ip link set dev $intf.$vlan master br1_$vlan
+ ip link set dev $intf.$vlan up
+ done
+ done
+
+ for intf in $swp2 $swp3 $swp5; do
+ for vlan in 10 11; do
+ vlan_create $intf $vlan
+ ip link set dev $intf.$vlan master br2_$vlan
+ ip link set dev $intf.$vlan up
+ done
+ done
+
+ ip link set dev $swp4.10 type vlan egress 0:0
+ ip link set dev $swp4.11 type vlan egress 0:1
+ for intf in $swp1 $swp2 $swp5; do
+ for vlan in 10 11; do
+ ip link set dev $intf.$vlan type vlan ingress 0:0 1:1
+ done
+ done
+
+ for intf in $swp2 $swp3 $swp4 $swp5; do
+ ethtool -s $intf speed 1000 autoneg off
+ done
+
+ ip link set dev br1_10 up
+ ip link set dev br1_11 up
+ ip link set dev br2_10 up
+ ip link set dev br2_11 up
+
+ local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
+ devlink_port_pool_th_set $swp3 8 $size
+}
+
+switch_destroy()
+{
+ local intf
+ local vlan
+
+ devlink_port_pool_th_restore $swp3 8
+
+ tc qdisc del dev $swp3 root 2>/dev/null
+
+ ip link set dev br2_11 down
+ ip link set dev br2_10 down
+ ip link set dev br1_11 down
+ ip link set dev br1_10 down
+
+ for intf in $swp5 $swp4 $swp3 $swp2; do
+ ethtool -s $intf autoneg on
+ done
+
+ for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do
+ for vlan in 11 10; do
+ ip link set dev $intf.$vlan down
+ ip link set dev $intf.$vlan nomaster
+ vlan_destroy $intf $vlan
+ done
+
+ mtu_restore $intf
+ ip link set dev $intf down
+ done
+
+ ip link del dev br2_11
+ ip link del dev br2_10
+ ip link del dev br1_11
+ ip link del dev br1_10
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ swp4=${NETIFS[p7]}
+ swp5=${NETIFS[p8]}
+
+ h3_mac=$(mac_get $h3)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1.10 $(ipaddr 3 10) " from host 1, vlan 10"
+ ping_test $h1.11 $(ipaddr 3 11) " from host 1, vlan 11"
+ ping_test $h2.10 $(ipaddr 3 10) " from host 2, vlan 10"
+ ping_test $h2.11 $(ipaddr 3 11) " from host 2, vlan 11"
+}
+
+get_tc()
+{
+ local vlan=$1; shift
+
+ echo $((vlan - 10))
+}
+
+get_qdisc_handle()
+{
+ local vlan=$1; shift
+
+ local tc=$(get_tc $vlan)
+ local band=$((8 - tc))
+
+ # Handle is 107: for TC1, 108: for TC0.
+ echo "10$band:"
+}
+
+get_qdisc_backlog()
+{
+ local vlan=$1; shift
+
+ qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .backlog
+}
+
+get_mc_transmit_queue()
+{
+ local vlan=$1; shift
+
+ local tc=$(($(get_tc $vlan) + 8))
+ ethtool_stats_get $swp3 tc_transmit_queue_tc_$tc
+}
+
+get_nmarked()
+{
+ local vlan=$1; shift
+
+ ethtool_stats_get $swp3 ecn_marked
+}
+
+get_qdisc_npackets()
+{
+ local vlan=$1; shift
+
+ busywait_for_counter 1100 +1 \
+ qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets
+}
+
+# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
+# success. After 10 failed attempts it bails out and returns 1. It dumps the
+# backlog size to stdout.
+build_backlog()
+{
+ local vlan=$1; shift
+ local size=$1; shift
+ local proto=$1; shift
+
+ local tc=$((vlan - 10))
+ local band=$((8 - tc))
+ local cur=-1
+ local i=0
+
+ while :; do
+ local cur=$(busywait 1100 until_counter_is "> $cur" \
+ get_qdisc_backlog $vlan)
+ local diff=$((size - cur))
+ local pkts=$(((diff + 7999) / 8000))
+
+ if ((cur >= size)); then
+ echo $cur
+ return 0
+ elif ((i++ > 10)); then
+ echo $cur
+ return 1
+ fi
+
+ $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
+ -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
+ -t $proto -q -c $pkts "$@"
+ done
+}
+
+check_marking()
+{
+ local vlan=$1; shift
+ local cond=$1; shift
+
+ local npackets_0=$(get_qdisc_npackets $vlan)
+ local nmarked_0=$(get_nmarked $vlan)
+ sleep 5
+ local npackets_1=$(get_qdisc_npackets $vlan)
+ local nmarked_1=$(get_nmarked $vlan)
+
+ local nmarked_d=$((nmarked_1 - nmarked_0))
+ local npackets_d=$((npackets_1 - npackets_0))
+ local pct=$((100 * nmarked_d / npackets_d))
+
+ echo $pct
+ ((pct $cond))
+}
+
+ecn_test_common()
+{
+ local name=$1; shift
+ local vlan=$1; shift
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Build the below-the-limit backlog using UDP. We could use TCP just
+ # fine, but this way we get a proof that UDP is accepted when queue
+ # length is below the limit. The main stream is using TCP, and if the
+ # limit is misconfigured, we would see this traffic being ECN marked.
+ RET=0
+ backlog=$(build_backlog $vlan $((2 * limit / 3)) udp)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking $vlan "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "TC $((vlan - 10)): $name backlog < limit"
+
+ # Now push TCP, because non-TCP traffic would be early-dropped after the
+ # backlog crosses the limit, and we want to make sure that the backlog
+ # is above the limit.
+ RET=0
+ backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking $vlan ">= 95")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
+ log_test "TC $((vlan - 10)): $name backlog > limit"
+}
+
+do_ecn_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local name=ECN
+
+ start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+ $h3_mac tos=0x01
+ sleep 1
+
+ ecn_test_common "$name" $vlan $limit
+
+ # Up there we saw that UDP gets accepted when backlog is below the
+ # limit. Now that it is above, it should all get dropped, and backlog
+ # building should fail.
+ RET=0
+ build_backlog $vlan $((2 * limit)) udp >/dev/null
+ check_fail $? "UDP traffic went into backlog instead of being early-dropped"
+ log_test "TC $((vlan - 10)): $name backlog > limit: UDP early-dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_ecn_nodrop_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local name="ECN nodrop"
+
+ start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+ $h3_mac tos=0x01
+ sleep 1
+
+ ecn_test_common "$name" $vlan $limit
+
+ # Up there we saw that UDP gets accepted when backlog is below the
+ # limit. Now that it is above, in nodrop mode, make sure it goes to
+ # backlog as well.
+ RET=0
+ build_backlog $vlan $((2 * limit)) udp >/dev/null
+ check_err $? "UDP traffic was early-dropped instead of getting into backlog"
+ log_test "TC $((vlan - 10)): $name backlog > limit: UDP not dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_red_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Use ECN-capable TCP to verify there's no marking even though the queue
+ # is above limit.
+ start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+ $h3_mac tos=0x01
+
+ # Pushing below the queue limit should work.
+ RET=0
+ backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking $vlan "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "TC $((vlan - 10)): RED backlog < limit"
+
+ # Pushing above should not.
+ RET=0
+ backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
+ check_fail $? "Traffic went into backlog instead of being early-dropped"
+ pct=$(check_marking $vlan "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ local diff=$((limit - backlog))
+ pct=$((100 * diff / limit))
+ ((0 <= pct && pct <= 5))
+ check_err $? "backlog $backlog / $limit expected <= 5% distance"
+ log_test "TC $((vlan - 10)): RED backlog > limit"
+
+ stop_traffic
+ sleep 1
+}
+
+do_mc_backlog_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ RET=0
+
+ start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc
+ start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc
+
+ qbl=$(busywait 5000 until_counter_is ">= 500000" \
+ get_qdisc_backlog $vlan)
+ check_err $? "Could not build MC backlog"
+
+ # Verify that we actually see the backlog on BUM TC. Do a busywait as
+ # well, performance blips might cause false fail.
+ local ebl
+ ebl=$(busywait 5000 until_counter_is ">= 500000" \
+ get_mc_transmit_queue $vlan)
+ check_err $? "MC backlog reported by qdisc not visible in ethtool"
+
+ stop_traffic
+ stop_traffic
+
+ log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
new file mode 100755
index 000000000000..1c36c576613b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -0,0 +1,94 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ecn_test
+ ecn_nodrop_test
+ red_test
+ mc_backlog_test
+"
+: ${QDISC:=ets}
+source sch_red_core.sh
+
+# do_ecn_test first build 2/3 of the requested backlog and expects no marking,
+# and then builds 3/2 of it and does expect marking. The values of $BACKLOG1 and
+# $BACKLOG2 are far enough not to overlap, so that we can assume that if we do
+# see (do not see) marking, it is actually due to the configuration of that one
+# TC, and not due to configuration of the other TC leaking over.
+BACKLOG1=200000
+BACKLOG2=500000
+
+install_qdisc()
+{
+ local -a args=("$@")
+
+ tc qdisc add dev $swp3 root handle 10: $QDISC \
+ bands 8 priomap 7 6 5 4 3 2 1 0
+ tc qdisc add dev $swp3 parent 10:8 handle 108: red \
+ limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \
+ probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+ tc qdisc add dev $swp3 parent 10:7 handle 107: red \
+ limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \
+ probability 1.0 avpkt 8000 burst 63 "${args[@]}"
+ sleep 1
+}
+
+uninstall_qdisc()
+{
+ tc qdisc del dev $swp3 parent 10:7
+ tc qdisc del dev $swp3 parent 10:8
+ tc qdisc del dev $swp3 root
+}
+
+ecn_test()
+{
+ install_qdisc ecn
+
+ do_ecn_test 10 $BACKLOG1
+ do_ecn_test 11 $BACKLOG2
+
+ uninstall_qdisc
+}
+
+ecn_nodrop_test()
+{
+ install_qdisc ecn nodrop
+
+ do_ecn_nodrop_test 10 $BACKLOG1
+ do_ecn_nodrop_test 11 $BACKLOG2
+
+ uninstall_qdisc
+}
+
+red_test()
+{
+ install_qdisc
+
+ do_red_test 10 $BACKLOG1
+ do_red_test 11 $BACKLOG2
+
+ uninstall_qdisc
+}
+
+mc_backlog_test()
+{
+ install_qdisc
+
+ # Note that the backlog numbers here do not correspond to RED
+ # configuration, but are arbitrary.
+ do_mc_backlog_test 10 $BACKLOG1
+ do_mc_backlog_test 11 $BACKLOG2
+
+ uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+bail_on_lldpad
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh
new file mode 100755
index 000000000000..76820a0e9a1b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_prio.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+QDISC=prio
+source sch_red_ets.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
new file mode 100755
index 000000000000..558667ea11ec
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ecn_test
+ ecn_nodrop_test
+ red_test
+ mc_backlog_test
+"
+source sch_red_core.sh
+
+BACKLOG=300000
+
+install_qdisc()
+{
+ local -a args=("$@")
+
+ tc qdisc add dev $swp3 root handle 108: red \
+ limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \
+ probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+ sleep 1
+}
+
+uninstall_qdisc()
+{
+ tc qdisc del dev $swp3 root
+}
+
+ecn_test()
+{
+ install_qdisc ecn
+ do_ecn_test 10 $BACKLOG
+ uninstall_qdisc
+}
+
+ecn_nodrop_test()
+{
+ install_qdisc ecn nodrop
+ do_ecn_nodrop_test 10 $BACKLOG
+ uninstall_qdisc
+}
+
+red_test()
+{
+ install_qdisc
+ do_red_test 10 $BACKLOG
+ uninstall_qdisc
+}
+
+mc_backlog_test()
+{
+ install_qdisc
+ # Note that the backlog value here does not correspond to RED
+ # configuration, but is arbitrary.
+ do_mc_backlog_test 10 $BACKLOG
+ uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+bail_on_lldpad
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
new file mode 100755
index 000000000000..7d9e73a43a49
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -0,0 +1,222 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ port_pool_test
+ port_tc_ip_test
+ port_tc_arp_test
+"
+
+NUM_NETIFS=2
+source ../../../net/forwarding/lib.sh
+source ../../../net/forwarding/devlink_lib.sh
+source mlxsw_lib.sh
+
+SB_POOL_ING=0
+SB_POOL_EGR_CPU=10
+
+SB_ITC_CPU_IP=2
+SB_ITC_CPU_ARP=2
+SB_ITC=0
+
+h1_create()
+{
+ simple_if_init $h1 192.0.1.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.1.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.1.2/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.1.2/24
+}
+
+sb_occ_pool_check()
+{
+ local dl_port=$1; shift
+ local pool=$1; shift
+ local exp_max_occ=$1
+ local max_occ
+ local err=0
+
+ max_occ=$(devlink sb -j occupancy show $dl_port \
+ | jq -e ".[][][\"pool\"][\"$pool\"][\"max\"]")
+
+ if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+ err=1
+ fi
+
+ echo $max_occ
+ return $err
+}
+
+sb_occ_itc_check()
+{
+ local dl_port=$1; shift
+ local itc=$1; shift
+ local exp_max_occ=$1
+ local max_occ
+ local err=0
+
+ max_occ=$(devlink sb -j occupancy show $dl_port \
+ | jq -e ".[][][\"itc\"][\"$itc\"][\"max\"]")
+
+ if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+ err=1
+ fi
+
+ echo $max_occ
+ return $err
+}
+
+sb_occ_etc_check()
+{
+ local dl_port=$1; shift
+ local etc=$1; shift
+ local exp_max_occ=$1; shift
+ local max_occ
+ local err=0
+
+ max_occ=$(devlink sb -j occupancy show $dl_port \
+ | jq -e ".[][][\"etc\"][\"$etc\"][\"max\"]")
+
+ if [[ "$max_occ" -ne "$exp_max_occ" ]]; then
+ err=1
+ fi
+
+ echo $max_occ
+ return $err
+}
+
+port_pool_test()
+{
+ local exp_max_occ=288
+ local max_occ
+
+ devlink sb occupancy clearmax $DEVLINK_DEV
+
+ $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ -t ip -q
+
+ devlink sb occupancy snapshot $DEVLINK_DEV
+
+ RET=0
+ max_occ=$(sb_occ_pool_check $dl_port1 $SB_POOL_ING $exp_max_occ)
+ check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h1) ingress pool"
+
+ RET=0
+ max_occ=$(sb_occ_pool_check $dl_port2 $SB_POOL_ING $exp_max_occ)
+ check_err $? "Expected iPool($SB_POOL_ING) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h2) ingress pool"
+
+ RET=0
+ max_occ=$(sb_occ_pool_check $cpu_dl_port $SB_POOL_EGR_CPU $exp_max_occ)
+ check_err $? "Expected ePool($SB_POOL_EGR_CPU) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "CPU port's egress pool"
+}
+
+port_tc_ip_test()
+{
+ local exp_max_occ=288
+ local max_occ
+
+ devlink sb occupancy clearmax $DEVLINK_DEV
+
+ $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ -t ip -q
+
+ devlink sb occupancy snapshot $DEVLINK_DEV
+
+ RET=0
+ max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+ check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h1) ingress TC - IP packet"
+
+ RET=0
+ max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+ check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h2) ingress TC - IP packet"
+
+ RET=0
+ max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_IP $exp_max_occ)
+ check_err $? "Expected egress TC($SB_ITC_CPU_IP) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "CPU port's egress TC - IP packet"
+}
+
+port_tc_arp_test()
+{
+ local exp_max_occ=96
+ local max_occ
+
+ if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then
+ exp_max_occ=144
+ fi
+
+ devlink sb occupancy clearmax $DEVLINK_DEV
+
+ $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q
+
+ devlink sb occupancy snapshot $DEVLINK_DEV
+
+ RET=0
+ max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+ check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h1) ingress TC - ARP packet"
+
+ RET=0
+ max_occ=$(sb_occ_itc_check $dl_port2 $SB_ITC $exp_max_occ)
+ check_err $? "Expected ingress TC($SB_ITC) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "physical port's($h2) ingress TC - ARP packet"
+
+ RET=0
+ max_occ=$(sb_occ_etc_check $cpu_dl_port $SB_ITC_CPU_ARP $exp_max_occ)
+ check_err $? "Expected egress TC($SB_ITC_IP2ME) max occupancy to be $exp_max_occ, but got $max_occ"
+ log_test "CPU port's egress TC - ARP packet"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ dl_port1=$(devlink_port_by_netdev $h1)
+ dl_port2=$(devlink_port_by_netdev $h2)
+
+ cpu_dl_port=$(devlink_cpu_port_get)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
new file mode 100755
index 000000000000..0d4b9327c9b3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
@@ -0,0 +1,416 @@
+#!/usr/bin/python
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import json as j
+import random
+
+
+class SkipTest(Exception):
+ pass
+
+
+class RandomValuePicker:
+ """
+ Class for storing shared buffer configuration. Can handle 3 different
+ objects, pool, tcbind and portpool. Provide an interface to get random
+ values for a specific object type as the follow:
+ 1. Pool:
+ - random size
+
+ 2. TcBind:
+ - random pool number
+ - random threshold
+
+ 3. PortPool:
+ - random threshold
+ """
+ def __init__(self, pools):
+ self._pools = []
+ for pool in pools:
+ self._pools.append(pool)
+
+ def _cell_size(self):
+ return self._pools[0]["cell_size"]
+
+ def _get_static_size(self, th):
+ # For threshold of 16, this works out to be about 12MB on Spectrum-1,
+ # and about 17MB on Spectrum-2.
+ return th * 8000 * self._cell_size()
+
+ def _get_size(self):
+ return self._get_static_size(16)
+
+ def _get_thtype(self):
+ return "static"
+
+ def _get_th(self, pool):
+ # Threshold value could be any integer between 3 to 16
+ th = random.randint(3, 16)
+ if pool["thtype"] == "dynamic":
+ return th
+ else:
+ return self._get_static_size(th)
+
+ def _get_pool(self, direction):
+ ing_pools = []
+ egr_pools = []
+ for pool in self._pools:
+ if pool["type"] == "ingress":
+ ing_pools.append(pool)
+ else:
+ egr_pools.append(pool)
+ if direction == "ingress":
+ arr = ing_pools
+ else:
+ arr = egr_pools
+ return arr[random.randint(0, len(arr) - 1)]
+
+ def get_value(self, objid):
+ if isinstance(objid, Pool):
+ if objid["pool"] in [4, 8, 9, 10]:
+ # The threshold type of pools 4, 8, 9 and 10 cannot be changed
+ raise SkipTest()
+ else:
+ return (self._get_size(), self._get_thtype())
+ if isinstance(objid, TcBind):
+ if objid["tc"] >= 8:
+ # Multicast TCs cannot be changed
+ raise SkipTest()
+ else:
+ pool = self._get_pool(objid["type"])
+ th = self._get_th(pool)
+ pool_n = pool["pool"]
+ return (pool_n, th)
+ if isinstance(objid, PortPool):
+ pool_n = objid["pool"]
+ pool = self._pools[pool_n]
+ assert pool["pool"] == pool_n
+ th = self._get_th(pool)
+ return (th,)
+
+
+class RecordValuePickerException(Exception):
+ pass
+
+
+class RecordValuePicker:
+ """
+ Class for storing shared buffer configuration. Can handle 2 different
+ objects, pool and tcbind. Provide an interface to get the stored values per
+ object type.
+ """
+ def __init__(self, objlist):
+ self._recs = []
+ for item in objlist:
+ self._recs.append({"objid": item, "value": item.var_tuple()})
+
+ def get_value(self, objid):
+ if isinstance(objid, Pool) and objid["pool"] in [4, 8, 9, 10]:
+ # The threshold type of pools 4, 8, 9 and 10 cannot be changed
+ raise SkipTest()
+ if isinstance(objid, TcBind) and objid["tc"] >= 8:
+ # Multicast TCs cannot be changed
+ raise SkipTest()
+ for rec in self._recs:
+ if rec["objid"].weak_eq(objid):
+ return rec["value"]
+ raise RecordValuePickerException()
+
+
+def run_cmd(cmd, json=False):
+ out = subprocess.check_output(cmd, shell=True)
+ if json:
+ return j.loads(out)
+ return out
+
+
+def run_json_cmd(cmd):
+ return run_cmd(cmd, json=True)
+
+
+def log_test(test_name, err_msg=None):
+ if err_msg:
+ print("\t%s" % err_msg)
+ print("TEST: %-80s [FAIL]" % test_name)
+ else:
+ print("TEST: %-80s [ OK ]" % test_name)
+
+
+class CommonItem(dict):
+ varitems = []
+
+ def var_tuple(self):
+ ret = []
+ self.varitems.sort()
+ for key in self.varitems:
+ ret.append(self[key])
+ return tuple(ret)
+
+ def weak_eq(self, other):
+ for key in self:
+ if key in self.varitems:
+ continue
+ if self[key] != other[key]:
+ return False
+ return True
+
+
+class CommonList(list):
+ def get_by(self, by_obj):
+ for item in self:
+ if item.weak_eq(by_obj):
+ return item
+ return None
+
+ def del_by(self, by_obj):
+ for item in self:
+ if item.weak_eq(by_obj):
+ self.remove(item)
+
+
+class Pool(CommonItem):
+ varitems = ["size", "thtype"]
+
+ def dl_set(self, dlname, size, thtype):
+ run_cmd("devlink sb pool set {} sb {} pool {} size {} thtype {}".format(dlname, self["sb"],
+ self["pool"],
+ size, thtype))
+
+
+class PoolList(CommonList):
+ pass
+
+
+def get_pools(dlname, direction=None):
+ d = run_json_cmd("devlink sb pool show -j")
+ pools = PoolList()
+ for pooldict in d["pool"][dlname]:
+ if not direction or direction == pooldict["type"]:
+ pools.append(Pool(pooldict))
+ return pools
+
+
+def do_check_pools(dlname, pools, vp):
+ for pool in pools:
+ pre_pools = get_pools(dlname)
+ try:
+ (size, thtype) = vp.get_value(pool)
+ except SkipTest:
+ continue
+ pool.dl_set(dlname, size, thtype)
+ post_pools = get_pools(dlname)
+ pool = post_pools.get_by(pool)
+
+ err_msg = None
+ if pool["size"] != size:
+ err_msg = "Incorrect pool size (got {}, expected {})".format(pool["size"], size)
+ if pool["thtype"] != thtype:
+ err_msg = "Incorrect pool threshold type (got {}, expected {})".format(pool["thtype"], thtype)
+
+ pre_pools.del_by(pool)
+ post_pools.del_by(pool)
+ if pre_pools != post_pools:
+ err_msg = "Other pool setup changed as well"
+ log_test("pool {} of sb {} set verification".format(pool["pool"],
+ pool["sb"]), err_msg)
+
+
+def check_pools(dlname, pools):
+ # Save defaults
+ record_vp = RecordValuePicker(pools)
+
+ # For each pool, set random size and static threshold type
+ do_check_pools(dlname, pools, RandomValuePicker(pools))
+
+ # Restore defaults
+ do_check_pools(dlname, pools, record_vp)
+
+
+class TcBind(CommonItem):
+ varitems = ["pool", "threshold"]
+
+ def __init__(self, port, d):
+ super(TcBind, self).__init__(d)
+ self["dlportname"] = port.name
+
+ def dl_set(self, pool, th):
+ run_cmd("devlink sb tc bind set {} sb {} tc {} type {} pool {} th {}".format(self["dlportname"],
+ self["sb"],
+ self["tc"],
+ self["type"],
+ pool, th))
+
+
+class TcBindList(CommonList):
+ pass
+
+
+def get_tcbinds(ports, verify_existence=False):
+ d = run_json_cmd("devlink sb tc bind show -j -n")
+ tcbinds = TcBindList()
+ for port in ports:
+ err_msg = None
+ if port.name not in d["tc_bind"] or len(d["tc_bind"][port.name]) == 0:
+ err_msg = "No tc bind for port"
+ else:
+ for tcbinddict in d["tc_bind"][port.name]:
+ tcbinds.append(TcBind(port, tcbinddict))
+ if verify_existence:
+ log_test("tc bind existence for port {} verification".format(port.name), err_msg)
+ return tcbinds
+
+
+def do_check_tcbind(ports, tcbinds, vp):
+ for tcbind in tcbinds:
+ pre_tcbinds = get_tcbinds(ports)
+ try:
+ (pool, th) = vp.get_value(tcbind)
+ except SkipTest:
+ continue
+ tcbind.dl_set(pool, th)
+ post_tcbinds = get_tcbinds(ports)
+ tcbind = post_tcbinds.get_by(tcbind)
+
+ err_msg = None
+ if tcbind["pool"] != pool:
+ err_msg = "Incorrect pool (got {}, expected {})".format(tcbind["pool"], pool)
+ if tcbind["threshold"] != th:
+ err_msg = "Incorrect threshold (got {}, expected {})".format(tcbind["threshold"], th)
+
+ pre_tcbinds.del_by(tcbind)
+ post_tcbinds.del_by(tcbind)
+ if pre_tcbinds != post_tcbinds:
+ err_msg = "Other tc bind setup changed as well"
+ log_test("tc bind {}-{} of sb {} set verification".format(tcbind["dlportname"],
+ tcbind["tc"],
+ tcbind["sb"]), err_msg)
+
+
+def check_tcbind(dlname, ports, pools):
+ tcbinds = get_tcbinds(ports, verify_existence=True)
+
+ # Save defaults
+ record_vp = RecordValuePicker(tcbinds)
+
+ # Bind each port and unicast TC (TCs < 8) to a random pool and a random
+ # threshold
+ do_check_tcbind(ports, tcbinds, RandomValuePicker(pools))
+
+ # Restore defaults
+ do_check_tcbind(ports, tcbinds, record_vp)
+
+
+class PortPool(CommonItem):
+ varitems = ["threshold"]
+
+ def __init__(self, port, d):
+ super(PortPool, self).__init__(d)
+ self["dlportname"] = port.name
+
+ def dl_set(self, th):
+ run_cmd("devlink sb port pool set {} sb {} pool {} th {}".format(self["dlportname"],
+ self["sb"],
+ self["pool"], th))
+
+
+class PortPoolList(CommonList):
+ pass
+
+
+def get_portpools(ports, verify_existence=False):
+ d = run_json_cmd("devlink sb port pool -j -n")
+ portpools = PortPoolList()
+ for port in ports:
+ err_msg = None
+ if port.name not in d["port_pool"] or len(d["port_pool"][port.name]) == 0:
+ err_msg = "No port pool for port"
+ else:
+ for portpooldict in d["port_pool"][port.name]:
+ portpools.append(PortPool(port, portpooldict))
+ if verify_existence:
+ log_test("port pool existence for port {} verification".format(port.name), err_msg)
+ return portpools
+
+
+def do_check_portpool(ports, portpools, vp):
+ for portpool in portpools:
+ pre_portpools = get_portpools(ports)
+ (th,) = vp.get_value(portpool)
+ portpool.dl_set(th)
+ post_portpools = get_portpools(ports)
+ portpool = post_portpools.get_by(portpool)
+
+ err_msg = None
+ if portpool["threshold"] != th:
+ err_msg = "Incorrect threshold (got {}, expected {})".format(portpool["threshold"], th)
+
+ pre_portpools.del_by(portpool)
+ post_portpools.del_by(portpool)
+ if pre_portpools != post_portpools:
+ err_msg = "Other port pool setup changed as well"
+ log_test("port pool {}-{} of sb {} set verification".format(portpool["dlportname"],
+ portpool["pool"],
+ portpool["sb"]), err_msg)
+
+
+def check_portpool(dlname, ports, pools):
+ portpools = get_portpools(ports, verify_existence=True)
+
+ # Save defaults
+ record_vp = RecordValuePicker(portpools)
+
+ # For each port pool, set a random threshold
+ do_check_portpool(ports, portpools, RandomValuePicker(pools))
+
+ # Restore defaults
+ do_check_portpool(ports, portpools, record_vp)
+
+
+class Port:
+ def __init__(self, name):
+ self.name = name
+
+
+class PortList(list):
+ pass
+
+
+def get_ports(dlname):
+ d = run_json_cmd("devlink port show -j")
+ ports = PortList()
+ for name in d["port"]:
+ if name.find(dlname) == 0 and d["port"][name]["flavour"] == "physical":
+ ports.append(Port(name))
+ return ports
+
+
+def get_device():
+ devices_info = run_json_cmd("devlink -j dev info")["info"]
+ for d in devices_info:
+ if "mlxsw_spectrum" in devices_info[d]["driver"]:
+ return d
+ return None
+
+
+class UnavailableDevlinkNameException(Exception):
+ pass
+
+
+def test_sb_configuration():
+ # Use static seed
+ random.seed(0)
+
+ dlname = get_device()
+ if not dlname:
+ raise UnavailableDevlinkNameException()
+
+ ports = get_ports(dlname)
+ pools = get_pools(dlname)
+
+ check_pools(dlname, pools)
+ check_tcbind(dlname, ports, pools)
+ check_portpool(dlname, ports, pools)
+
+
+test_sb_configuration()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 7b2acba82a49..fd583a171db7 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -8,8 +8,9 @@ source $lib_dir/lib.sh
source $lib_dir/tc_common.sh
source $lib_dir/devlink_lib.sh
-if [ "$DEVLINK_VIDDID" != "15b3:cf6c" ]; then
- echo "SKIP: test is tailored for Mellanox Spectrum-2"
+if [[ "$DEVLINK_VIDDID" != "15b3:cf6c" && \
+ "$DEVLINK_VIDDID" != "15b3:cf70" ]]; then
+ echo "SKIP: test is tailored for Mellanox Spectrum-2 and Spectrum-3"
exit 1
fi
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
index a0795227216e..efd798a85931 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
@@ -8,9 +8,9 @@ tc_flower_get_target()
# The driver associates a counter with each tc filter, which means the
# number of supported filters is bounded by the number of available
# counters.
- # Currently, the driver supports 12K (12,288) flow counters and six of
+ # Currently, the driver supports 30K (30,720) flow counters and six of
# these are used for multicast routing.
- local target=12282
+ local target=30714
if ((! should_fail)); then
echo $target
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
new file mode 100755
index 000000000000..20ed98fe5a60
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_action_hw_stats.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ default_hw_stats_test
+ immediate_hw_stats_test
+ delayed_hw_stats_test
+ disabled_hw_stats_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+hw_stats_test()
+{
+ RET=0
+
+ local name=$1
+ local action_hw_stats=$2
+ local occ_delta=$3
+ local expected_packet_count=$4
+
+ local orig_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop $action_hw_stats
+ check_err $? "Failed to add rule with $name hw_stats"
+
+ local new_occ=$(devlink_resource_get "counters" "flow" | jq '.["occ"]')
+ local expected_occ=$((orig_occ + occ_delta))
+ [ "$new_occ" == "$expected_occ" ]
+ check_err $? "Expected occupancy of $expected_occ, got $new_occ"
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $swp1mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip -q
+
+ tc_check_packets "dev $swp1 ingress" 101 $expected_packet_count
+ check_err $? "Did not match incoming packet"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "$name hw_stats"
+}
+
+default_hw_stats_test()
+{
+ hw_stats_test "default" "" 2 1
+}
+
+immediate_hw_stats_test()
+{
+ hw_stats_test "immediate" "hw_stats immediate" 2 1
+}
+
+delayed_hw_stats_test()
+{
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop hw_stats delayed
+ check_fail $? "Unexpected success in adding rule with delayed hw_stats"
+
+ log_test "delayed hw_stats"
+}
+
+disabled_hw_stats_test()
+{
+ hw_stats_test "disabled" "hw_stats disabled" 0 0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ h1mac=$(mac_get $h1)
+ swp1mac=$(mac_get $swp1)
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+check_tc_action_hw_stats_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
index a6d733d2a4b4..cc0f07e72cf2 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -2,9 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
# Test for resource limit of offloaded flower rules. The test adds a given
-# number of flower matches for different IPv6 addresses, then generates traffic,
-# and ensures each was hit exactly once. This file contains functions to set up
-# a testing topology and run the test, and is meant to be sourced from a test
+# number of flower matches for different IPv6 addresses, then check the offload
+# indication for all of the tc flower rules. This file contains functions to set
+# up a testing topology and run the test, and is meant to be sourced from a test
# script that calls the testing routine with a given number of rules.
TC_FLOWER_NUM_NETIFS=2
@@ -94,22 +94,15 @@ __tc_flower_test()
tc_flower_rules_create $count $should_fail
- for ((i = 0; i < count; ++i)); do
- $MZ $h1 -q -c 1 -t ip -p 20 -b bc -6 \
- -A 2001:db8:2::1 \
- -B $(tc_flower_addr $i)
- done
-
- MISMATCHES=$(
- tc -j -s filter show dev $h2 ingress |
- jq -r '[ .[] | select(.kind == "flower") | .options |
- values as $rule | .actions[].stats.packets |
- select(. != 1) | "\(.) on \($rule.keys.dst_ip)" ] |
- join(", ")'
- )
-
- test -z "$MISMATCHES"
- check_err $? "Expected to capture 1 packet for each IP, but got $MISMATCHES"
+ offload_count=$(tc -j -s filter show dev $h2 ingress |
+ jq -r '[ .[] | select(.kind == "flower") |
+ .options | .in_hw ]' | jq .[] | wc -l)
+ [[ $((offload_count - 1)) -eq $count ]]
+ if [[ $should_fail -eq 0 ]]; then
+ check_err $? "Offload mismatch"
+ else
+ check_err_fail $should_fail $? "Offload more than expacted"
+ fi
}
tc_flower_test()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
new file mode 100755
index 000000000000..9241250c5921
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -0,0 +1,318 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ shared_block_drop_test
+ egress_redirect_test
+ multi_mirror_test
+ matchall_sample_egress_test
+ matchall_mirror_behind_flower_ingress_test
+ matchall_sample_behind_flower_ingress_test
+ matchall_mirror_behind_flower_egress_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.1/24
+ simple_if_init $swp2 192.0.2.2/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.2/24
+ simple_if_fini $swp1 192.0.2.1/24
+}
+
+shared_block_drop_test()
+{
+ RET=0
+
+ # It is forbidden in mlxsw driver to have mixed-bound
+ # shared block with a drop rule.
+
+ tc qdisc add dev $swp1 ingress_block 22 clsact
+ check_err $? "Failed to create clsact with ingress block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_err $? "Failed to add drop rule to ingress bound block"
+
+ tc qdisc add dev $swp2 ingress_block 22 clsact
+ check_err $? "Failed to create another clsact with ingress shared block"
+
+ tc qdisc del dev $swp2 clsact
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_fail $? "Incorrect success to add drop rule to mixed bound block"
+
+ tc qdisc del dev $swp1 clsact
+
+ tc qdisc add dev $swp1 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_err $? "Failed to add drop rule to egress bound shared block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ log_test "shared block drop"
+}
+
+egress_redirect_test()
+{
+ RET=0
+
+ # It is forbidden in mlxsw driver to have mirred redirect on
+ # egress-bound block.
+
+ tc qdisc add dev $swp1 ingress_block 22 clsact
+ check_err $? "Failed to create clsact with ingress block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_err $? "Failed to add redirect rule to ingress bound block"
+
+ tc qdisc add dev $swp2 ingress_block 22 clsact
+ check_err $? "Failed to create another clsact with ingress shared block"
+
+ tc qdisc del dev $swp2 clsact
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+ tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+ tc qdisc add dev $swp2 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_fail $? "Incorrect success to add redirect rule to mixed bound block"
+
+ tc qdisc del dev $swp1 clsact
+
+ tc qdisc add dev $swp1 egress_block 22 clsact
+ check_err $? "Failed to create another clsact with egress shared block"
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_fail $? "Incorrect success to add redirect rule to egress bound shared block"
+
+ tc qdisc del dev $swp2 clsact
+
+ tc filter add block 22 protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress redirect dev $swp2
+ check_fail $? "Incorrect success to add redirect rule to egress bound block"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "shared block drop"
+}
+
+multi_mirror_test()
+{
+ RET=0
+
+ # It is forbidden in mlxsw driver to have multiple mirror
+ # actions in a single rule.
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress mirror dev $swp2
+ check_err $? "Failed to add rule with single mirror action"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 \
+ action mirred egress mirror dev $swp2 \
+ action mirred egress mirror dev $swp1
+ check_fail $? "Incorrect success to add rule with two mirror actions"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "multi mirror"
+}
+
+matchall_sample_egress_test()
+{
+ RET=0
+
+ # It is forbidden in mlxsw driver to have matchall with sample action
+ # bound on egress
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress protocol all pref 1 handle 101 \
+ matchall skip_sw action sample rate 100 group 1
+ check_err $? "Failed to add rule with sample action on ingress"
+
+ tc filter del dev $swp1 ingress protocol all pref 1 handle 101 matchall
+
+ tc filter add dev $swp1 egress protocol all pref 1 handle 101 \
+ matchall skip_sw action sample rate 100 group 1
+ check_fail $? "Incorrect success to add rule with sample action on egress"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "matchall sample egress"
+}
+
+matchall_behind_flower_ingress_test()
+{
+ local action=$1
+ local action_args=$2
+
+ RET=0
+
+ # On ingress, all matchall-mirror and matchall-sample
+ # rules have to be in front of the flower rules
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+
+ tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+ matchall skip_sw action $action_args
+ check_err $? "Failed to add matchall rule in front of a flower rule"
+
+ tc filter del dev $swp1 ingress protocol all pref 9 handle 102 matchall
+
+ tc filter add dev $swp1 ingress protocol all pref 11 handle 102 \
+ matchall skip_sw action $action_args
+ check_fail $? "Incorrect success to add matchall rule behind a flower rule"
+
+ tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+ tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+ matchall skip_sw action $action_args
+
+ tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_err $? "Failed to add flower rule behind a matchall rule"
+
+ tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+ tc filter add dev $swp1 ingress protocol ip pref 8 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_fail $? "Incorrect success to add flower rule in front of a matchall rule"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "matchall $action flower ingress"
+}
+
+matchall_mirror_behind_flower_ingress_test()
+{
+ matchall_behind_flower_ingress_test "mirror" "mirred egress mirror dev $swp2"
+}
+
+matchall_sample_behind_flower_ingress_test()
+{
+ matchall_behind_flower_ingress_test "sample" "sample rate 100 group 1"
+}
+
+matchall_behind_flower_egress_test()
+{
+ local action=$1
+ local action_args=$2
+
+ RET=0
+
+ # On egress, all matchall-mirror rules have to be behind the flower rules
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+
+ tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+ matchall skip_sw action $action_args
+ check_err $? "Failed to add matchall rule in front of a flower rule"
+
+ tc filter del dev $swp1 egress protocol all pref 11 handle 102 matchall
+
+ tc filter add dev $swp1 egress protocol all pref 9 handle 102 \
+ matchall skip_sw action $action_args
+ check_fail $? "Incorrect success to add matchall rule behind a flower rule"
+
+ tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+ tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+ matchall skip_sw action $action_args
+
+ tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_err $? "Failed to add flower rule behind a matchall rule"
+
+ tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+ tc filter add dev $swp1 egress protocol ip pref 12 handle 101 flower \
+ skip_sw dst_ip 192.0.2.2 action drop
+ check_fail $? "Incorrect success to add flower rule in front of a matchall rule"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "matchall $action flower egress"
+}
+
+matchall_mirror_behind_flower_egress_test()
+{
+ matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
+}
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ vrf_prepare
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ vrf_cleanup
+}
+
+check_tc_shblock_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
index 4632f51af7ab..729a86cc4ede 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -9,6 +9,7 @@ lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="sanitization_test offload_indication_test \
sanitization_vlan_aware_test offload_indication_vlan_aware_test"
NUM_NETIFS=2
+: ${TIMEOUT:=20000} # ms
source $lib_dir/lib.sh
setup_prepare()
@@ -470,8 +471,8 @@ offload_indication_fdb_flood_test()
bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2
- bridge fdb show brport vxlan0 | grep 00:00:00:00:00:00 \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \
+ bridge fdb show brport vxlan0
check_err $?
bridge fdb del 00:00:00:00:00:00 dev vxlan0 self
@@ -486,11 +487,11 @@ offload_indication_fdb_bridge_test()
bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \
dst 198.51.100.2
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
check_err $?
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
check_err $?
log_test "vxlan entry offload indication - initial state"
@@ -500,9 +501,9 @@ offload_indication_fdb_bridge_test()
RET=0
bridge fdb del de:ad:be:ef:13:37 dev vxlan0 master
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
+ check_err $?
log_test "vxlan entry offload indication - after removal from bridge"
@@ -511,11 +512,11 @@ offload_indication_fdb_bridge_test()
RET=0
bridge fdb add de:ad:be:ef:13:37 dev vxlan0 master static
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
check_err $?
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
check_err $?
log_test "vxlan entry offload indication - after re-add to bridge"
@@ -525,9 +526,9 @@ offload_indication_fdb_bridge_test()
RET=0
bridge fdb del de:ad:be:ef:13:37 dev vxlan0 self
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
+ check_err $?
log_test "vxlan entry offload indication - after removal from vxlan"
@@ -536,11 +537,11 @@ offload_indication_fdb_bridge_test()
RET=0
bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
check_err $?
- bridge fdb show brport vxlan0 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan0
check_err $?
log_test "vxlan entry offload indication - after re-add to vxlan"
@@ -558,27 +559,32 @@ offload_indication_decap_route_test()
{
RET=0
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link set dev vxlan0 down
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link set dev vxlan1 down
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
log_test "vxlan decap route - vxlan device down"
RET=0
ip link set dev vxlan1 up
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link set dev vxlan0 up
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
log_test "vxlan decap route - vxlan device up"
@@ -586,11 +592,13 @@ offload_indication_decap_route_test()
RET=0
ip address delete 198.51.100.1/32 dev lo
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
ip address add 198.51.100.1/32 dev lo
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
log_test "vxlan decap route - add local route"
@@ -598,16 +606,19 @@ offload_indication_decap_route_test()
RET=0
ip link set dev $swp1 nomaster
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link set dev $swp2 nomaster
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
log_test "vxlan decap route - local ports enslavement"
@@ -615,12 +626,14 @@ offload_indication_decap_route_test()
RET=0
ip link del dev br0
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link del dev br1
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
log_test "vxlan decap route - bridge device deletion"
@@ -632,16 +645,19 @@ offload_indication_decap_route_test()
ip link set dev $swp2 master br1
ip link set dev vxlan0 master br0
ip link set dev vxlan1 master br1
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link del dev vxlan0
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
ip link del dev vxlan1
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
log_test "vxlan decap route - vxlan device deletion"
@@ -656,12 +672,15 @@ check_fdb_offloaded()
local mac=00:11:22:33:44:55
local zmac=00:00:00:00:00:00
- bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac self \
+ bridge fdb show dev vxlan0
check_err $?
- bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $mac master \
+ bridge fdb show dev vxlan0
check_err $?
- bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show dev vxlan0
check_err $?
}
@@ -672,13 +691,15 @@ check_vxlan_fdb_not_offloaded()
bridge fdb show dev vxlan0 | grep $mac | grep -q self
check_err $?
- bridge fdb show dev vxlan0 | grep $mac | grep self | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac self \
+ bridge fdb show dev vxlan0
+ check_err $?
bridge fdb show dev vxlan0 | grep $zmac | grep -q self
check_err $?
- bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show dev vxlan0
+ check_err $?
}
check_bridge_fdb_not_offloaded()
@@ -688,8 +709,9 @@ check_bridge_fdb_not_offloaded()
bridge fdb show dev vxlan0 | grep $mac | grep -q master
check_err $?
- bridge fdb show dev vxlan0 | grep $mac | grep master | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $mac master \
+ bridge fdb show dev vxlan0
+ check_err $?
}
__offload_indication_join_vxlan_first()
@@ -771,12 +793,14 @@ __offload_indication_join_vxlan_last()
ip link set dev $swp1 master br0
- bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show dev vxlan0
+ check_err $?
ip link set dev vxlan0 master br0
- bridge fdb show dev vxlan0 | grep $zmac | grep self | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show dev vxlan0
check_err $?
log_test "offload indication - attach vxlan last"
@@ -854,20 +878,26 @@ sanitization_vlan_aware_test()
bridge vlan del vid 10 dev vxlan20
bridge vlan add vid 20 dev vxlan20 pvid untagged
- # Test that offloading of an unsupported tunnel fails when it is
- # triggered by addition of VLAN to a local port
- RET=0
+ # Test that when two VXLAN tunnels with conflicting configurations
+ # (i.e., different TTL) are enslaved to the same VLAN-aware bridge,
+ # then the enslavement of a port to the bridge is denied.
- # TOS must be set to inherit
- ip link set dev vxlan10 type vxlan tos 42
+ # Use the offload indication of the local route to ensure the VXLAN
+ # configuration was correctly rollbacked.
+ ip address add 198.51.100.1/32 dev lo
- ip link set dev $swp1 master br0
- bridge vlan add vid 10 dev $swp1 &> /dev/null
+ ip link set dev vxlan10 type vxlan ttl 10
+ ip link set dev $swp1 master br0 &> /dev/null
check_fail $?
- log_test "vlan-aware - failed vlan addition to a local port"
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
+
+ log_test "vlan-aware - failed enslavement to bridge due to conflict"
- ip link set dev vxlan10 type vxlan tos inherit
+ ip link set dev vxlan10 type vxlan ttl 20
+ ip address del 198.51.100.1/32 dev lo
ip link del dev vxlan20
ip link del dev vxlan10
@@ -924,11 +954,11 @@ offload_indication_vlan_aware_fdb_test()
bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \
dst 198.51.100.2 vlan 10
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
check_err $?
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
check_err $?
log_test "vxlan entry offload indication - initial state"
@@ -938,9 +968,9 @@ offload_indication_vlan_aware_fdb_test()
RET=0
bridge fdb del de:ad:be:ef:13:37 dev vxlan10 master vlan 10
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
+ check_err $?
log_test "vxlan entry offload indication - after removal from bridge"
@@ -949,11 +979,11 @@ offload_indication_vlan_aware_fdb_test()
RET=0
bridge fdb add de:ad:be:ef:13:37 dev vxlan10 master static vlan 10
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
check_err $?
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
check_err $?
log_test "vxlan entry offload indication - after re-add to bridge"
@@ -963,9 +993,9 @@ offload_indication_vlan_aware_fdb_test()
RET=0
bridge fdb del de:ad:be:ef:13:37 dev vxlan10 self
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
+ check_err $?
log_test "vxlan entry offload indication - after removal from vxlan"
@@ -974,11 +1004,11 @@ offload_indication_vlan_aware_fdb_test()
RET=0
bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
check_err $?
- bridge fdb show brport vxlan10 | grep de:ad:be:ef:13:37 | grep -v self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
+ de:ad:be:ef:13:37 self -v bridge fdb show brport vxlan10
check_err $?
log_test "vxlan entry offload indication - after re-add to vxlan"
@@ -990,28 +1020,31 @@ offload_indication_vlan_aware_decap_route_test()
{
RET=0
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
# Toggle PVID flag on one VxLAN device and make sure route is still
# marked as offloaded
bridge vlan add vid 10 dev vxlan10 untagged
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show table local 198.51.100.1
check_err $?
# Toggle PVID flag on second VxLAN device and make sure route is no
# longer marked as offloaded
bridge vlan add vid 20 dev vxlan20 untagged
- ip route show table local | grep 198.51.100.1 | grep -q offload
- check_fail $?
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show table local 198.51.100.1
+ check_err $?
# Toggle PVID flag back and make sure route is marked as offloaded
bridge vlan add vid 10 dev vxlan10 pvid untagged
bridge vlan add vid 20 dev vxlan20 pvid untagged
- ip route show table local | grep 198.51.100.1 | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload ip route show table local 198.51.100.1
check_err $?
log_test "vxlan decap route - vni map/unmap"
@@ -1064,35 +1097,33 @@ offload_indication_vlan_aware_l3vni_test()
ip link set dev vxlan0 master br0
bridge vlan add dev vxlan0 vid 10 pvid untagged
- # No local port or router port is member in the VLAN, so tunnel should
- # not be offloaded
- bridge fdb show brport vxlan0 | grep $zmac | grep self \
- | grep -q offload
- check_fail $? "vxlan tunnel offloaded when should not"
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show brport vxlan0
+ check_err $? "vxlan tunnel not offloaded when should"
# Configure a VLAN interface and make sure tunnel is offloaded
ip link add link br0 name br10 up type vlan id 10
sysctl_set net.ipv6.conf.br10.disable_ipv6 0
ip -6 address add 2001:db8:1::1/64 dev br10
- bridge fdb show brport vxlan0 | grep $zmac | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show brport vxlan0
check_err $? "vxlan tunnel not offloaded when should"
# Unlink the VXLAN device, make sure tunnel is no longer offloaded,
# then add it back to the bridge and make sure it is offloaded
ip link set dev vxlan0 nomaster
- bridge fdb show brport vxlan0 | grep $zmac | grep self \
- | grep -q offload
- check_fail $? "vxlan tunnel offloaded after unlinked from bridge"
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show brport vxlan0
+ check_err $? "vxlan tunnel offloaded after unlinked from bridge"
ip link set dev vxlan0 master br0
- bridge fdb show brport vxlan0 | grep $zmac | grep self \
- | grep -q offload
- check_fail $? "vxlan tunnel offloaded despite no matching vid"
+ busywait "$TIMEOUT" not wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show brport vxlan0
+ check_err $? "vxlan tunnel offloaded despite no matching vid"
bridge vlan add dev vxlan0 vid 10 pvid untagged
- bridge fdb show brport vxlan0 | grep $zmac | grep self \
- | grep -q offload
+ busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb $zmac self \
+ bridge fdb show brport vxlan0
check_err $? "vxlan tunnel not offloaded after adding vid"
log_test "vxlan - l3 vni"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 025a84c2ab5a..de4b32fc4223 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -141,6 +141,44 @@ regions_test()
check_region_snapshot_count dummy post-first-delete 2
+ devlink region new $DL_HANDLE/dummy snapshot 25
+ check_err $? "Failed to create a new snapshot with id 25"
+
+ check_region_snapshot_count dummy post-first-request 3
+
+ devlink region dump $DL_HANDLE/dummy snapshot 25 >> /dev/null
+ check_err $? "Failed to dump snapshot with id 25"
+
+ devlink region read $DL_HANDLE/dummy snapshot 25 addr 0 len 1 >> /dev/null
+ check_err $? "Failed to read snapshot with id 25 (1 byte)"
+
+ devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len 128 >> /dev/null
+ check_err $? "Failed to read snapshot with id 25 (128 bytes)"
+
+ devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len $((1<<32)) >> /dev/null
+ check_err $? "Failed to read snapshot with id 25 (oversized)"
+
+ devlink region read $DL_HANDLE/dummy snapshot 25 addr $((1<<32)) len 128 >> /dev/null 2>&1
+ check_fail $? "Bad read of snapshot with id 25 did not fail"
+
+ devlink region del $DL_HANDLE/dummy snapshot 25
+ check_err $? "Failed to delete snapshot with id 25"
+
+ check_region_snapshot_count dummy post-second-delete 2
+
+ sid=$(devlink -j region new $DL_HANDLE/dummy | jq '.[][][][]')
+ check_err $? "Failed to create a new snapshot with id allocated by the kernel"
+
+ check_region_snapshot_count dummy post-first-request 3
+
+ devlink region dump $DL_HANDLE/dummy snapshot $sid >> /dev/null
+ check_err $? "Failed to dump a snapshot with id allocated by the kernel"
+
+ devlink region del $DL_HANDLE/dummy snapshot $sid
+ check_err $? "Failed to delete snapshot with id allocated by the kernel"
+
+ check_region_snapshot_count dummy post-first-request 2
+
log_test "regions test"
}
@@ -367,6 +405,11 @@ dummy_reporter_test()
{
RET=0
+ check_reporter_info dummy healthy 0 0 0 true
+
+ devlink health set $DL_HANDLE reporter dummy auto_recover false
+ check_err $? "Failed to dummy reporter auto_recover option"
+
check_reporter_info dummy healthy 0 0 0 false
local BREAK_MSG="foo bar"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index f101ab9441e2..da49ad2761b5 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -16,6 +16,8 @@ ALL_TESTS="
trap_group_action_test
bad_trap_group_test
trap_group_stats_test
+ trap_policer_test
+ trap_policer_bind_test
port_del_test
dev_del_test
"
@@ -23,6 +25,7 @@ NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
DEVLINK_DEV=netdevsim/${DEV}
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
SLEEP_TIME=1
NETDEV=""
NUM_NETIFS=0
@@ -103,6 +106,11 @@ trap_metadata_test()
for trap_name in $(devlink_traps_get); do
devlink_trap_metadata_test $trap_name "input_port"
check_err $? "Input port not reported as metadata of trap $trap_name"
+ if [ $trap_name == "ingress_flow_action_drop" ] ||
+ [ $trap_name == "egress_flow_action_drop" ]; then
+ devlink_trap_metadata_test $trap_name "flow_action_cookie"
+ check_err $? "Flow action cookie not reported as metadata of trap $trap_name"
+ fi
done
log_test "Trap metadata"
@@ -251,6 +259,123 @@ trap_group_stats_test()
log_test "Trap group statistics"
}
+trap_policer_test()
+{
+ local packets_t0
+ local packets_t1
+
+ RET=0
+
+ if [ $(devlink_trap_policers_num_get) -eq 0 ]; then
+ check_err 1 "Failed to dump policers"
+ fi
+
+ devlink trap policer set $DEVLINK_DEV policer 1337 &> /dev/null
+ check_fail $? "Did not get an error for setting a non-existing policer"
+ devlink trap policer show $DEVLINK_DEV policer 1337 &> /dev/null
+ check_fail $? "Did not get an error for getting a non-existing policer"
+
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 2000 burst 16
+ check_err $? "Failed to set valid parameters for a valid policer"
+ if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then
+ check_err 1 "Policer rate was not changed"
+ fi
+ if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then
+ check_err 1 "Policer burst size was not changed"
+ fi
+
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 0 &> /dev/null
+ check_fail $? "Policer rate was changed to rate lower than limit"
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 9000 &> /dev/null
+ check_fail $? "Policer rate was changed to rate higher than limit"
+ devlink trap policer set $DEVLINK_DEV policer 1 burst 2 &> /dev/null
+ check_fail $? "Policer burst size was changed to burst size lower than limit"
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 65537 &> /dev/null
+ check_fail $? "Policer burst size was changed to burst size higher than limit"
+ echo "y" > $DEBUGFS_DIR/fail_trap_policer_set
+ devlink trap policer set $DEVLINK_DEV policer 1 rate 3000 &> /dev/null
+ check_fail $? "Managed to set policer rate when should not"
+ echo "n" > $DEBUGFS_DIR/fail_trap_policer_set
+ if [ $(devlink_trap_policer_rate_get 1) -ne 2000 ]; then
+ check_err 1 "Policer rate was changed to an invalid value"
+ fi
+ if [ $(devlink_trap_policer_burst_get 1) -ne 16 ]; then
+ check_err 1 "Policer burst size was changed to an invalid value"
+ fi
+
+ packets_t0=$(devlink_trap_policer_rx_dropped_get 1)
+ sleep .5
+ packets_t1=$(devlink_trap_policer_rx_dropped_get 1)
+ if [ ! $packets_t1 -gt $packets_t0 ]; then
+ check_err 1 "Policer drop counter was not incremented"
+ fi
+
+ echo "y"> $DEBUGFS_DIR/fail_trap_policer_counter_get
+ devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null
+ check_fail $? "Managed to read policer drop counter when should not"
+ echo "n"> $DEBUGFS_DIR/fail_trap_policer_counter_get
+ devlink -s trap policer show $DEVLINK_DEV policer 1 &> /dev/null
+ check_err $? "Did not manage to read policer drop counter when should"
+
+ log_test "Trap policer"
+}
+
+trap_group_check_policer()
+{
+ local group_name=$1; shift
+
+ devlink -j -p trap group show $DEVLINK_DEV group $group_name \
+ | jq -e '.[][][]["policer"]' &> /dev/null
+}
+
+trap_policer_bind_test()
+{
+ RET=0
+
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+ check_err $? "Failed to bind a valid policer"
+ if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then
+ check_err 1 "Bound policer was not changed"
+ fi
+
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 1337 \
+ &> /dev/null
+ check_fail $? "Did not get an error for binding a non-existing policer"
+ if [ $(devlink_trap_group_policer_get "l2_drops") -ne 1 ]; then
+ check_err 1 "Bound policer was changed when should not"
+ fi
+
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 0
+ check_err $? "Failed to unbind a policer when using ID 0"
+ trap_group_check_policer "l2_drops"
+ check_fail $? "Trap group has a policer after unbinding with ID 0"
+
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+ check_err $? "Failed to bind a valid policer"
+
+ devlink trap group set $DEVLINK_DEV group l2_drops nopolicer
+ check_err $? "Failed to unbind a policer when using 'nopolicer' keyword"
+ trap_group_check_policer "l2_drops"
+ check_fail $? "Trap group has a policer after unbinding with 'nopolicer' keyword"
+
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 1
+ check_err $? "Failed to bind a valid policer"
+
+ echo "y"> $DEBUGFS_DIR/fail_trap_group_set
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 2 \
+ &> /dev/null
+ check_fail $? "Managed to bind a policer when should not"
+ echo "n"> $DEBUGFS_DIR/fail_trap_group_set
+ devlink trap group set $DEVLINK_DEV group l2_drops policer 2
+ check_err $? "Did not manage to bind a policer when should"
+
+ devlink trap group set $DEVLINK_DEV group l2_drops action drop \
+ policer 1337 &> /dev/null
+ check_fail $? "Did not get an error for partially modified trap group"
+
+ log_test "Trap policer binding"
+}
+
port_del_test()
{
local group_name
diff --git a/tools/testing/selftests/efivarfs/.gitignore b/tools/testing/selftests/efivarfs/.gitignore
index 33618493562b..807407f7f58b 100644
--- a/tools/testing/selftests/efivarfs/.gitignore
+++ b/tools/testing/selftests/efivarfs/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
create-read
open-unlink
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index b02279da6fa1..94b02a18f230 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
subdir*
script*
execveat
@@ -8,3 +9,4 @@ execveat.ephemeral
execveat.denatured
/recursion-depth
xxxxxxxx*
+pipe
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 33339e31e365..4453b8f8def3 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -3,8 +3,9 @@ CFLAGS = -Wall
CFLAGS += -Wno-nonnull
CFLAGS += -D_GNU_SOURCE
+TEST_PROGS := binfmt_script
TEST_GEN_PROGS := execveat
-TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
# Makefile is a run-time dependency, since it's accessed by the execveat test
TEST_FILES := Makefile
diff --git a/tools/testing/selftests/exec/binfmt_script b/tools/testing/selftests/exec/binfmt_script
new file mode 100755
index 000000000000..05f94a741c7a
--- /dev/null
+++ b/tools/testing/selftests/exec/binfmt_script
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that truncation of bprm->buf doesn't cause unexpected execs paths, along
+# with various other pathological cases.
+import os, subprocess
+
+# Relevant commits
+#
+# b5372fe5dc84 ("exec: load_script: Do not exec truncated interpreter path")
+# 6eb3c3d0a52d ("exec: increase BINPRM_BUF_SIZE to 256")
+
+# BINPRM_BUF_SIZE
+SIZE=256
+
+NAME_MAX=int(subprocess.check_output(["getconf", "NAME_MAX", "."]))
+
+test_num=0
+
+code='''#!/usr/bin/perl
+print "Executed interpreter! Args:\n";
+print "0 : '$0'\n";
+$counter = 1;
+foreach my $a (@ARGV) {
+ print "$counter : '$a'\n";
+ $counter++;
+}
+'''
+
+##
+# test - produce a binfmt_script hashbang line for testing
+#
+# @size: bytes for bprm->buf line, including hashbang but not newline
+# @good: whether this script is expected to execute correctly
+# @hashbang: the special 2 bytes for running binfmt_script
+# @leading: any leading whitespace before the executable path
+# @root: start of executable pathname
+# @target: end of executable pathname
+# @arg: bytes following the executable pathname
+# @fill: character to fill between @root and @target to reach @size bytes
+# @newline: character to use as newline, not counted towards @size
+# ...
+def test(name, size, good=True, leading="", root="./", target="/perl",
+ fill="A", arg="", newline="\n", hashbang="#!"):
+ global test_num, tests, NAME_MAX
+ test_num += 1
+ if test_num > tests:
+ raise ValueError("more binfmt_script tests than expected! (want %d, expected %d)"
+ % (test_num, tests))
+
+ middle = ""
+ remaining = size - len(hashbang) - len(leading) - len(root) - len(target) - len(arg)
+ # The middle of the pathname must not exceed NAME_MAX
+ while remaining >= NAME_MAX:
+ middle += fill * (NAME_MAX - 1)
+ middle += '/'
+ remaining -= NAME_MAX
+ middle += fill * remaining
+
+ dirpath = root + middle
+ binary = dirpath + target
+ if len(target):
+ os.makedirs(dirpath, mode=0o755, exist_ok=True)
+ open(binary, "w").write(code)
+ os.chmod(binary, 0o755)
+
+ buf=hashbang + leading + root + middle + target + arg + newline
+ if len(newline) > 0:
+ buf += 'echo this is not really perl\n'
+
+ script = "binfmt_script-%s" % (name)
+ open(script, "w").write(buf)
+ os.chmod(script, 0o755)
+
+ proc = subprocess.Popen(["./%s" % (script)], shell=True,
+ stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+ stdout = proc.communicate()[0]
+
+ if proc.returncode == 0 and b'Executed interpreter' in stdout:
+ if good:
+ print("ok %d - binfmt_script %s (successful good exec)"
+ % (test_num, name))
+ else:
+ print("not ok %d - binfmt_script %s succeeded when it should have failed"
+ % (test_num, name))
+ else:
+ if good:
+ print("not ok %d - binfmt_script %s failed when it should have succeeded (rc:%d)"
+ % (test_num, name, proc.returncode))
+ else:
+ print("ok %d - binfmt_script %s (correctly failed bad exec)"
+ % (test_num, name))
+
+ # Clean up crazy binaries
+ os.unlink(script)
+ if len(target):
+ elements = binary.split('/')
+ os.unlink(binary)
+ elements.pop()
+ while len(elements) > 1:
+ os.rmdir("/".join(elements))
+ elements.pop()
+
+tests=27
+print("TAP version 1.3")
+print("1..%d" % (tests))
+
+### FAIL (8 tests)
+
+# Entire path is well past the BINFMT_BUF_SIZE.
+test(name="too-big", size=SIZE+80, good=False)
+# Path is right at max size, making it impossible to tell if it was truncated.
+test(name="exact", size=SIZE, good=False)
+# Same as above, but with leading whitespace.
+test(name="exact-space", size=SIZE, good=False, leading=" ")
+# Huge buffer of only whitespace.
+test(name="whitespace-too-big", size=SIZE+71, good=False, root="",
+ fill=" ", target="")
+# A good path, but it gets truncated due to leading whitespace.
+test(name="truncated", size=SIZE+17, good=False, leading=" " * 19)
+# Entirely empty except for #!
+test(name="empty", size=2, good=False, root="",
+ fill="", target="", newline="")
+# Within size, but entirely spaces
+test(name="spaces", size=SIZE-1, good=False, root="", fill=" ",
+ target="", newline="")
+# Newline before binary.
+test(name="newline-prefix", size=SIZE-1, good=False, leading="\n",
+ root="", fill=" ", target="")
+
+### ok (19 tests)
+
+# The original test case that was broken by commit:
+# 8099b047ecc4 ("exec: load_script: don't blindly truncate shebang string")
+test(name="test.pl", size=439, leading=" ",
+ root="./nix/store/bwav8kz8b3y471wjsybgzw84mrh4js9-perl-5.28.1/bin",
+ arg=" -I/nix/store/x6yyav38jgr924nkna62q3pkp0dgmzlx-perl5.28.1-File-Slurp-9999.25/lib/perl5/site_perl -I/nix/store/ha8v67sl8dac92r9z07vzr4gv1y9nwqz-perl5.28.1-Net-DBus-1.1.0/lib/perl5/site_perl -I/nix/store/dcrkvnjmwh69ljsvpbdjjdnqgwx90a9d-perl5.28.1-XML-Parser-2.44/lib/perl5/site_perl -I/nix/store/rmji88k2zz7h4zg97385bygcydrf2q8h-perl5.28.1-XML-Twig-3.52/lib/perl5/site_perl")
+# One byte under size, leaving newline visible.
+test(name="one-under", size=SIZE-1)
+# Two bytes under size, leaving newline visible.
+test(name="two-under", size=SIZE-2)
+# Exact size, but trailing whitespace visible instead of newline
+test(name="exact-trunc-whitespace", size=SIZE, arg=" ")
+# Exact size, but trailing space and first arg char visible instead of newline.
+test(name="exact-trunc-arg", size=SIZE, arg=" f")
+# One bute under, with confirmed non-truncated arg since newline now visible.
+test(name="one-under-full-arg", size=SIZE-1, arg=" f")
+# Short read buffer by one byte.
+test(name="one-under-no-nl", size=SIZE-1, newline="")
+# Short read buffer by half buffer size.
+test(name="half-under-no-nl", size=int(SIZE/2), newline="")
+# One byte under with whitespace arg. leaving wenline visible.
+test(name="one-under-trunc-arg", size=SIZE-1, arg=" ")
+# One byte under with whitespace leading. leaving wenline visible.
+test(name="one-under-leading", size=SIZE-1, leading=" ")
+# One byte under with whitespace leading and as arg. leaving newline visible.
+test(name="one-under-leading-trunc-arg", size=SIZE-1, leading=" ", arg=" ")
+# Same as above, but with 2 bytes under
+test(name="two-under-no-nl", size=SIZE-2, newline="")
+test(name="two-under-trunc-arg", size=SIZE-2, arg=" ")
+test(name="two-under-leading", size=SIZE-2, leading=" ")
+test(name="two-under-leading-trunc-arg", size=SIZE-2, leading=" ", arg=" ")
+# Same as above, but with buffer half filled
+test(name="two-under-no-nl", size=int(SIZE/2), newline="")
+test(name="two-under-trunc-arg", size=int(SIZE/2), arg=" ")
+test(name="two-under-leading", size=int(SIZE/2), leading=" ")
+test(name="two-under-lead-trunc-arg", size=int(SIZE/2), leading=" ", arg=" ")
+
+if test_num != tests:
+ raise ValueError("fewer binfmt_script tests than expected! (ran %d, expected %d"
+ % (test_num, tests))
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index cbb6efbdb786..67bf7254a48f 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -5,7 +5,9 @@
* Selftests for execveat(2).
*/
+#ifndef _GNU_SOURCE
#define _GNU_SOURCE /* to get O_PATH, AT_EMPTY_PATH */
+#endif
#include <sys/sendfile.h>
#include <sys/stat.h>
#include <sys/syscall.h>
@@ -311,6 +313,10 @@ static int run_tests(void)
fail += check_execveat_fail(AT_FDCWD, fullname_symlink,
AT_SYMLINK_NOFOLLOW, ELOOP);
+ /* Non-regular file failure */
+ fail += check_execveat_fail(dot_dfd, "pipe", 0, EACCES);
+ unlink("pipe");
+
/* Shell script wrapping executable file: */
/* dfd + path */
fail += check_execveat(subdir_dfd, "../script", 0);
@@ -384,6 +390,8 @@ static void prerequisites(void)
fd = open("subdir.ephemeral/script", O_RDWR|O_CREAT|O_TRUNC, 0755);
write(fd, script, strlen(script));
close(fd);
+
+ mkfifo("pipe", 0755);
}
int main(int argc, char **argv)
diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore
index 8449cf6716ce..f0c0ff20d6cf 100644
--- a/tools/testing/selftests/filesystems/.gitignore
+++ b/tools/testing/selftests/filesystems/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
dnotify_test
devpts_pts
diff --git a/tools/testing/selftests/filesystems/binderfs/.gitignore b/tools/testing/selftests/filesystems/binderfs/.gitignore
index 8a5d9bf63dd4..8e5cf9084894 100644
--- a/tools/testing/selftests/filesystems/binderfs/.gitignore
+++ b/tools/testing/selftests/filesystems/binderfs/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
binderfs_test
diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile
index 58cb659b56b4..8af25ae96049 100644
--- a/tools/testing/selftests/filesystems/binderfs/Makefile
+++ b/tools/testing/selftests/filesystems/binderfs/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include/
+CFLAGS += -I../../../../../usr/include/ -pthread
TEST_GEN_PROGS := binderfs_test
+binderfs_test: binderfs_test.c ../../kselftest.h ../../kselftest_harness.h
+
include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 8c2ed962e1c7..8a6b507e34a8 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -3,114 +3,47 @@
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
+#include <pthread.h>
#include <sched.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/fsuid.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
+#include <sys/socket.h>
#include <sys/stat.h>
+#include <sys/sysinfo.h>
#include <sys/types.h>
+#include <sys/wait.h>
#include <unistd.h>
#include <linux/android/binder.h>
#include <linux/android/binderfs.h>
-#include "../../kselftest.h"
-
-static ssize_t write_nointr(int fd, const void *buf, size_t count)
-{
- ssize_t ret;
-again:
- ret = write(fd, buf, count);
- if (ret < 0 && errno == EINTR)
- goto again;
-
- return ret;
-}
-static void write_to_file(const char *filename, const void *buf, size_t count,
- int allowed_errno)
-{
- int fd, saved_errno;
- ssize_t ret;
+#include "../../kselftest.h"
+#include "../../kselftest_harness.h"
- fd = open(filename, O_WRONLY | O_CLOEXEC);
- if (fd < 0)
- ksft_exit_fail_msg("%s - Failed to open file %s\n",
- strerror(errno), filename);
+#define DEFAULT_THREADS 4
- ret = write_nointr(fd, buf, count);
- if (ret < 0) {
- if (allowed_errno && (errno == allowed_errno)) {
- close(fd);
- return;
- }
+#define PTR_TO_INT(p) ((int)((intptr_t)(p)))
+#define INT_TO_PTR(u) ((void *)((intptr_t)(u)))
- goto on_error;
+#define close_prot_errno_disarm(fd) \
+ if (fd >= 0) { \
+ int _e_ = errno; \
+ close(fd); \
+ errno = _e_; \
+ fd = -EBADF; \
}
- if ((size_t)ret != count)
- goto on_error;
-
- close(fd);
- return;
-
-on_error:
- saved_errno = errno;
- close(fd);
- errno = saved_errno;
-
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to write to file %s\n",
- strerror(errno), filename);
-
- ksft_exit_fail_msg("Failed to write to file %s\n", filename);
-}
-
-static void change_to_userns(void)
-{
- int ret;
- uid_t uid;
- gid_t gid;
- /* {g,u}id_map files only allow a max of 4096 bytes written to them */
- char idmap[4096];
-
- uid = getuid();
- gid = getgid();
-
- ret = unshare(CLONE_NEWUSER);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to unshare user namespace\n",
- strerror(errno));
-
- write_to_file("/proc/self/setgroups", "deny", strlen("deny"), ENOENT);
-
- ret = snprintf(idmap, sizeof(idmap), "0 %d 1", uid);
- if (ret < 0 || (size_t)ret >= sizeof(idmap))
- ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n",
- strerror(errno));
-
- write_to_file("/proc/self/uid_map", idmap, strlen(idmap), 0);
-
- ret = snprintf(idmap, sizeof(idmap), "0 %d 1", gid);
- if (ret < 0 || (size_t)ret >= sizeof(idmap))
- ksft_exit_fail_msg("%s - Failed to prepare uid mapping\n",
- strerror(errno));
-
- write_to_file("/proc/self/gid_map", idmap, strlen(idmap), 0);
-
- ret = setgid(0);
- if (ret)
- ksft_exit_fail_msg("%s - Failed to setgid(0)\n",
- strerror(errno));
+#define log_exit(format, ...) \
+ ({ \
+ fprintf(stderr, format "\n", ##__VA_ARGS__); \
+ exit(EXIT_FAILURE); \
+ })
- ret = setuid(0);
- if (ret)
- ksft_exit_fail_msg("%s - Failed to setgid(0)\n",
- strerror(errno));
-}
-
-static void change_to_mountns(void)
+static void change_mountns(void)
{
int ret;
@@ -132,36 +65,31 @@ static void rmdir_protect_errno(const char *dir)
errno = saved_errno;
}
-static void __do_binderfs_test(void)
+static int __do_binderfs_test(void)
{
int fd, ret, saved_errno;
size_t len;
ssize_t wret;
- bool keep = false;
struct binderfs_device device = { 0 };
struct binder_version version = { 0 };
+ char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX",
+ device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
- change_to_mountns();
-
- ret = mkdir("/dev/binderfs", 0755);
- if (ret < 0) {
- if (errno != EEXIST)
- ksft_exit_fail_msg(
- "%s - Failed to create binderfs mountpoint\n",
- strerror(errno));
+ change_mountns();
- keep = true;
- }
+ if (!mkdtemp(binderfs_mntpt))
+ ksft_exit_fail_msg(
+ "%s - Failed to create binderfs mountpoint\n",
+ strerror(errno));
- ret = mount(NULL, "/dev/binderfs", "binder", 0, 0);
+ ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
if (ret < 0) {
if (errno != ENODEV)
ksft_exit_fail_msg("%s - Failed to mount binderfs\n",
strerror(errno));
- keep ? : rmdir_protect_errno("/dev/binderfs");
- ksft_exit_skip(
- "The Android binderfs filesystem is not available\n");
+ rmdir_protect_errno(binderfs_mntpt);
+ return 1;
}
/* binderfs mount test passed */
@@ -169,7 +97,8 @@ static void __do_binderfs_test(void)
memcpy(device.name, "my-binder", strlen("my-binder"));
- fd = open("/dev/binderfs/binder-control", O_RDONLY | O_CLOEXEC);
+ snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt);
+ fd = open(device_path, O_RDONLY | O_CLOEXEC);
if (fd < 0)
ksft_exit_fail_msg(
"%s - Failed to open binder-control device\n",
@@ -180,7 +109,7 @@ static void __do_binderfs_test(void)
close(fd);
errno = saved_errno;
if (ret < 0) {
- keep ? : rmdir_protect_errno("/dev/binderfs");
+ rmdir_protect_errno(binderfs_mntpt);
ksft_exit_fail_msg(
"%s - Failed to allocate new binder device\n",
strerror(errno));
@@ -193,9 +122,10 @@ static void __do_binderfs_test(void)
/* binder device allocation test passed */
ksft_inc_pass_cnt();
- fd = open("/dev/binderfs/my-binder", O_CLOEXEC | O_RDONLY);
+ snprintf(device_path, sizeof(device_path), "%s/my-binder", binderfs_mntpt);
+ fd = open(device_path, O_CLOEXEC | O_RDONLY);
if (fd < 0) {
- keep ? : rmdir_protect_errno("/dev/binderfs");
+ rmdir_protect_errno(binderfs_mntpt);
ksft_exit_fail_msg("%s - Failed to open my-binder device\n",
strerror(errno));
}
@@ -205,7 +135,7 @@ static void __do_binderfs_test(void)
close(fd);
errno = saved_errno;
if (ret < 0) {
- keep ? : rmdir_protect_errno("/dev/binderfs");
+ rmdir_protect_errno(binderfs_mntpt);
ksft_exit_fail_msg(
"%s - Failed to open perform BINDER_VERSION request\n",
strerror(errno));
@@ -217,9 +147,9 @@ static void __do_binderfs_test(void)
/* binder transaction with binderfs binder device passed */
ksft_inc_pass_cnt();
- ret = unlink("/dev/binderfs/my-binder");
+ ret = unlink(device_path);
if (ret < 0) {
- keep ? : rmdir_protect_errno("/dev/binderfs");
+ rmdir_protect_errno(binderfs_mntpt);
ksft_exit_fail_msg("%s - Failed to delete binder device\n",
strerror(errno));
}
@@ -227,12 +157,13 @@ static void __do_binderfs_test(void)
/* binder device removal passed */
ksft_inc_pass_cnt();
- ret = unlink("/dev/binderfs/binder-control");
+ snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt);
+ ret = unlink(device_path);
if (!ret) {
- keep ? : rmdir_protect_errno("/dev/binderfs");
+ rmdir_protect_errno(binderfs_mntpt);
ksft_exit_fail_msg("Managed to delete binder-control device\n");
} else if (errno != EPERM) {
- keep ? : rmdir_protect_errno("/dev/binderfs");
+ rmdir_protect_errno(binderfs_mntpt);
ksft_exit_fail_msg(
"%s - Failed to delete binder-control device but exited with unexpected error code\n",
strerror(errno));
@@ -242,34 +173,341 @@ static void __do_binderfs_test(void)
ksft_inc_xfail_cnt();
on_error:
- ret = umount2("/dev/binderfs", MNT_DETACH);
- keep ?: rmdir_protect_errno("/dev/binderfs");
+ ret = umount2(binderfs_mntpt, MNT_DETACH);
+ rmdir_protect_errno(binderfs_mntpt);
if (ret < 0)
ksft_exit_fail_msg("%s - Failed to unmount binderfs\n",
strerror(errno));
/* binderfs unmount test passed */
ksft_inc_pass_cnt();
+ return 0;
}
-static void binderfs_test_privileged()
+static int wait_for_pid(pid_t pid)
{
- if (geteuid() != 0)
- ksft_print_msg(
- "Tests are not run as root. Skipping privileged tests\n");
- else
- __do_binderfs_test();
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static int setid_userns_root(void)
+{
+ if (setuid(0))
+ return -1;
+ if (setgid(0))
+ return -1;
+
+ setfsuid(0);
+ setfsgid(0);
+
+ return 0;
+}
+
+enum idmap_type {
+ UID_MAP,
+ GID_MAP,
+};
+
+static ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+ ssize_t ret;
+again:
+ ret = read(fd, buf, count);
+ if (ret < 0 && errno == EINTR)
+ goto again;
+
+ return ret;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+again:
+ ret = write(fd, buf, count);
+ if (ret < 0 && errno == EINTR)
+ goto again;
+
+ return ret;
+}
+
+static int write_id_mapping(enum idmap_type type, pid_t pid, const char *buf,
+ size_t buf_size)
+{
+ int fd;
+ int ret;
+ char path[4096];
+
+ if (type == GID_MAP) {
+ int setgroups_fd;
+
+ snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
+ setgroups_fd = open(path, O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+ if (setgroups_fd < 0 && errno != ENOENT)
+ return -1;
+
+ if (setgroups_fd >= 0) {
+ ret = write_nointr(setgroups_fd, "deny", sizeof("deny") - 1);
+ close_prot_errno_disarm(setgroups_fd);
+ if (ret != sizeof("deny") - 1)
+ return -1;
+ }
+ }
+
+ switch (type) {
+ case UID_MAP:
+ ret = snprintf(path, sizeof(path), "/proc/%d/uid_map", pid);
+ break;
+ case GID_MAP:
+ ret = snprintf(path, sizeof(path), "/proc/%d/gid_map", pid);
+ break;
+ default:
+ return -1;
+ }
+ if (ret < 0 || ret >= sizeof(path))
+ return -E2BIG;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, buf_size);
+ close_prot_errno_disarm(fd);
+ if (ret != buf_size)
+ return -1;
+
+ return 0;
+}
+
+static void change_userns(int syncfds[2])
+{
+ int ret;
+ char buf;
+
+ close_prot_errno_disarm(syncfds[1]);
+
+ ret = unshare(CLONE_NEWUSER);
+ if (ret < 0)
+ ksft_exit_fail_msg("%s - Failed to unshare user namespace\n",
+ strerror(errno));
+
+ ret = write_nointr(syncfds[0], "1", 1);
+ if (ret != 1)
+ ksft_exit_fail_msg("write_nointr() failed\n");
+
+ ret = read_nointr(syncfds[0], &buf, 1);
+ if (ret != 1)
+ ksft_exit_fail_msg("read_nointr() failed\n");
+
+ close_prot_errno_disarm(syncfds[0]);
+
+ if (setid_userns_root())
+ ksft_exit_fail_msg("setid_userns_root() failed");
+}
+
+static void change_idmaps(int syncfds[2], pid_t pid)
+{
+ int ret;
+ char buf;
+ char id_map[4096];
+
+ close_prot_errno_disarm(syncfds[0]);
+
+ ret = read_nointr(syncfds[1], &buf, 1);
+ if (ret != 1)
+ ksft_exit_fail_msg("read_nointr() failed\n");
+
+ snprintf(id_map, sizeof(id_map), "0 %d 1\n", getuid());
+ ret = write_id_mapping(UID_MAP, pid, id_map, strlen(id_map));
+ if (ret)
+ ksft_exit_fail_msg("write_id_mapping(UID_MAP) failed");
+
+ snprintf(id_map, sizeof(id_map), "0 %d 1\n", getgid());
+ ret = write_id_mapping(GID_MAP, pid, id_map, strlen(id_map));
+ if (ret)
+ ksft_exit_fail_msg("write_id_mapping(GID_MAP) failed");
+
+ ret = write_nointr(syncfds[1], "1", 1);
+ if (ret != 1)
+ ksft_exit_fail_msg("write_nointr() failed");
+
+ close_prot_errno_disarm(syncfds[1]);
}
-static void binderfs_test_unprivileged()
+static void *binder_version_thread(void *data)
{
- change_to_userns();
- __do_binderfs_test();
+ int fd = PTR_TO_INT(data);
+ struct binder_version version = { 0 };
+ int ret;
+
+ ret = ioctl(fd, BINDER_VERSION, &version);
+ if (ret < 0)
+ ksft_print_msg("%s - Failed to open perform BINDER_VERSION request\n", strerror(errno));
+
+ pthread_exit(data);
}
-int main(int argc, char *argv[])
+/*
+ * Regression test:
+ * 2669b8b0c798 ("binder: prevent UAF for binderfs devices")
+ * f0fe2c0f050d ("binder: prevent UAF for binderfs devices II")
+ * 211b64e4b5b6 ("binderfs: use refcount for binder control devices too")
+ */
+TEST(binderfs_stress)
{
- binderfs_test_privileged();
- binderfs_test_unprivileged();
- ksft_exit_pass();
+ int fds[1000];
+ int syncfds[2];
+ pid_t pid;
+ int fd, ret;
+ size_t len;
+ struct binderfs_device device = { 0 };
+ char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX",
+ device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
+
+ ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds);
+ if (ret < 0)
+ ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno));
+
+ pid = fork();
+ if (pid < 0) {
+ close_prot_errno_disarm(syncfds[0]);
+ close_prot_errno_disarm(syncfds[1]);
+ ksft_exit_fail_msg("%s - Failed to fork", strerror(errno));
+ }
+
+ if (pid == 0) {
+ int i, j, k, nthreads;
+ pthread_attr_t attr;
+ pthread_t threads[DEFAULT_THREADS];
+ change_userns(syncfds);
+ change_mountns();
+
+ if (!mkdtemp(binderfs_mntpt))
+ log_exit("%s - Failed to create binderfs mountpoint\n",
+ strerror(errno));
+
+ ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
+ if (ret < 0)
+ log_exit("%s - Failed to mount binderfs\n", strerror(errno));
+
+ for (int i = 0; i < ARRAY_SIZE(fds); i++) {
+
+ snprintf(device_path, sizeof(device_path),
+ "%s/binder-control", binderfs_mntpt);
+ fd = open(device_path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ log_exit("%s - Failed to open binder-control device\n", strerror(errno));
+
+ memset(&device, 0, sizeof(device));
+ snprintf(device.name, sizeof(device.name), "%d", i);
+ ret = ioctl(fd, BINDER_CTL_ADD, &device);
+ close_prot_errno_disarm(fd);
+ if (ret < 0)
+ log_exit("%s - Failed to allocate new binder device\n", strerror(errno));
+
+ snprintf(device_path, sizeof(device_path), "%s/%d",
+ binderfs_mntpt, i);
+ fds[i] = open(device_path, O_RDONLY | O_CLOEXEC);
+ if (fds[i] < 0)
+ log_exit("%s - Failed to open binder device\n", strerror(errno));
+ }
+
+ ret = umount2(binderfs_mntpt, MNT_DETACH);
+ rmdir_protect_errno(binderfs_mntpt);
+ if (ret < 0)
+ log_exit("%s - Failed to unmount binderfs\n", strerror(errno));
+
+ nthreads = get_nprocs_conf();
+ if (nthreads > DEFAULT_THREADS)
+ nthreads = DEFAULT_THREADS;
+
+ pthread_attr_init(&attr);
+ for (k = 0; k < ARRAY_SIZE(fds); k++) {
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_create(&threads[i], &attr, binder_version_thread, INT_TO_PTR(fds[k]));
+ if (ret) {
+ ksft_print_msg("%s - Failed to create thread %d\n", strerror(errno), i);
+ break;
+ }
+ }
+
+ for (j = 0; j < i; j++) {
+ void *fdptr = NULL;
+
+ ret = pthread_join(threads[j], &fdptr);
+ if (ret)
+ ksft_print_msg("%s - Failed to join thread %d for fd %d\n", strerror(errno), j, PTR_TO_INT(fdptr));
+ }
+ }
+ pthread_attr_destroy(&attr);
+
+ for (k = 0; k < ARRAY_SIZE(fds); k++)
+ close(fds[k]);
+
+ exit(EXIT_SUCCESS);
+ }
+
+ change_idmaps(syncfds, pid);
+
+ ret = wait_for_pid(pid);
+ if (ret)
+ ksft_exit_fail_msg("wait_for_pid() failed");
}
+
+TEST(binderfs_test_privileged)
+{
+ if (geteuid() != 0)
+ XFAIL(return, "Tests are not run as root. Skipping privileged tests");
+
+ if (__do_binderfs_test() == 1)
+ XFAIL(return, "The Android binderfs filesystem is not available");
+}
+
+TEST(binderfs_test_unprivileged)
+{
+ int ret;
+ int syncfds[2];
+ pid_t pid;
+
+ ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds);
+ if (ret < 0)
+ ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno));
+
+ pid = fork();
+ if (pid < 0) {
+ close_prot_errno_disarm(syncfds[0]);
+ close_prot_errno_disarm(syncfds[1]);
+ ksft_exit_fail_msg("%s - Failed to fork", strerror(errno));
+ }
+
+ if (pid == 0) {
+ change_userns(syncfds);
+ if (__do_binderfs_test() == 1)
+ exit(2);
+ exit(EXIT_SUCCESS);
+ }
+
+ change_idmaps(syncfds, pid);
+
+ ret = wait_for_pid(pid);
+ if (ret) {
+ if (ret == 2)
+ XFAIL(return, "The Android binderfs filesystem is not available");
+ else
+ ksft_exit_fail_msg("wait_for_pid() failed");
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/epoll/.gitignore b/tools/testing/selftests/filesystems/epoll/.gitignore
index 9ae8db44ec14..9090157258b1 100644
--- a/tools/testing/selftests/filesystems/epoll/.gitignore
+++ b/tools/testing/selftests/filesystems/epoll/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
epoll_wakeup_test
diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
index 37a04dab56f0..d979ff14775a 100644
--- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -3,17 +3,19 @@
#define _GNU_SOURCE
#include <poll.h>
#include <unistd.h>
+#include <assert.h>
#include <signal.h>
#include <pthread.h>
#include <sys/epoll.h>
#include <sys/socket.h>
+#include <sys/eventfd.h>
#include "../../kselftest_harness.h"
struct epoll_mtcontext
{
int efd[3];
int sfd[4];
- int count;
+ volatile int count;
pthread_t main;
pthread_t waiter;
@@ -3071,4 +3073,213 @@ TEST(epoll58)
close(ctx.sfd[3]);
}
+static void *epoll59_thread(void *ctx_)
+{
+ struct epoll_mtcontext *ctx = ctx_;
+ struct epoll_event e;
+ int i;
+
+ for (i = 0; i < 100000; i++) {
+ while (ctx->count == 0)
+ ;
+
+ e.events = EPOLLIN | EPOLLERR | EPOLLET;
+ epoll_ctl(ctx->efd[0], EPOLL_CTL_MOD, ctx->sfd[0], &e);
+ ctx->count = 0;
+ }
+
+ return NULL;
+}
+
+/*
+ * t0
+ * (p) \
+ * e0
+ * (et) /
+ * e0
+ *
+ * Based on https://bugzilla.kernel.org/show_bug.cgi?id=205933
+ */
+TEST(epoll59)
+{
+ pthread_t emitter;
+ struct pollfd pfd;
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+ int i, ret;
+
+ signal(SIGUSR1, signal_handler);
+
+ ctx.efd[0] = epoll_create1(0);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ ctx.sfd[0] = eventfd(1, 0);
+ ASSERT_GE(ctx.sfd[0], 0);
+
+ e.events = EPOLLIN | EPOLLERR | EPOLLET;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ ASSERT_EQ(pthread_create(&emitter, NULL, epoll59_thread, &ctx), 0);
+
+ for (i = 0; i < 100000; i++) {
+ ret = epoll_wait(ctx.efd[0], &e, 1, 1000);
+ ASSERT_GT(ret, 0);
+
+ while (ctx.count != 0)
+ ;
+ ctx.count = 1;
+ }
+ if (pthread_tryjoin_np(emitter, NULL) < 0) {
+ pthread_kill(emitter, SIGUSR1);
+ pthread_join(emitter, NULL);
+ }
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+}
+
+enum {
+ EPOLL60_EVENTS_NR = 10,
+};
+
+struct epoll60_ctx {
+ volatile int stopped;
+ int ready;
+ int waiters;
+ int epfd;
+ int evfd[EPOLL60_EVENTS_NR];
+};
+
+static void *epoll60_wait_thread(void *ctx_)
+{
+ struct epoll60_ctx *ctx = ctx_;
+ struct epoll_event e;
+ sigset_t sigmask;
+ uint64_t v;
+ int ret;
+
+ /* Block SIGUSR1 */
+ sigemptyset(&sigmask);
+ sigaddset(&sigmask, SIGUSR1);
+ sigprocmask(SIG_SETMASK, &sigmask, NULL);
+
+ /* Prepare empty mask for epoll_pwait() */
+ sigemptyset(&sigmask);
+
+ while (!ctx->stopped) {
+ /* Mark we are ready */
+ __atomic_fetch_add(&ctx->ready, 1, __ATOMIC_ACQUIRE);
+
+ /* Start when all are ready */
+ while (__atomic_load_n(&ctx->ready, __ATOMIC_ACQUIRE) &&
+ !ctx->stopped);
+
+ /* Account this waiter */
+ __atomic_fetch_add(&ctx->waiters, 1, __ATOMIC_ACQUIRE);
+
+ ret = epoll_pwait(ctx->epfd, &e, 1, 2000, &sigmask);
+ if (ret != 1) {
+ /* We expect only signal delivery on stop */
+ assert(ret < 0 && errno == EINTR && "Lost wakeup!\n");
+ assert(ctx->stopped);
+ break;
+ }
+
+ ret = read(e.data.fd, &v, sizeof(v));
+ /* Since we are on ET mode, thus each thread gets its own fd. */
+ assert(ret == sizeof(v));
+
+ __atomic_fetch_sub(&ctx->waiters, 1, __ATOMIC_RELEASE);
+ }
+
+ return NULL;
+}
+
+static inline unsigned long long msecs(void)
+{
+ struct timespec ts;
+ unsigned long long msecs;
+
+ clock_gettime(CLOCK_REALTIME, &ts);
+ msecs = ts.tv_sec * 1000ull;
+ msecs += ts.tv_nsec / 1000000ull;
+
+ return msecs;
+}
+
+static inline int count_waiters(struct epoll60_ctx *ctx)
+{
+ return __atomic_load_n(&ctx->waiters, __ATOMIC_ACQUIRE);
+}
+
+TEST(epoll60)
+{
+ struct epoll60_ctx ctx = { 0 };
+ pthread_t waiters[ARRAY_SIZE(ctx.evfd)];
+ struct epoll_event e;
+ int i, n, ret;
+
+ signal(SIGUSR1, signal_handler);
+
+ ctx.epfd = epoll_create1(0);
+ ASSERT_GE(ctx.epfd, 0);
+
+ /* Create event fds */
+ for (i = 0; i < ARRAY_SIZE(ctx.evfd); i++) {
+ ctx.evfd[i] = eventfd(0, EFD_NONBLOCK);
+ ASSERT_GE(ctx.evfd[i], 0);
+
+ e.events = EPOLLIN | EPOLLET;
+ e.data.fd = ctx.evfd[i];
+ ASSERT_EQ(epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd[i], &e), 0);
+ }
+
+ /* Create waiter threads */
+ for (i = 0; i < ARRAY_SIZE(waiters); i++)
+ ASSERT_EQ(pthread_create(&waiters[i], NULL,
+ epoll60_wait_thread, &ctx), 0);
+
+ for (i = 0; i < 300; i++) {
+ uint64_t v = 1, ms;
+
+ /* Wait for all to be ready */
+ while (__atomic_load_n(&ctx.ready, __ATOMIC_ACQUIRE) !=
+ ARRAY_SIZE(ctx.evfd))
+ ;
+
+ /* Steady, go */
+ __atomic_fetch_sub(&ctx.ready, ARRAY_SIZE(ctx.evfd),
+ __ATOMIC_ACQUIRE);
+
+ /* Wait all have gone to kernel */
+ while (count_waiters(&ctx) != ARRAY_SIZE(ctx.evfd))
+ ;
+
+ /* 1ms should be enough to schedule away */
+ usleep(1000);
+
+ /* Quickly signal all handles at once */
+ for (n = 0; n < ARRAY_SIZE(ctx.evfd); n++) {
+ ret = write(ctx.evfd[n], &v, sizeof(v));
+ ASSERT_EQ(ret, sizeof(v));
+ }
+
+ /* Busy loop for 1s and wait for all waiters to wake up */
+ ms = msecs();
+ while (count_waiters(&ctx) && msecs() < ms + 1000)
+ ;
+
+ ASSERT_EQ(count_waiters(&ctx), 0);
+ }
+ ctx.stopped = 1;
+ /* Stop waiters */
+ for (i = 0; i < ARRAY_SIZE(waiters); i++)
+ ret = pthread_kill(waiters[i], SIGUSR1);
+ for (i = 0; i < ARRAY_SIZE(waiters); i++)
+ pthread_join(waiters[i], NULL);
+
+ for (i = 0; i < ARRAY_SIZE(waiters); i++)
+ close(ctx.evfd[i]);
+ close(ctx.epfd);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 012b2cf69c11..40211cd8f0e6 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -1,13 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
# Makefile for firmware loading selftests
-
-# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
-all:
+CFLAGS = -Wall \
+ -O2
TEST_PROGS := fw_run_tests.sh
TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh
+TEST_GEN_FILES := fw_namespace
include ../lib.mk
-
-# Nothing to clean up.
-clean:
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index 56894477c8bd..fcc281373b4d 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -86,6 +86,29 @@ else
fi
fi
+# Try platform (EFI embedded fw) loading too
+if [ ! -e "$DIR"/trigger_request_platform ]; then
+ echo "$0: firmware loading: platform trigger not present, ignoring test" >&2
+else
+ if printf '\000' >"$DIR"/trigger_request_platform 2> /dev/null; then
+ echo "$0: empty filename should not succeed (platform)" >&2
+ exit 1
+ fi
+
+ # Note we echo a non-existing name, since files on the file-system
+ # are preferred over firmware embedded inside the platform's firmware
+ # The test adds a fake entry with the requested name to the platform's
+ # fw list, so the name does not matter as long as it does not exist
+ if ! echo -n "nope-$NAME" >"$DIR"/trigger_request_platform ; then
+ echo "$0: could not trigger request platform" >&2
+ exit 1
+ fi
+
+ # The test verifies itself that the loaded firmware contents matches
+ # the contents for the fake platform fw entry it added.
+ echo "$0: platform loading works"
+fi
+
### Batched requests tests
test_config_present()
{
diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c
new file mode 100644
index 000000000000..5ebc1aec7923
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_namespace.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Test triggering of loading of firmware from different mount
+ * namespaces. Expect firmware to be always loaded from the mount
+ * namespace of PID 1. */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#ifndef CLONE_NEWNS
+# define CLONE_NEWNS 0x00020000
+#endif
+
+static char *fw_path = NULL;
+
+static void die(char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ if (fw_path)
+ unlink(fw_path);
+ umount("/lib/firmware");
+ exit(EXIT_FAILURE);
+}
+
+static void trigger_fw(const char *fw_name, const char *sys_path)
+{
+ int fd;
+
+ fd = open(sys_path, O_WRONLY);
+ if (fd < 0)
+ die("open failed: %s\n",
+ strerror(errno));
+ if (write(fd, fw_name, strlen(fw_name)) != strlen(fw_name))
+ exit(EXIT_FAILURE);
+ close(fd);
+}
+
+static void setup_fw(const char *fw_path)
+{
+ int fd;
+ const char fw[] = "ABCD0123";
+
+ fd = open(fw_path, O_WRONLY | O_CREAT, 0600);
+ if (fd < 0)
+ die("open failed: %s\n",
+ strerror(errno));
+ if (write(fd, fw, sizeof(fw) -1) != sizeof(fw) -1)
+ die("write failed: %s\n",
+ strerror(errno));
+ close(fd);
+}
+
+static bool test_fw_in_ns(const char *fw_name, const char *sys_path, bool block_fw_in_parent_ns)
+{
+ pid_t child;
+
+ if (block_fw_in_parent_ns)
+ if (mount("test", "/lib/firmware", "tmpfs", MS_RDONLY, NULL) == -1)
+ die("blocking firmware in parent ns failed\n");
+
+ child = fork();
+ if (child == -1) {
+ die("fork failed: %s\n",
+ strerror(errno));
+ }
+ if (child != 0) { /* parent */
+ pid_t pid;
+ int status;
+
+ pid = waitpid(child, &status, 0);
+ if (pid == -1) {
+ die("waitpid failed: %s\n",
+ strerror(errno));
+ }
+ if (pid != child) {
+ die("waited for %d got %d\n",
+ child, pid);
+ }
+ if (!WIFEXITED(status)) {
+ die("child did not terminate cleanly\n");
+ }
+ if (block_fw_in_parent_ns)
+ umount("/lib/firmware");
+ return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ }
+
+ if (unshare(CLONE_NEWNS) != 0) {
+ die("unshare(CLONE_NEWNS) failed: %s\n",
+ strerror(errno));
+ }
+ if (mount(NULL, "/", NULL, MS_SLAVE|MS_REC, NULL) == -1)
+ die("remount root in child ns failed\n");
+
+ if (!block_fw_in_parent_ns) {
+ if (mount("test", "/lib/firmware", "tmpfs", MS_RDONLY, NULL) == -1)
+ die("blocking firmware in child ns failed\n");
+ } else
+ umount("/lib/firmware");
+
+ trigger_fw(fw_name, sys_path);
+
+ exit(EXIT_SUCCESS);
+}
+
+int main(int argc, char **argv)
+{
+ const char *fw_name = "test-firmware.bin";
+ char *sys_path;
+ if (argc != 2)
+ die("usage: %s sys_path\n", argv[0]);
+
+ /* Mount tmpfs to /lib/firmware so we don't have to assume
+ that it is writable for us.*/
+ if (mount("test", "/lib/firmware", "tmpfs", 0, NULL) == -1)
+ die("mounting tmpfs to /lib/firmware failed\n");
+
+ sys_path = argv[1];
+ asprintf(&fw_path, "/lib/firmware/%s", fw_name);
+
+ setup_fw(fw_path);
+
+ setvbuf(stdout, NULL, _IONBF, 0);
+ /* Positive case: firmware in PID1 mount namespace */
+ printf("Testing with firmware in parent namespace (assumed to be same file system as PID1)\n");
+ if (!test_fw_in_ns(fw_name, sys_path, false))
+ die("error: failed to access firmware\n");
+
+ /* Negative case: firmware in child mount namespace, expected to fail */
+ printf("Testing with firmware in child namespace\n");
+ if (test_fw_in_ns(fw_name, sys_path, true))
+ die("error: firmware access did not fail\n");
+
+ unlink(fw_path);
+ free(fw_path);
+ umount("/lib/firmware");
+ exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh
index 8e14d555c197..777377078d5e 100755
--- a/tools/testing/selftests/firmware/fw_run_tests.sh
+++ b/tools/testing/selftests/firmware/fw_run_tests.sh
@@ -61,6 +61,10 @@ run_test_config_0003()
check_mods
check_setup
+echo "Running namespace test: "
+$TEST_DIR/fw_namespace $DIR/trigger_request
+echo "OK"
+
if [ -f $FW_FORCE_SYSFS_FALLBACK ]; then
run_test_config_0001
run_test_config_0002
diff --git a/tools/testing/selftests/ftrace/.gitignore b/tools/testing/selftests/ftrace/.gitignore
index 98d8a5a63049..2659417cb2c7 100644
--- a/tools/testing/selftests/ftrace/.gitignore
+++ b/tools/testing/selftests/ftrace/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
logs
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
index c2c8de4fafff..e59d985eeff0 100644
--- a/tools/testing/selftests/ftrace/config
+++ b/tools/testing/selftests/ftrace/config
@@ -11,5 +11,6 @@ CONFIG_PREEMPTIRQ_DELAY_TEST=m
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_SAMPLES=y
+CONFIG_SAMPLE_FTRACE_DIRECT=m
CONFIG_SAMPLE_TRACE_PRINTK=m
CONFIG_KALLSYMS_ALL=y
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index 063ecb290a5a..a4605b5ee66d 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -17,6 +17,7 @@ echo " -v|--verbose Increase verbosity of test messages"
echo " -vv Alias of -v -v (Show all results in stdout)"
echo " -vvv Alias of -v -v -v (Show all commands immediately)"
echo " --fail-unsupported Treat UNSUPPORTED as a failure"
+echo " --fail-unresolved Treat UNRESOLVED as a failure"
echo " -d|--debug Debug mode (trace all shell commands)"
echo " -l|--logdir <dir> Save logs on the <dir>"
echo " If <dir> is -, all logs output in console only"
@@ -29,8 +30,25 @@ err_ret=1
# kselftest skip code is 4
err_skip=4
+# cgroup RT scheduling prevents chrt commands from succeeding, which
+# induces failures in test wakeup tests. Disable for the duration of
+# the tests.
+
+readonly sched_rt_runtime=/proc/sys/kernel/sched_rt_runtime_us
+
+sched_rt_runtime_orig=$(cat $sched_rt_runtime)
+
+setup() {
+ echo -1 > $sched_rt_runtime
+}
+
+cleanup() {
+ echo $sched_rt_runtime_orig > $sched_rt_runtime
+}
+
errexit() { # message
echo "Error: $1" 1>&2
+ cleanup
exit $err_ret
}
@@ -39,6 +57,8 @@ if [ `id -u` -ne 0 ]; then
errexit "this must be run by root user"
fi
+setup
+
# Utilities
absdir() { # file_path
(cd `dirname $1`; pwd)
@@ -93,6 +113,10 @@ parse_opts() { # opts
UNSUPPORTED_RESULT=1
shift 1
;;
+ --fail-unresolved)
+ UNRESOLVED_RESULT=1
+ shift 1
+ ;;
--logdir|-l)
LOG_DIR=$2
shift 2
@@ -157,6 +181,7 @@ KEEP_LOG=0
DEBUG=0
VERBOSE=0
UNSUPPORTED_RESULT=0
+UNRESOLVED_RESULT=0
STOP_FAILURE=0
# Parse command-line options
parse_opts $*
@@ -235,6 +260,7 @@ TOTAL_RESULT=0
INSTANCE=
CASENO=0
+
testcase() { # testfile
CASENO=$((CASENO+1))
desc=`grep "^#[ \t]*description:" $1 | cut -f2 -d:`
@@ -260,7 +286,7 @@ eval_result() { # sigval
$UNRESOLVED)
prlog " [${color_blue}UNRESOLVED${color_reset}]"
UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
- return 1 # this is a kind of bug.. something happened.
+ return $UNRESOLVED_RESULT # depends on use case
;;
$UNTESTED)
prlog " [${color_blue}UNTESTED${color_reset}]"
@@ -273,7 +299,7 @@ eval_result() { # sigval
return $UNSUPPORTED_RESULT # depends on use case
;;
$XFAIL)
- prlog " [${color_red}XFAIL${color_reset}]"
+ prlog " [${color_green}XFAIL${color_reset}]"
XFAILED_CASES="$XFAILED_CASES $CASENO"
return 0
;;
@@ -406,5 +432,7 @@ prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w`
prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w`
prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w`
+cleanup
+
# if no error, return 0
exit $TOTAL_RESULT
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
new file mode 100644
index 000000000000..f0f366f18d0c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
@@ -0,0 +1,125 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event tracing - restricts events based on pid notrace filtering
+# flags: instance
+
+do_reset() {
+ echo > set_event
+ echo > set_event_pid
+ echo > set_event_notrace_pid
+ echo 0 > options/event-fork
+ echo 0 > events/enable
+ clear_trace
+ echo 1 > tracing_on
+}
+
+fail() { #msg
+ cat trace
+ do_reset
+ echo $1
+ exit_fail
+}
+
+count_pid() {
+ pid=$@
+ cat trace | grep -v '^#' | sed -e 's/[^-]*-\([0-9]*\).*/\1/' | grep $pid | wc -l
+}
+
+count_no_pid() {
+ pid=$1
+ cat trace | grep -v '^#' | sed -e 's/[^-]*-\([0-9]*\).*/\1/' | grep -v $pid | wc -l
+}
+
+enable_system() {
+ system=$1
+
+ if [ -d events/$system ]; then
+ echo 1 > events/$system/enable
+ fi
+}
+
+enable_events() {
+ echo 0 > tracing_on
+ # Enable common groups of events, as all events can allow for
+ # events to be traced via scheduling that we don't care to test.
+ enable_system syscalls
+ enable_system rcu
+ enable_system block
+ enable_system exceptions
+ enable_system irq
+ enable_system net
+ enable_system power
+ enable_system signal
+ enable_system sock
+ enable_system timer
+ enable_system thermal
+ echo 1 > tracing_on
+}
+
+if [ ! -f set_event -o ! -d events/sched ]; then
+ echo "event tracing is not supported"
+ exit_unsupported
+fi
+
+if [ ! -f set_event_pid -o ! -f set_event_notrace_pid ]; then
+ echo "event pid notrace filtering is not supported"
+ exit_unsupported
+fi
+
+echo 0 > options/event-fork
+
+do_reset
+
+read mypid rest < /proc/self/stat
+
+echo $mypid > set_event_notrace_pid
+grep -q $mypid set_event_notrace_pid
+
+enable_events
+
+yield
+
+echo 0 > tracing_on
+
+cnt=`count_pid $mypid`
+if [ $cnt -ne 0 ]; then
+ fail "Filtered out task has events"
+fi
+
+cnt=`count_no_pid $mypid`
+if [ $cnt -eq 0 ]; then
+ fail "No other events were recorded"
+fi
+
+do_reset
+
+echo $mypid > set_event_notrace_pid
+echo 1 > options/event-fork
+
+enable_events
+
+yield &
+child=$!
+echo "child = $child"
+wait $child
+
+echo 0 > tracing_on
+
+cnt=`count_pid $mypid`
+if [ $cnt -ne 0 ]; then
+ fail "Filtered out task has events"
+fi
+
+cnt=`count_pid $child`
+if [ $cnt -ne 0 ]; then
+ fail "Child of filtered out taskhas events"
+fi
+
+cnt=`count_no_pid $mypid`
+if [ $cnt -eq 0 ]; then
+ fail "No other events were recorded"
+fi
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
index aefab0c66d54..f59853857ad2 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc
@@ -10,10 +10,7 @@ if ! grep -q function_graph available_tracers; then
exit_unsupported
fi
-if [ ! -f set_ftrace_filter ]; then
- echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
- exit_unsupported
-fi
+check_filter_file set_ftrace_filter
do_reset() {
if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
index c8a5209f2119..d610f47edd90 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc
@@ -9,6 +9,8 @@ if ! grep -q function_graph available_tracers; then
exit_unsupported
fi
+check_filter_file set_ftrace_filter
+
fail() { # msg
echo $1
exit_fail
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
index f4e92afab14b..28936f434ee5 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc
@@ -9,6 +9,8 @@ if ! grep -q function available_tracers; then
exit_unsupported
fi
+check_filter_file set_ftrace_filter
+
disable_tracing
clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
new file mode 100644
index 000000000000..71db68a7975f
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
@@ -0,0 +1,105 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function pid notrace filters
+# flags: instance
+
+# Make sure that function pid matching filter with notrace works.
+
+if ! grep -q function available_tracers; then
+ echo "no function tracer configured"
+ exit_unsupported
+fi
+
+if [ ! -f set_ftrace_notrace_pid ]; then
+ echo "set_ftrace_notrace_pid not found? Is function tracer not set?"
+ exit_unsupported
+fi
+
+check_filter_file set_ftrace_filter
+
+do_function_fork=1
+
+if [ ! -f options/function-fork ]; then
+ do_function_fork=0
+ echo "no option for function-fork found. Option will not be tested."
+fi
+
+read PID _ < /proc/self/stat
+
+if [ $do_function_fork -eq 1 ]; then
+ # default value of function-fork option
+ orig_value=`grep function-fork trace_options`
+fi
+
+do_reset() {
+ if [ $do_function_fork -eq 0 ]; then
+ return
+ fi
+
+ echo > set_ftrace_notrace_pid
+ echo $orig_value > trace_options
+}
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+do_test() {
+ disable_tracing
+
+ echo do_execve* > set_ftrace_filter
+ echo *do_fork >> set_ftrace_filter
+
+ echo $PID > set_ftrace_notrace_pid
+ echo function > current_tracer
+
+ if [ $do_function_fork -eq 1 ]; then
+ # don't allow children to be traced
+ echo nofunction-fork > trace_options
+ fi
+
+ enable_tracing
+ yield
+
+ count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+ count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+ # count_pid should be 0
+ if [ $count_pid -ne 0 -o $count_other -eq 0 ]; then
+ fail "PID filtering not working? traced task = $count_pid; other tasks = $count_other "
+ fi
+
+ disable_tracing
+ clear_trace
+
+ if [ $do_function_fork -eq 0 ]; then
+ return
+ fi
+
+ # allow children to be traced
+ echo function-fork > trace_options
+
+ # With pid in both set_ftrace_pid and set_ftrace_notrace_pid
+ # there should not be any tasks traced.
+
+ echo $PID > set_ftrace_pid
+
+ enable_tracing
+ yield
+
+ count_pid=`cat trace | grep -v ^# | grep $PID | wc -l`
+ count_other=`cat trace | grep -v ^# | grep -v $PID | wc -l`
+
+ # both should be zero
+ if [ $count_pid -ne 0 -o $count_other -ne 0 ]; then
+ fail "PID filtering not following fork? traced task = $count_pid; other tasks = $count_other "
+ fi
+}
+
+do_test
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
index f2ee1e889e13..d58403c4b7cd 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -16,10 +16,7 @@ if [ ! -f set_ftrace_pid ]; then
exit_unsupported
fi
-if [ ! -f set_ftrace_filter ]; then
- echo "set_ftrace_filter not found? Is function tracer not set?"
- exit_unsupported
-fi
+check_filter_file set_ftrace_filter
do_function_fork=1
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
index 1a52f2883fe0..b2aff786c1a2 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -3,7 +3,7 @@
# description: ftrace - stacktrace filter command
# flags: instance
-[ ! -f set_ftrace_filter ] && exit_unsupported
+check_filter_file set_ftrace_filter
echo _do_fork:stacktrace >> set_ftrace_filter
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
index ca2ffd7957f9..e9b1fd534e96 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -11,10 +11,7 @@
#
# The triggers are set within the set_ftrace_filter file
-if [ ! -f set_ftrace_filter ]; then
- echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
- exit_unsupported
-fi
+check_filter_file set_ftrace_filter
do_reset() {
reset_ftrace_filter
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc
index 9330c873f9fe..1a4b4a442d33 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_mod_trace.tc
@@ -2,7 +2,7 @@
# SPDX-License-Identifier: GPL-2.0
# description: ftrace - function trace on module
-[ ! -f set_ftrace_filter ] && exit_unsupported
+check_filter_file set_ftrace_filter
: "mod: allows to filter a non exist function"
echo 'non_exist_func:mod:non_exist_module' > set_ftrace_filter
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
index dfbae637c60c..a3dadb6b93b4 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc
@@ -18,10 +18,7 @@ if ! grep -q function_graph available_tracers; then
exit_unsupported;
fi
-if [ ! -f set_ftrace_filter ]; then
- echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
- exit_unsupported
-fi
+check_filter_file set_ftrace_filter
if [ ! -f function_profile_enabled ]; then
echo "function_profile_enabled not found, function profiling enabled?"
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
index 51f6e6146bd9..70bad441fa7d 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -10,10 +10,7 @@
#
# The triggers are set within the set_ftrace_filter file
-if [ ! -f set_ftrace_filter ]; then
- echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
- exit_unsupported
-fi
+check_filter_file set_ftrace_filter
fail() { # mesg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc
index b414f0e3c646..51e9e80bc0e6 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_stack_tracer.tc
@@ -8,6 +8,8 @@ if [ ! -f stack_trace ]; then
exit_unsupported
fi
+check_filter_file stack_trace_filter
+
echo > stack_trace_filter
echo 0 > stack_max_size
echo 1 > /proc/sys/kernel/stack_tracer_enabled
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
index 0c04282d33dd..3ed173f2944a 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_traceonoff_triggers.tc
@@ -11,10 +11,7 @@
#
# The triggers are set within the set_ftrace_filter file
-if [ ! -f set_ftrace_filter ]; then
- echo "set_ftrace_filter not found? Is dynamic ftrace not set?"
- exit_unsupported
-fi
+check_filter_file set_ftrace_filter
fail() { # mesg
echo $1
@@ -41,7 +38,7 @@ fi
echo '** ENABLE EVENTS'
-echo 1 > events/enable
+echo 1 > events/sched/enable
echo '** ENABLE TRACING'
enable_tracing
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc b/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc
index 021c03fd885d..23465823532b 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/tracing-error-log.tc
@@ -14,6 +14,8 @@ if [ ! -f set_event ]; then
exit_unsupported
fi
+[ -f error_log ] || exit_unsupported
+
ftrace_errlog_check 'event filter parse error' '((sig >= 10 && sig < 15) || dsig ^== 17) && comm != bash' 'events/signal/signal_generate/filter'
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 5d4550591ff9..697c77ef2e2b 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -1,3 +1,9 @@
+check_filter_file() { # check filter file introduced by dynamic ftrace
+ if [ ! -f "$1" ]; then
+ echo "$1 not found? Is dynamic ftrace not set?"
+ exit_unsupported
+ fi
+}
clear_trace() { # reset trace output
echo > trace
@@ -113,12 +119,14 @@ yield() {
ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1
}
+# Since probe event command may include backslash, explicitly use printf "%s"
+# to NOT interpret it.
ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file
- pos=$(echo -n "${2%^*}" | wc -c) # error position
- command=$(echo "$2" | tr -d ^)
+ pos=$(printf "%s" "${2%^*}" | wc -c) # error position
+ command=$(printf "%s" "$2" | tr -d ^)
echo "Test command: $command"
echo > error_log
- (! echo "$command" >> "$3" ) 2> /dev/null
+ (! printf "%s" "$command" >> "$3" ) 2> /dev/null
grep "$1: error:" -A 3 error_log
N=$(tail -n 1 error_log | wc -c)
# " Command: " and "^\n" => 13
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
index 1bcb67dcae26..81490ecaaa92 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -38,7 +38,7 @@ for width in 64 32 16 8; do
echo 0 > events/kprobes/testprobe/enable
: "Confirm the arguments is recorded in given types correctly"
- ARGS=`grep "testprobe" trace | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'`
+ ARGS=`grep "testprobe" trace | head -n 1 | sed -e 's/.* arg1=\(.*\) arg2=\(.*\) arg3=\(.*\) arg4=\(.*\)/\1 \2 \3 \4/'`
check_types $ARGS $width
: "Clear event for next loop"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index 7650a82db3f5..df5072815b87 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -5,6 +5,8 @@
[ -f kprobe_events ] || exit_unsupported # this is configurable
grep "function" available_tracers || exit_unsupported # this is configurable
+check_filter_file set_ftrace_filter
+
# prepare
echo nop > current_tracer
echo _do_fork > set_ftrace_filter
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index ef1e9bafb098..eb0f4ab4e070 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -91,7 +91,9 @@ esac
if grep -q "Create/append/" README && grep -q "imm-value" README; then
echo 'p:kprobes/testevent _do_fork' > kprobe_events
check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE
-echo 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
+
+# Explicitly use printf "%s" to not interpret \1
+printf "%s" 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE
check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE
check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE
diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
index cbd174334a48..2b82c80edf69 100644
--- a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
+++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
@@ -17,7 +17,14 @@ unsup() { #msg
exit_unsupported
}
-modprobe $MOD || unsup "$MOD module not available"
+unres() { #msg
+ reset_tracer
+ rmmod $MOD || true
+ echo $1
+ exit_unresolved
+}
+
+modprobe $MOD || unres "$MOD module not available"
rmmod $MOD
grep -q "preemptoff" available_tracers || unsup "preemptoff tracer not enabled"
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
index 77be6e1f6e7b..e232059a8ab2 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -17,6 +17,11 @@ if [ ! -f synthetic_events ]; then
exit_unsupported
fi
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
echo "Test field variable support"
echo 'wakeup_latency u64 lat; pid_t pid; int prio; char comm[16]' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
index f3eb8aacec0e..07cfcb8157b6 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -17,6 +17,11 @@ if [ ! -f synthetic_events ]; then
exit_unsupported
fi
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
echo "Test create synthetic event"
echo 'waking_latency u64 lat pid_t pid' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
index d281f056f980..73e413c2ca26 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-multi-actions-accept.tc
@@ -17,6 +17,11 @@ if [ ! -f synthetic_events ]; then
exit_unsupported
fi
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
echo "Test multiple actions on hist trigger"
echo 'wakeup_latency u64 lat; pid_t pid' >> synthetic_events
TRIGGER1=events/sched/sched_wakeup/trigger
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
index a708f0e7858a..ebe0ad827f9f 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -17,6 +17,11 @@ if [ ! -f synthetic_events ]; then
exit_unsupported
fi
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
echo "Test create synthetic event"
echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
index dfce6932d8be..2a2ef767249e 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -17,6 +17,11 @@ if [ ! -f synthetic_events ]; then
exit_unsupported
fi
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
echo "Test create synthetic event"
echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
index 0035995c2194..98d73bfb0296 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -17,6 +17,11 @@ if [ ! -f synthetic_events ]; then
exit_unsupported
fi
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
echo "Test create synthetic event"
echo 'wakeup_latency u64 lat pid_t pid char comm[16]' > synthetic_events
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
index f546c1b66a9b..01b01b9c4e07 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
@@ -12,6 +12,11 @@ if [ ! -f set_event ]; then
exit_unsupported
fi
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
if [ ! -f snapshot ]; then
echo "snapshot is not supported"
exit_unsupported
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
index 8021d60aafec..c3baa486aeb4 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
@@ -17,6 +17,11 @@ if [ ! -f synthetic_events ]; then
exit_unsupported
fi
+if [ ! -f events/sched/sched_process_fork/hist ]; then
+ echo "hist trigger is not supported"
+ exit_unsupported
+fi
+
grep -q "trace(<synthetic_event>" README || exit_unsupported # version issue
echo "Test create synthetic event"
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
index 18fdaab9f570..68ff3f45c720 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-multihist.tc
@@ -23,7 +23,7 @@ if [ ! -f events/sched/sched_process_fork/hist ]; then
exit_unsupported
fi
-echo "Test histogram multiple tiggers"
+echo "Test histogram multiple triggers"
echo 'hist:keys=parent_pid:vals=child_pid' > events/sched/sched_process_fork/trigger
echo 'hist:keys=parent_comm:vals=child_pid' >> events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index a09f57061902..0efcd494daab 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
futex_requeue_pi
futex_requeue_pi_mismatched_ops
futex_requeue_pi_signal_restart
diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh
index 8b2b6088540d..4a974bc03385 100755
--- a/tools/testing/selftests/gen_kselftest_tar.sh
+++ b/tools/testing/selftests/gen_kselftest_tar.sh
@@ -49,6 +49,11 @@ main()
# directory
./kselftest_install.sh "$install_dir"
(cd "$install_work"; tar $copts "$dest"/kselftest${ext} $install_name)
+
+ # Don't put the message at the actual end as people may be parsing the
+ # "archive created" line in their scripts.
+ echo -e "\nConsider using 'make gen_tar' instead of this script\n"
+
echo "Kselftest archive kselftest${ext} created!"
# clean up top-level install work directory
diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
index 7d14f743d1a4..4c69408f3e84 100644
--- a/tools/testing/selftests/gpio/.gitignore
+++ b/tools/testing/selftests/gpio/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
gpio-mockup-chardev
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 0bb80619db58..32bdc978a711 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -1,13 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
-MOUNT_CFLAGS := $(shell pkg-config --cflags mount 2>/dev/null)
-MOUNT_LDLIBS := $(shell pkg-config --libs mount 2>/dev/null)
-ifeq ($(MOUNT_LDLIBS),)
-MOUNT_LDLIBS := -lmount -I/usr/include/libmount
+VAR_CFLAGS := $(shell pkg-config --cflags mount 2>/dev/null)
+VAR_LDLIBS := $(shell pkg-config --libs mount 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS := -lmount -I/usr/include/libmount
endif
-CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ $(MOUNT_CFLAGS)
-LDLIBS += $(MOUNT_LDLIBS)
+CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ $(VAR_CFLAGS)
+LDLIBS += $(VAR_LDLIBS)
TEST_PROGS := gpio-mockup.sh
TEST_FILES := gpio-mockup-sysfs.sh
diff --git a/tools/testing/selftests/ia64/.gitignore b/tools/testing/selftests/ia64/.gitignore
index ab806edc8732..e962fb2a08d5 100644
--- a/tools/testing/selftests/ia64/.gitignore
+++ b/tools/testing/selftests/ia64/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
aliasing-test
diff --git a/tools/testing/selftests/intel_pstate/.gitignore b/tools/testing/selftests/intel_pstate/.gitignore
index 3bfcbae5fa13..862de222a3f3 100644
--- a/tools/testing/selftests/intel_pstate/.gitignore
+++ b/tools/testing/selftests/intel_pstate/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
aperf
msr
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index 7340fd6a9a9f..39f0fa2a8fd6 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
-LDLIBS := $(LDLIBS) -lm
+LDLIBS += -lm
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
diff --git a/tools/testing/selftests/ipc/.gitignore b/tools/testing/selftests/ipc/.gitignore
index 9af04c9353c0..9ed280e4c704 100644
--- a/tools/testing/selftests/ipc/.gitignore
+++ b/tools/testing/selftests/ipc/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
msgque_test
msgque
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index 4c156aeab6b8..5ec4d9e18806 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -137,7 +137,7 @@ int dump_queue(struct msgque_data *msgque)
for (kern_id = 0; kern_id < 256; kern_id++) {
ret = msgctl(kern_id, MSG_STAT, &ds);
if (ret < 0) {
- if (errno == -EINVAL)
+ if (errno == EINVAL)
continue;
printf("Failed to get stats for IPC queue with id %d\n",
kern_id);
diff --git a/tools/testing/selftests/ir/.gitignore b/tools/testing/selftests/ir/.gitignore
index 070ea0c75fb8..0bbada8c1811 100644
--- a/tools/testing/selftests/ir/.gitignore
+++ b/tools/testing/selftests/ir/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
ir_loopback
diff --git a/tools/testing/selftests/kcmp/.gitignore b/tools/testing/selftests/kcmp/.gitignore
index 5a9b3732b2de..38ccdfe80ef7 100644
--- a/tools/testing/selftests/kcmp/.gitignore
+++ b/tools/testing/selftests/kcmp/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
kcmp_test
kcmp-test-file
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index 8b944cf042f6..3702dbcc90a7 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -61,6 +61,8 @@ ALL_TESTS="$ALL_TESTS 0006:10:1"
ALL_TESTS="$ALL_TESTS 0007:5:1"
ALL_TESTS="$ALL_TESTS 0008:150:1"
ALL_TESTS="$ALL_TESTS 0009:150:1"
+ALL_TESTS="$ALL_TESTS 0010:1:1"
+ALL_TESTS="$ALL_TESTS 0011:1:1"
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -149,6 +151,7 @@ function load_req_mod()
test_finish()
{
+ echo "$MODPROBE" > /proc/sys/kernel/modprobe
echo "Test completed"
}
@@ -443,6 +446,30 @@ kmod_test_0009()
config_expect_result ${FUNCNAME[0]} SUCCESS
}
+kmod_test_0010()
+{
+ kmod_defaults_driver
+ config_num_threads 1
+ echo "/KMOD_TEST_NONEXISTENT" > /proc/sys/kernel/modprobe
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} -ENOENT
+ echo "$MODPROBE" > /proc/sys/kernel/modprobe
+}
+
+kmod_test_0011()
+{
+ kmod_defaults_driver
+ config_num_threads 1
+ # This causes the kernel to not even try executing modprobe. The error
+ # code is still -ENOENT like when modprobe doesn't exist, so we can't
+ # easily test for the exact difference. But this still is a useful test
+ # since there was a bug where request_module() returned 0 in this case.
+ echo > /proc/sys/kernel/modprobe
+ config_trigger ${FUNCNAME[0]}
+ config_expect_result ${FUNCNAME[0]} -ENOENT
+ echo "$MODPROBE" > /proc/sys/kernel/modprobe
+}
+
list_tests()
{
echo "Test ID list:"
@@ -460,6 +487,8 @@ list_tests()
echo "0007 x $(get_test_count 0007) - multithreaded tests with default setup test request_module() and get_fs_type()"
echo "0008 x $(get_test_count 0008) - multithreaded - push kmod_concurrent over max_modprobes for request_module()"
echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()"
+ echo "0010 x $(get_test_count 0010) - test nonexistent modprobe path"
+ echo "0011 x $(get_test_count 0011) - test completely disabling module autoloading"
}
usage()
@@ -505,18 +534,23 @@ function test_num()
fi
}
-function get_test_count()
+function get_test_data()
{
test_num $1
- TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ local field_num=$(echo $1 | sed 's/^0*//')
+ echo $ALL_TESTS | awk '{print $'$field_num'}'
+}
+
+function get_test_count()
+{
+ TEST_DATA=$(get_test_data $1)
LAST_TWO=${TEST_DATA#*:*}
echo ${LAST_TWO%:*}
}
function get_test_enabled()
{
- test_num $1
- TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ TEST_DATA=$(get_test_data $1)
echo ${TEST_DATA#*:*:}
}
@@ -611,6 +645,7 @@ test_reqs
allow_user_defaults
load_req_mod
+MODPROBE=$(</proc/sys/kernel/modprobe)
trap "test_finish" EXIT
parse_args $@
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index e84d901f8567..676b3a8b114d 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -33,7 +33,7 @@ tap_timeout()
{
# Make sure tests will time out if utility is available.
if [ -x /usr/bin/timeout ] ; then
- /usr/bin/timeout "$kselftest_timeout" "$1"
+ /usr/bin/timeout --foreground "$kselftest_timeout" "$1"
else
"$1"
fi
diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh
new file mode 100755
index 000000000000..bbc04646346b
--- /dev/null
+++ b/tools/testing/selftests/kselftest_deps.sh
@@ -0,0 +1,272 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# kselftest_deps.sh
+#
+# Checks for kselftest build dependencies on the build system.
+# Copyright (c) 2020 Shuah Khan <skhan@linuxfoundation.org>
+#
+#
+
+usage()
+{
+
+echo -e "Usage: $0 -[p] <compiler> [test_name]\n"
+echo -e "\tkselftest_deps.sh [-p] gcc"
+echo -e "\tkselftest_deps.sh [-p] gcc vm"
+echo -e "\tkselftest_deps.sh [-p] aarch64-linux-gnu-gcc"
+echo -e "\tkselftest_deps.sh [-p] aarch64-linux-gnu-gcc vm\n"
+echo "- Should be run in selftests directory in the kernel repo."
+echo "- Checks if Kselftests can be built/cross-built on a system."
+echo "- Parses all test/sub-test Makefile to find library dependencies."
+echo "- Runs compile test on a trivial C file with LDLIBS specified"
+echo " in the test Makefiles to identify missing library dependencies."
+echo "- Prints suggested target list for a system filtering out tests"
+echo " failed the build dependency check from the TARGETS in Selftests"
+echo " main Makefile when optional -p is specified."
+echo "- Prints pass/fail dependency check for each tests/sub-test."
+echo "- Prints pass/fail targets and libraries."
+echo "- Default: runs dependency checks on all tests."
+echo "- Optional test name can be specified to check dependencies for it."
+exit 1
+
+}
+
+# Start main()
+main()
+{
+
+base_dir=`pwd`
+# Make sure we're in the selftests top-level directory.
+if [ $(basename "$base_dir") != "selftests" ]; then
+ echo -e "\tPlease run $0 in"
+ echo -e "\ttools/testing/selftests directory ..."
+ exit 1
+fi
+
+print_targets=0
+
+while getopts "p" arg; do
+ case $arg in
+ p)
+ print_targets=1
+ shift;;
+ esac
+done
+
+if [ $# -eq 0 ]
+then
+ usage
+fi
+
+# Compiler
+CC=$1
+
+tmp_file=$(mktemp).c
+trap "rm -f $tmp_file.o $tmp_file $tmp_file.bin" EXIT
+#echo $tmp_file
+
+pass=$(mktemp).out
+trap "rm -f $pass" EXIT
+#echo $pass
+
+fail=$(mktemp).out
+trap "rm -f $fail" EXIT
+#echo $fail
+
+# Generate tmp source fire for compile test
+cat << "EOF" > $tmp_file
+int main()
+{
+}
+EOF
+
+# Save results
+total_cnt=0
+fail_trgts=()
+fail_libs=()
+fail_cnt=0
+pass_trgts=()
+pass_libs=()
+pass_cnt=0
+
+# Get all TARGETS from selftests Makefile
+targets=$(egrep "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2)
+
+# Single test case
+if [ $# -eq 2 ]
+then
+ test=$2/Makefile
+
+ l1_test $test
+ l2_test $test
+ l3_test $test
+
+ print_results $1 $2
+ exit $?
+fi
+
+# Level 1: LDLIBS set static.
+#
+# Find all LDLIBS set statically for all executables built by a Makefile
+# and filter out VAR_LDLIBS to discard the following:
+# gpio/Makefile:LDLIBS += $(VAR_LDLIBS)
+# Append space at the end of the list to append more tests.
+
+l1_tests=$(grep -r --include=Makefile "^LDLIBS" | \
+ grep -v "VAR_LDLIBS" | awk -F: '{print $1}')
+
+# Level 2: LDLIBS set dynamically.
+#
+# Level 2
+# Some tests have multiple valid LDLIBS lines for individual sub-tests
+# that need dependency checks. Find them and append them to the tests
+# e.g: vm/Makefile:$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
+# Filter out VAR_LDLIBS to discard the following:
+# memfd/Makefile:$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS)
+# Append space at the end of the list to append more tests.
+
+l2_tests=$(grep -r --include=Makefile ": LDLIBS" | \
+ grep -v "VAR_LDLIBS" | awk -F: '{print $1}')
+
+# Level 3
+# gpio, memfd and others use pkg-config to find mount and fuse libs
+# respectively and save it in VAR_LDLIBS. If pkg-config doesn't find
+# any, VAR_LDLIBS set to default.
+# Use the default value and filter out pkg-config for dependency check.
+# e.g:
+# gpio/Makefile
+# VAR_LDLIBS := $(shell pkg-config --libs mount) 2>/dev/null)
+# memfd/Makefile
+# VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null)
+
+l3_tests=$(grep -r --include=Makefile "^VAR_LDLIBS" | \
+ grep -v "pkg-config" | awk -F: '{print $1}')
+
+#echo $l1_tests
+#echo $l2_1_tests
+#echo $l3_tests
+
+all_tests
+print_results $1 $2
+
+exit $?
+}
+# end main()
+
+all_tests()
+{
+ for test in $l1_tests; do
+ l1_test $test
+ done
+
+ for test in $l2_tests; do
+ l2_test $test
+ done
+
+ for test in $l3_tests; do
+ l3_test $test
+ done
+}
+
+# Use same parsing used for l1_tests and pick libraries this time.
+l1_test()
+{
+ test_libs=$(grep --include=Makefile "^LDLIBS" $test | \
+ grep -v "VAR_LDLIBS" | \
+ sed -e 's/\:/ /' | \
+ sed -e 's/+/ /' | cut -d "=" -f 2)
+
+ check_libs $test $test_libs
+}
+
+# Use same parsing used for l2__tests and pick libraries this time.
+l2_test()
+{
+ test_libs=$(grep --include=Makefile ": LDLIBS" $test | \
+ grep -v "VAR_LDLIBS" | \
+ sed -e 's/\:/ /' | sed -e 's/+/ /' | \
+ cut -d "=" -f 2)
+
+ check_libs $test $test_libs
+}
+
+l3_test()
+{
+ test_libs=$(grep --include=Makefile "^VAR_LDLIBS" $test | \
+ grep -v "pkg-config" | sed -e 's/\:/ /' |
+ sed -e 's/+/ /' | cut -d "=" -f 2)
+
+ check_libs $test $test_libs
+}
+
+check_libs()
+{
+
+if [[ ! -z "${test_libs// }" ]]
+then
+
+ #echo $test_libs
+
+ for lib in $test_libs; do
+
+ let total_cnt+=1
+ $CC -o $tmp_file.bin $lib $tmp_file > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "FAIL: $test dependency check: $lib" >> $fail
+ let fail_cnt+=1
+ fail_libs+="$lib "
+ fail_target=$(echo "$test" | cut -d "/" -f1)
+ fail_trgts+="$fail_target "
+ targets=$(echo "$targets" | grep -v "$fail_target")
+ else
+ echo "PASS: $test dependency check passed $lib" >> $pass
+ let pass_cnt+=1
+ pass_libs+="$lib "
+ pass_trgts+="$(echo "$test" | cut -d "/" -f1) "
+ fi
+
+ done
+fi
+}
+
+print_results()
+{
+ echo -e "========================================================";
+ echo -e "Kselftest Dependency Check for [$0 $1 $2] results..."
+
+ if [ $print_targets -ne 0 ]
+ then
+ echo -e "Suggested Selftest Targets for your configuration:"
+ echo -e "$targets";
+ fi
+
+ echo -e "========================================================";
+ echo -e "Checked tests defining LDLIBS dependencies"
+ echo -e "--------------------------------------------------------";
+ echo -e "Total tests with Dependencies:"
+ echo -e "$total_cnt Pass: $pass_cnt Fail: $fail_cnt";
+
+ if [ $pass_cnt -ne 0 ]; then
+ echo -e "--------------------------------------------------------";
+ cat $pass
+ echo -e "--------------------------------------------------------";
+ echo -e "Targets passed build dependency check on system:"
+ echo -e "$(echo "$pass_trgts" | xargs -n1 | sort -u | xargs)"
+ fi
+
+ if [ $fail_cnt -ne 0 ]; then
+ echo -e "--------------------------------------------------------";
+ cat $fail
+ echo -e "--------------------------------------------------------";
+ echo -e "Targets failed build dependency check on system:"
+ echo -e "$(echo "$fail_trgts" | xargs -n1 | sort -u | xargs)"
+ echo -e "--------------------------------------------------------";
+ echo -e "Missing libraries system"
+ echo -e "$(echo "$fail_libs" | xargs -n1 | sort -u | xargs)"
+ fi
+
+ echo -e "--------------------------------------------------------";
+ echo -e "========================================================";
+}
+
+main "$@"
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 5336b26506ab..c9f03ef93338 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -168,9 +168,17 @@
#define __TEST_IMPL(test_name, _signal) \
static void test_name(struct __test_metadata *_metadata); \
+ static inline void wrapper_##test_name( \
+ struct __test_metadata *_metadata, \
+ struct __fixture_variant_metadata *variant) \
+ { \
+ test_name(_metadata); \
+ } \
static struct __test_metadata _##test_name##_object = \
- { .name = "global." #test_name, \
- .fn = &test_name, .termsig = _signal, \
+ { .name = #test_name, \
+ .fn = &wrapper_##test_name, \
+ .fixture = &_fixture_global, \
+ .termsig = _signal, \
.timeout = TEST_TIMEOUT_DEFAULT, }; \
static void __attribute__((constructor)) _register_##test_name(void) \
{ \
@@ -212,10 +220,13 @@
* populated and cleaned up using FIXTURE_SETUP() and FIXTURE_TEARDOWN().
*/
#define FIXTURE(fixture_name) \
+ FIXTURE_VARIANT(fixture_name); \
+ static struct __fixture_metadata _##fixture_name##_fixture_object = \
+ { .name = #fixture_name, }; \
static void __attribute__((constructor)) \
_register_##fixture_name##_data(void) \
{ \
- __fixture_count++; \
+ __register_fixture(&_##fixture_name##_fixture_object); \
} \
FIXTURE_DATA(fixture_name)
@@ -241,7 +252,10 @@
#define FIXTURE_SETUP(fixture_name) \
void fixture_name##_setup( \
struct __test_metadata __attribute__((unused)) *_metadata, \
- FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+ const FIXTURE_VARIANT(fixture_name) \
+ __attribute__((unused)) *variant)
+
/**
* FIXTURE_TEARDOWN(fixture_name)
* *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
@@ -264,6 +278,59 @@
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
/**
+ * FIXTURE_VARIANT(fixture_name) - Optionally called once per fixture
+ * to declare fixture variant
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_VARIANT(datatype name) {
+ * type property1;
+ * ...
+ * };
+ *
+ * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F()
+ * as *variant*. Variants allow the same tests to be run with different
+ * arguments.
+ */
+#define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name
+
+/**
+ * FIXTURE_VARIANT_ADD(fixture_name, variant_name) - Called once per fixture
+ * variant to setup and register the data
+ *
+ * @fixture_name: fixture name
+ * @variant_name: name of the parameter set
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_ADD(datatype name) {
+ * .property1 = val1;
+ * ...
+ * };
+ *
+ * Defines a variant of the test fixture, provided to FIXTURE_SETUP() and
+ * TEST_F() as *variant*. Tests of each fixture will be run once for each
+ * variant.
+ */
+#define FIXTURE_VARIANT_ADD(fixture_name, variant_name) \
+ extern FIXTURE_VARIANT(fixture_name) \
+ _##fixture_name##_##variant_name##_variant; \
+ static struct __fixture_variant_metadata \
+ _##fixture_name##_##variant_name##_object = \
+ { .name = #variant_name, \
+ .data = &_##fixture_name##_##variant_name##_variant}; \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_##variant_name(void) \
+ { \
+ __register_fixture_variant(&_##fixture_name##_fixture_object, \
+ &_##fixture_name##_##variant_name##_object); \
+ } \
+ FIXTURE_VARIANT(fixture_name) \
+ _##fixture_name##_##variant_name##_variant =
+
+/**
* TEST_F(fixture_name, test_name) - Emits test registration and helpers for
* fixture-based test cases
*
@@ -293,24 +360,27 @@
#define __TEST_F_IMPL(fixture_name, test_name, signal, tmout) \
static void fixture_name##_##test_name( \
struct __test_metadata *_metadata, \
- FIXTURE_DATA(fixture_name) *self); \
+ FIXTURE_DATA(fixture_name) *self, \
+ const FIXTURE_VARIANT(fixture_name) *variant); \
static inline void wrapper_##fixture_name##_##test_name( \
- struct __test_metadata *_metadata) \
+ struct __test_metadata *_metadata, \
+ struct __fixture_variant_metadata *variant) \
{ \
/* fixture data is alloced, setup, and torn down per call. */ \
FIXTURE_DATA(fixture_name) self; \
memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
- fixture_name##_setup(_metadata, &self); \
+ fixture_name##_setup(_metadata, &self, variant->data); \
/* Let setup failure terminate early. */ \
if (!_metadata->passed) \
return; \
- fixture_name##_##test_name(_metadata, &self); \
+ fixture_name##_##test_name(_metadata, &self, variant->data); \
fixture_name##_teardown(_metadata, &self); \
} \
static struct __test_metadata \
_##fixture_name##_##test_name##_object = { \
- .name = #fixture_name "." #test_name, \
+ .name = #test_name, \
.fn = &wrapper_##fixture_name##_##test_name, \
+ .fixture = &_##fixture_name##_fixture_object, \
.termsig = signal, \
.timeout = tmout, \
}; \
@@ -321,7 +391,9 @@
} \
static void fixture_name##_##test_name( \
struct __test_metadata __attribute__((unused)) *_metadata, \
- FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+ const FIXTURE_VARIANT(fixture_name) \
+ __attribute__((unused)) *variant)
/**
* TEST_HARNESS_MAIN - Simple wrapper to run the test harness
@@ -631,28 +703,84 @@
} \
} while (0); OPTIONAL_HANDLER(_assert)
+/* List helpers */
+#define __LIST_APPEND(head, item) \
+{ \
+ /* Circular linked list where only prev is circular. */ \
+ if (head == NULL) { \
+ head = item; \
+ item->next = NULL; \
+ item->prev = item; \
+ return; \
+ } \
+ if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { \
+ item->next = NULL; \
+ item->prev = head->prev; \
+ item->prev->next = item; \
+ head->prev = item; \
+ } else { \
+ item->next = head; \
+ item->next->prev = item; \
+ item->prev = item; \
+ head = item; \
+ } \
+}
+
+struct __test_metadata;
+struct __fixture_variant_metadata;
+
+/* Contains all the information about a fixture. */
+struct __fixture_metadata {
+ const char *name;
+ struct __test_metadata *tests;
+ struct __fixture_variant_metadata *variant;
+ struct __fixture_metadata *prev, *next;
+} _fixture_global __attribute__((unused)) = {
+ .name = "global",
+ .prev = &_fixture_global,
+};
+
+static struct __fixture_metadata *__fixture_list = &_fixture_global;
+static int __constructor_order;
+
+#define _CONSTRUCTOR_ORDER_FORWARD 1
+#define _CONSTRUCTOR_ORDER_BACKWARD -1
+
+static inline void __register_fixture(struct __fixture_metadata *f)
+{
+ __LIST_APPEND(__fixture_list, f);
+}
+
+struct __fixture_variant_metadata {
+ const char *name;
+ const void *data;
+ struct __fixture_variant_metadata *prev, *next;
+};
+
+static inline void
+__register_fixture_variant(struct __fixture_metadata *f,
+ struct __fixture_variant_metadata *variant)
+{
+ __LIST_APPEND(f->variant, variant);
+}
+
/* Contains all the information for test execution and status checking. */
struct __test_metadata {
const char *name;
- void (*fn)(struct __test_metadata *);
+ void (*fn)(struct __test_metadata *,
+ struct __fixture_variant_metadata *);
+ pid_t pid; /* pid of test when being run */
+ struct __fixture_metadata *fixture;
int termsig;
int passed;
int trigger; /* extra handler after the evaluation */
- int timeout;
+ int timeout; /* seconds to wait for test timeout */
+ bool timed_out; /* did this test timeout instead of exiting? */
__u8 step;
bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
struct __test_metadata *prev, *next;
};
-/* Storage for the (global) tests to be run. */
-static struct __test_metadata *__test_list;
-static unsigned int __test_count;
-static unsigned int __fixture_count;
-static int __constructor_order;
-
-#define _CONSTRUCTOR_ORDER_FORWARD 1
-#define _CONSTRUCTOR_ORDER_BACKWARD -1
-
/*
* Since constructors are called in reverse order, reverse the test
* list so tests are run in source declaration order.
@@ -664,25 +792,7 @@ static int __constructor_order;
*/
static inline void __register_test(struct __test_metadata *t)
{
- __test_count++;
- /* Circular linked list where only prev is circular. */
- if (__test_list == NULL) {
- __test_list = t;
- t->next = NULL;
- t->prev = t;
- return;
- }
- if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) {
- t->next = NULL;
- t->prev = __test_list->prev;
- t->prev->next = t;
- __test_list->prev = t;
- } else {
- t->next = __test_list;
- t->next->prev = t;
- t->prev = t;
- __test_list = t;
- }
+ __LIST_APPEND(t->fixture->tests, t);
}
static inline int __bail(int for_realz, bool no_print, __u8 step)
@@ -695,84 +805,160 @@ static inline int __bail(int for_realz, bool no_print, __u8 step)
return 0;
}
-void __run_test(struct __test_metadata *t)
+struct __test_metadata *__active_test;
+static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
+{
+ struct __test_metadata *t = __active_test;
+
+ /* Sanity check handler execution environment. */
+ if (!t) {
+ fprintf(TH_LOG_STREAM,
+ "no active test in SIGALRM handler!?\n");
+ abort();
+ }
+ if (sig != SIGALRM || sig != info->si_signo) {
+ fprintf(TH_LOG_STREAM,
+ "%s: SIGALRM handler caught signal %d!?\n",
+ t->name, sig != SIGALRM ? sig : info->si_signo);
+ abort();
+ }
+
+ t->timed_out = true;
+ kill(t->pid, SIGKILL);
+}
+
+void __wait_for_test(struct __test_metadata *t)
{
- pid_t child_pid;
+ struct sigaction action = {
+ .sa_sigaction = __timeout_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+ struct sigaction saved_action;
int status;
+ if (sigaction(SIGALRM, &action, &saved_action)) {
+ t->passed = 0;
+ fprintf(TH_LOG_STREAM,
+ "%s: unable to install SIGALRM handler\n",
+ t->name);
+ return;
+ }
+ __active_test = t;
+ t->timed_out = false;
+ alarm(t->timeout);
+ waitpid(t->pid, &status, 0);
+ alarm(0);
+ if (sigaction(SIGALRM, &saved_action, NULL)) {
+ t->passed = 0;
+ fprintf(TH_LOG_STREAM,
+ "%s: unable to uninstall SIGALRM handler\n",
+ t->name);
+ return;
+ }
+ __active_test = NULL;
+
+ if (t->timed_out) {
+ t->passed = 0;
+ fprintf(TH_LOG_STREAM,
+ "%s: Test terminated by timeout\n", t->name);
+ } else if (WIFEXITED(status)) {
+ t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
+ if (t->termsig != -1) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test exited normally "
+ "instead of by signal (code: %d)\n",
+ t->name,
+ WEXITSTATUS(status));
+ } else if (!t->passed) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test failed at step #%d\n",
+ t->name,
+ WEXITSTATUS(status));
+ }
+ } else if (WIFSIGNALED(status)) {
+ t->passed = 0;
+ if (WTERMSIG(status) == SIGABRT) {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test terminated by assertion\n",
+ t->name);
+ } else if (WTERMSIG(status) == t->termsig) {
+ t->passed = 1;
+ } else {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test terminated unexpectedly "
+ "by signal %d\n",
+ t->name,
+ WTERMSIG(status));
+ }
+ } else {
+ fprintf(TH_LOG_STREAM,
+ "%s: Test ended in some other way [%u]\n",
+ t->name,
+ status);
+ }
+}
+
+void __run_test(struct __fixture_metadata *f,
+ struct __fixture_variant_metadata *variant,
+ struct __test_metadata *t)
+{
+ /* reset test struct */
t->passed = 1;
t->trigger = 0;
- printf("[ RUN ] %s\n", t->name);
- alarm(t->timeout);
- child_pid = fork();
- if (child_pid < 0) {
+ t->step = 0;
+ t->no_print = 0;
+
+ printf("[ RUN ] %s%s%s.%s\n",
+ f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ t->pid = fork();
+ if (t->pid < 0) {
printf("ERROR SPAWNING TEST CHILD\n");
t->passed = 0;
- } else if (child_pid == 0) {
- t->fn(t);
+ } else if (t->pid == 0) {
+ t->fn(t, variant);
/* return the step that failed or 0 */
_exit(t->passed ? 0 : t->step);
} else {
- /* TODO(wad) add timeout support. */
- waitpid(child_pid, &status, 0);
- if (WIFEXITED(status)) {
- t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
- if (t->termsig != -1) {
- fprintf(TH_LOG_STREAM,
- "%s: Test exited normally "
- "instead of by signal (code: %d)\n",
- t->name,
- WEXITSTATUS(status));
- } else if (!t->passed) {
- fprintf(TH_LOG_STREAM,
- "%s: Test failed at step #%d\n",
- t->name,
- WEXITSTATUS(status));
- }
- } else if (WIFSIGNALED(status)) {
- t->passed = 0;
- if (WTERMSIG(status) == SIGABRT) {
- fprintf(TH_LOG_STREAM,
- "%s: Test terminated by assertion\n",
- t->name);
- } else if (WTERMSIG(status) == t->termsig) {
- t->passed = 1;
- } else {
- fprintf(TH_LOG_STREAM,
- "%s: Test terminated unexpectedly "
- "by signal %d\n",
- t->name,
- WTERMSIG(status));
- }
- } else {
- fprintf(TH_LOG_STREAM,
- "%s: Test ended in some other way [%u]\n",
- t->name,
- status);
- }
+ __wait_for_test(t);
}
- printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name);
- alarm(0);
+ printf("[ %4s ] %s%s%s.%s\n", (t->passed ? "OK" : "FAIL"),
+ f->name, variant->name[0] ? "." : "", variant->name, t->name);
}
static int test_harness_run(int __attribute__((unused)) argc,
char __attribute__((unused)) **argv)
{
+ struct __fixture_variant_metadata no_variant = { .name = "", };
+ struct __fixture_variant_metadata *v;
+ struct __fixture_metadata *f;
struct __test_metadata *t;
int ret = 0;
+ unsigned int case_count = 0, test_count = 0;
unsigned int count = 0;
unsigned int pass_count = 0;
+ for (f = __fixture_list; f; f = f->next) {
+ for (v = f->variant ?: &no_variant; v; v = v->next) {
+ case_count++;
+ for (t = f->tests; t; t = t->next)
+ test_count++;
+ }
+ }
+
/* TODO(wad) add optional arguments similar to gtest. */
printf("[==========] Running %u tests from %u test cases.\n",
- __test_count, __fixture_count + 1);
- for (t = __test_list; t; t = t->next) {
- count++;
- __run_test(t);
- if (t->passed)
- pass_count++;
- else
- ret = 1;
+ test_count, case_count);
+ for (f = __fixture_list; f; f = f->next) {
+ for (v = f->variant ?: &no_variant; v; v = v->next) {
+ for (t = f->tests; t; t = t->next) {
+ count++;
+ __run_test(f, v, t);
+ if (t->passed)
+ pass_count++;
+ else
+ ret = 1;
+ }
+ }
}
printf("[==========] %u / %u tests passed.\n", pass_count, count);
printf("[ %s ]\n", (ret ? "FAILED" : "PASSED"));
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 30072c3f52fb..f159718f90c0 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,5 +1,7 @@
-/s390x/sync_regs_test
+# SPDX-License-Identifier: GPL-2.0-only
/s390x/memop
+/s390x/resets
+/s390x/sync_regs_test
/x86_64/cr4_cpuid_sync_test
/x86_64/evmcs_test
/x86_64/hyperv_cpuid
@@ -8,6 +10,8 @@
/x86_64/set_sregs_test
/x86_64/smm_test
/x86_64/state_test
+/x86_64/vmx_preemption_timer_test
+/x86_64/svm_vmcall_test
/x86_64/sync_regs_test
/x86_64/vmx_close_while_nested_test
/x86_64/vmx_dirty_log_test
@@ -15,5 +19,8 @@
/x86_64/vmx_tsc_adjust_test
/x86_64/xss_msr_test
/clear_dirty_log_test
+/demand_paging_test
/dirty_log_test
/kvm_create_max_vcpus
+/set_memory_region_test
+/steal_time
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index d91c53b726e6..b4ff112e5c7e 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -5,9 +5,35 @@ all:
top_srcdir = ../../../..
KSFT_KHDR_INSTALL := 1
+
+# For cross-builds to work, UNAME_M has to map to ARCH and arch specific
+# directories and targets in this Makefile. "uname -m" doesn't map to
+# arch specific sub-directory names.
+#
+# UNAME_M variable to used to run the compiles pointing to the right arch
+# directories and build the right targets for these supported architectures.
+#
+# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable.
+# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable.
+#
+# x86_64 targets are named to include x86_64 as a suffix and directories
+# for includes are in x86_64 sub-directory. s390x and aarch64 follow the
+# same convention. "uname -m" doesn't result in the correct mapping for
+# s390x and aarch64.
+#
+# No change necessary for x86_64
UNAME_M := $(shell uname -m)
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c
+# Set UNAME_M for arm64 compile/install to work
+ifeq ($(ARCH),arm64)
+ UNAME_M := aarch64
+endif
+# Set UNAME_M s390x compile/install to work
+ifeq ($(ARCH),s390)
+ UNAME_M := s390x
+endif
+
+LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
@@ -20,26 +46,36 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
-TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
+TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += set_memory_region_test
+TEST_GEN_PROGS_x86_64 += steal_time
TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
+TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_aarch64 += set_memory_region_test
+TEST_GEN_PROGS_aarch64 += steal_time
TEST_GEN_PROGS_s390x = s390x/memop
-TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += s390x/resets
+TEST_GEN_PROGS_s390x += s390x/sync_regs_test
+TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
LIBKVM += $(LIBKVM_$(UNAME_M))
@@ -47,7 +83,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M))
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
LINUX_TOOL_INCLUDE = $(top_srcdir)/tools/include
-LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/x86/include
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
@@ -78,6 +114,7 @@ $(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
$(AR) crs $@ $^
+x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
all: $(STATIC_LIBS)
$(TEST_GEN_PROGS): $(STATIC_LIBS)
diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c
index 749336937d37..11672ec6f74e 100644
--- a/tools/testing/selftests/kvm/clear_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/clear_dirty_log_test.c
@@ -1,2 +1,6 @@
#define USE_CLEAR_DIRTY_LOG
+#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
+#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
+#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
+ KVM_DIRTY_LOG_INITIALLY_SET)
#include "dirty_log_test.c"
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
new file mode 100644
index 000000000000..360cd3ea4cd6
--- /dev/null
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -0,0 +1,661 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM demand paging test
+ * Adapted from dirty_log_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2019, Google, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <time.h>
+#include <poll.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/userfaultfd.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#ifdef __NR_userfaultfd
+
+/* The memory slot index demand page */
+#define TEST_MEM_SLOT_INDEX 1
+
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM 0xc0000000
+
+#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */
+
+#ifdef PRINT_PER_PAGE_UPDATES
+#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
+
+#ifdef PRINT_PER_VCPU_UPDATES
+#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
+
+#define MAX_VCPUS 512
+
+/*
+ * Guest/Host shared variables. Ensure addr_gva2hva() and/or
+ * sync_global_to/from_guest() are used when accessing from
+ * the host. READ/WRITE_ONCE() should also be used with anything
+ * that may change.
+ */
+static uint64_t host_page_size;
+static uint64_t guest_page_size;
+
+static char *guest_data_prototype;
+
+/*
+ * Guest physical memory offset of the testing memory slot.
+ * This will be set to the topmost valid physical address minus
+ * the test memory size.
+ */
+static uint64_t guest_test_phys_mem;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+
+struct vcpu_args {
+ uint64_t gva;
+ uint64_t pages;
+
+ /* Only used by the host userspace part of the vCPU thread */
+ int vcpu_id;
+ struct kvm_vm *vm;
+};
+
+static struct vcpu_args vcpu_args[MAX_VCPUS];
+
+/*
+ * Continuously write to the first 8 bytes of each page in the demand paging
+ * memory region.
+ */
+static void guest_code(uint32_t vcpu_id)
+{
+ uint64_t gva;
+ uint64_t pages;
+ int i;
+
+ /* Make sure vCPU args data structure is not corrupt. */
+ GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id);
+
+ gva = vcpu_args[vcpu_id].gva;
+ pages = vcpu_args[vcpu_id].pages;
+
+ for (i = 0; i < pages; i++) {
+ uint64_t addr = gva + (i * guest_page_size);
+
+ addr &= ~(host_page_size - 1);
+ *(uint64_t *)addr = 0x0123456789ABCDEF;
+ }
+
+ GUEST_SYNC(1);
+}
+
+static void *vcpu_worker(void *data)
+{
+ int ret;
+ struct vcpu_args *args = (struct vcpu_args *)data;
+ struct kvm_vm *vm = args->vm;
+ int vcpu_id = args->vcpu_id;
+ struct kvm_run *run;
+ struct timespec start, end, ts_diff;
+
+ vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
+ run = vcpu_state(vm, vcpu_id);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ /* Let the guest access its memory */
+ ret = _vcpu_run(vm, vcpu_id);
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
+ if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) {
+ TEST_ASSERT(false,
+ "Invalid guest sync status: exit_reason=%s\n",
+ exit_reason_str(run->exit_reason));
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ ts_diff = timespec_sub(end, start);
+ PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ return NULL;
+}
+
+#define PAGE_SHIFT_4K 12
+#define PTES_PER_4K_PT 512
+
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
+ uint64_t vcpu_memory_bytes)
+{
+ struct kvm_vm *vm;
+ uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
+
+ /* Account for a few pages per-vCPU for stacks */
+ pages += DEFAULT_STACK_PGS * vcpus;
+
+ /*
+ * Reserve twice the ammount of memory needed to map the test region and
+ * the page table / stacks region, at 4k, for page tables. Do the
+ * calculation with 4K page size: the smallest of all archs. (e.g., 64K
+ * page size guest will need even less memory for page tables).
+ */
+ pages += (2 * pages) / PTES_PER_4K_PT;
+ pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
+ PTES_PER_4K_PT;
+ pages = vm_adjust_num_guest_pages(mode, pages);
+
+ pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+ vm = _vm_create(mode, pages, O_RDWR);
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+#ifdef __x86_64__
+ vm_create_irqchip(vm);
+#endif
+ return vm;
+}
+
+static int handle_uffd_page_request(int uffd, uint64_t addr)
+{
+ pid_t tid;
+ struct timespec start;
+ struct timespec end;
+ struct uffdio_copy copy;
+ int r;
+
+ tid = syscall(__NR_gettid);
+
+ copy.src = (uint64_t)guest_data_prototype;
+ copy.dst = addr;
+ copy.len = host_page_size;
+ copy.mode = 0;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ r = ioctl(uffd, UFFDIO_COPY, &copy);
+ if (r == -1) {
+ pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n",
+ addr, tid, errno);
+ return r;
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
+ timespec_to_ns(timespec_sub(end, start)));
+ PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
+ host_page_size, addr, tid);
+
+ return 0;
+}
+
+bool quit_uffd_thread;
+
+struct uffd_handler_args {
+ int uffd;
+ int pipefd;
+ useconds_t delay;
+};
+
+static void *uffd_handler_thread_fn(void *arg)
+{
+ struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
+ int uffd = uffd_args->uffd;
+ int pipefd = uffd_args->pipefd;
+ useconds_t delay = uffd_args->delay;
+ int64_t pages = 0;
+ struct timespec start, end, ts_diff;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ while (!quit_uffd_thread) {
+ struct uffd_msg msg;
+ struct pollfd pollfd[2];
+ char tmp_chr;
+ int r;
+ uint64_t addr;
+
+ pollfd[0].fd = uffd;
+ pollfd[0].events = POLLIN;
+ pollfd[1].fd = pipefd;
+ pollfd[1].events = POLLIN;
+
+ r = poll(pollfd, 2, -1);
+ switch (r) {
+ case -1:
+ pr_info("poll err");
+ continue;
+ case 0:
+ continue;
+ case 1:
+ break;
+ default:
+ pr_info("Polling uffd returned %d", r);
+ return NULL;
+ }
+
+ if (pollfd[0].revents & POLLERR) {
+ pr_info("uffd revents has POLLERR");
+ return NULL;
+ }
+
+ if (pollfd[1].revents & POLLIN) {
+ r = read(pollfd[1].fd, &tmp_chr, 1);
+ TEST_ASSERT(r == 1,
+ "Error reading pipefd in UFFD thread\n");
+ return NULL;
+ }
+
+ if (!pollfd[0].revents & POLLIN)
+ continue;
+
+ r = read(uffd, &msg, sizeof(msg));
+ if (r == -1) {
+ if (errno == EAGAIN)
+ continue;
+ pr_info("Read of uffd gor errno %d", errno);
+ return NULL;
+ }
+
+ if (r != sizeof(msg)) {
+ pr_info("Read on uffd returned unexpected size: %d bytes", r);
+ return NULL;
+ }
+
+ if (!(msg.event & UFFD_EVENT_PAGEFAULT))
+ continue;
+
+ if (delay)
+ usleep(delay);
+ addr = msg.arg.pagefault.address;
+ r = handle_uffd_page_request(uffd, addr);
+ if (r < 0)
+ return NULL;
+ pages++;
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ ts_diff = timespec_sub(end, start);
+ PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
+ pages, ts_diff.tv_sec, ts_diff.tv_nsec,
+ pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+
+ return NULL;
+}
+
+static int setup_demand_paging(struct kvm_vm *vm,
+ pthread_t *uffd_handler_thread, int pipefd,
+ useconds_t uffd_delay,
+ struct uffd_handler_args *uffd_args,
+ void *hva, uint64_t len)
+{
+ int uffd;
+ struct uffdio_api uffdio_api;
+ struct uffdio_register uffdio_register;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd == -1) {
+ pr_info("uffd creation failed\n");
+ return -1;
+ }
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
+ pr_info("ioctl uffdio_api failed\n");
+ return -1;
+ }
+
+ uffdio_register.range.start = (uint64_t)hva;
+ uffdio_register.range.len = len;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
+ pr_info("ioctl uffdio_register failed\n");
+ return -1;
+ }
+
+ if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) !=
+ UFFD_API_RANGE_IOCTLS) {
+ pr_info("unexpected userfaultfd ioctl set\n");
+ return -1;
+ }
+
+ uffd_args->uffd = uffd;
+ uffd_args->pipefd = pipefd;
+ uffd_args->delay = uffd_delay;
+ pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
+ uffd_args);
+
+ PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
+ hva, hva + len);
+
+ return 0;
+}
+
+static void run_test(enum vm_guest_mode mode, bool use_uffd,
+ useconds_t uffd_delay, int vcpus,
+ uint64_t vcpu_memory_bytes)
+{
+ pthread_t *vcpu_threads;
+ pthread_t *uffd_handler_threads = NULL;
+ struct uffd_handler_args *uffd_args = NULL;
+ struct timespec start, end, ts_diff;
+ int *pipefds = NULL;
+ struct kvm_vm *vm;
+ uint64_t guest_num_pages;
+ int vcpu_id;
+ int r;
+
+ vm = create_vm(mode, vcpus, vcpu_memory_bytes);
+
+ guest_page_size = vm_get_page_size(vm);
+
+ TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0,
+ "Guest memory size is not guest page size aligned.");
+
+ guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size;
+ guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+
+ /*
+ * If there should be more memory in the guest test region than there
+ * can be pages in the guest, it will definitely cause problems.
+ */
+ TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+ "Requested more guest memory than address space allows.\n"
+ " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
+ guest_num_pages, vm_get_max_gfn(vm), vcpus,
+ vcpu_memory_bytes);
+
+ host_page_size = getpagesize();
+ TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0,
+ "Guest memory size is not host page size aligned.");
+
+ guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
+ guest_page_size;
+ guest_test_phys_mem &= ~(host_page_size - 1);
+
+#ifdef __s390x__
+ /* Align to 1M (segment size) */
+ guest_test_phys_mem &= ~((1 << 20) - 1);
+#endif
+
+ pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+
+ /* Add an extra memory slot for testing demand paging */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ guest_test_phys_mem,
+ TEST_MEM_SLOT_INDEX,
+ guest_num_pages, 0);
+
+ /* Do mapping for the demand paging memory slot */
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+
+ ucall_init(vm, NULL);
+
+ guest_data_prototype = malloc(host_page_size);
+ TEST_ASSERT(guest_data_prototype,
+ "Failed to allocate buffer for guest data pattern");
+ memset(guest_data_prototype, 0xAB, host_page_size);
+
+ vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads));
+ TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+
+ if (use_uffd) {
+ uffd_handler_threads =
+ malloc(vcpus * sizeof(*uffd_handler_threads));
+ TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
+
+ uffd_args = malloc(vcpus * sizeof(*uffd_args));
+ TEST_ASSERT(uffd_args, "Memory allocation failed");
+
+ pipefds = malloc(sizeof(int) * vcpus * 2);
+ TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
+ }
+
+ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+ vm_paddr_t vcpu_gpa;
+ void *vcpu_hva;
+
+ vm_vcpu_add_default(vm, vcpu_id, guest_code);
+
+ vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
+ PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+ vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+
+ /* Cache the HVA pointer of the region */
+ vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
+
+ if (use_uffd) {
+ /*
+ * Set up user fault fd to handle demand paging
+ * requests.
+ */
+ r = pipe2(&pipefds[vcpu_id * 2],
+ O_CLOEXEC | O_NONBLOCK);
+ TEST_ASSERT(!r, "Failed to set up pipefd");
+
+ r = setup_demand_paging(vm,
+ &uffd_handler_threads[vcpu_id],
+ pipefds[vcpu_id * 2],
+ uffd_delay, &uffd_args[vcpu_id],
+ vcpu_hva, vcpu_memory_bytes);
+ if (r < 0)
+ exit(-r);
+ }
+
+#ifdef __x86_64__
+ vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
+#endif
+
+ vcpu_args[vcpu_id].vm = vm;
+ vcpu_args[vcpu_id].vcpu_id = vcpu_id;
+ vcpu_args[vcpu_id].gva = guest_test_virt_mem +
+ (vcpu_id * vcpu_memory_bytes);
+ vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size;
+ }
+
+ /* Export the shared variables to the guest */
+ sync_global_to_guest(vm, host_page_size);
+ sync_global_to_guest(vm, guest_page_size);
+ sync_global_to_guest(vm, vcpu_args);
+
+ pr_info("Finished creating vCPUs and starting uffd threads\n");
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+ pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
+ &vcpu_args[vcpu_id]);
+ }
+
+ pr_info("Started all vCPUs\n");
+
+ /* Wait for the vcpu threads to quit */
+ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+ pthread_join(vcpu_threads[vcpu_id], NULL);
+ PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
+ }
+
+ pr_info("All vCPU threads joined\n");
+
+ clock_gettime(CLOCK_MONOTONIC, &end);
+
+ if (use_uffd) {
+ char c;
+
+ /* Tell the user fault fd handler threads to quit */
+ for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
+ r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
+ TEST_ASSERT(r == 1, "Unable to write to pipefd");
+
+ pthread_join(uffd_handler_threads[vcpu_id], NULL);
+ }
+ }
+
+ ts_diff = timespec_sub(end, start);
+ pr_info("Total guest execution time: %ld.%.9lds\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+ pr_info("Overall demand paging rate: %f pgs/sec\n",
+ guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+
+ ucall_uninit(vm);
+ kvm_vm_free(vm);
+
+ free(guest_data_prototype);
+ free(vcpu_threads);
+ if (use_uffd) {
+ free(uffd_handler_threads);
+ free(uffd_args);
+ free(pipefds);
+ }
+}
+
+struct guest_mode {
+ bool supported;
+ bool enabled;
+};
+static struct guest_mode guest_modes[NUM_VM_MODES];
+
+#define guest_mode_init(mode, supported, enabled) ({ \
+ guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
+})
+
+static void help(char *name)
+{
+ int i;
+
+ puts("");
+ printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
+ " [-b memory] [-v vcpus]\n", name);
+ printf(" -m: specify the guest mode ID to test\n"
+ " (default: test all supported modes)\n"
+ " This option may be used multiple times.\n"
+ " Guest mode IDs:\n");
+ for (i = 0; i < NUM_VM_MODES; ++i) {
+ printf(" %d: %s%s\n", i, vm_guest_mode_string(i),
+ guest_modes[i].supported ? " (supported)" : "");
+ }
+ printf(" -u: use User Fault FD to handle vCPU page\n"
+ " faults.\n");
+ printf(" -d: add a delay in usec to the User Fault\n"
+ " FD handler to simulate demand paging\n"
+ " overheads. Ignored without -u.\n");
+ printf(" -b: specify the size of the memory region which should be\n"
+ " demand paged by each vCPU. e.g. 10M or 3G.\n"
+ " Default: 1G\n");
+ printf(" -v: specify the number of vCPUs to run.\n");
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ bool mode_selected = false;
+ uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE;
+ int vcpus = 1;
+ unsigned int mode;
+ int opt, i;
+ bool use_uffd = false;
+ useconds_t uffd_delay = 0;
+
+#ifdef __x86_64__
+ guest_mode_init(VM_MODE_PXXV48_4K, true, true);
+#endif
+#ifdef __aarch64__
+ guest_mode_init(VM_MODE_P40V48_4K, true, true);
+ guest_mode_init(VM_MODE_P40V48_64K, true, true);
+ {
+ unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+
+ if (limit >= 52)
+ guest_mode_init(VM_MODE_P52V48_64K, true, true);
+ if (limit >= 48) {
+ guest_mode_init(VM_MODE_P48V48_4K, true, true);
+ guest_mode_init(VM_MODE_P48V48_64K, true, true);
+ }
+ }
+#endif
+#ifdef __s390x__
+ guest_mode_init(VM_MODE_P40V48_4K, true, true);
+#endif
+
+ while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) {
+ switch (opt) {
+ case 'm':
+ if (!mode_selected) {
+ for (i = 0; i < NUM_VM_MODES; ++i)
+ guest_modes[i].enabled = false;
+ mode_selected = true;
+ }
+ mode = strtoul(optarg, NULL, 10);
+ TEST_ASSERT(mode < NUM_VM_MODES,
+ "Guest mode ID %d too big", mode);
+ guest_modes[mode].enabled = true;
+ break;
+ case 'u':
+ use_uffd = true;
+ break;
+ case 'd':
+ uffd_delay = strtoul(optarg, NULL, 0);
+ TEST_ASSERT(uffd_delay >= 0,
+ "A negative UFFD delay is not supported.");
+ break;
+ case 'b':
+ vcpu_memory_bytes = parse_size(optarg);
+ break;
+ case 'v':
+ vcpus = atoi(optarg);
+ TEST_ASSERT(vcpus > 0,
+ "Must have a positive number of vCPUs");
+ TEST_ASSERT(vcpus <= MAX_VCPUS,
+ "This test does not currently support\n"
+ "more than %d vCPUs.", MAX_VCPUS);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ for (i = 0; i < NUM_VM_MODES; ++i) {
+ if (!guest_modes[i].enabled)
+ continue;
+ TEST_ASSERT(guest_modes[i].supported,
+ "Guest mode ID %d (%s) not supported.",
+ i, vm_guest_mode_string(i));
+ run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes);
+ }
+
+ return 0;
+}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ print_skip("__NR_userfaultfd must be present for userfaultfd test");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 5614222a6628..752ec158ac59 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -166,24 +166,22 @@ static void *vcpu_worker(void *data)
pages_count += TEST_PAGES_PER_LOOP;
generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
} else {
- TEST_ASSERT(false,
- "Invalid guest sync status: "
- "exit_reason=%s\n",
- exit_reason_str(run->exit_reason));
+ TEST_FAIL("Invalid guest sync status: "
+ "exit_reason=%s\n",
+ exit_reason_str(run->exit_reason));
}
}
- DEBUG("Dirtied %"PRIu64" pages\n", pages_count);
+ pr_info("Dirtied %"PRIu64" pages\n", pages_count);
return NULL;
}
-static void vm_dirty_log_verify(unsigned long *bmap)
+static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
{
+ uint64_t step = vm_num_host_pages(mode, 1);
uint64_t page;
uint64_t *value_ptr;
- uint64_t step = host_page_size >= guest_page_size ? 1 :
- guest_page_size / host_page_size;
for (page = 0; page < host_num_pages; page += step) {
value_ptr = host_test_mem + page * host_page_size;
@@ -252,6 +250,8 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
struct kvm_vm *vm;
uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
+ pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
#ifdef __x86_64__
@@ -264,6 +264,10 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
#define DIRTY_MEM_BITS 30 /* 1G */
#define PAGE_SHIFT_4K 12
+#ifdef USE_CLEAR_DIRTY_LOG
+static u64 dirty_log_manual_caps;
+#endif
+
static void run_test(enum vm_guest_mode mode, unsigned long iterations,
unsigned long interval, uint64_t phys_offset)
{
@@ -289,14 +293,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
* case where the size is not aligned to 64 pages.
*/
guest_num_pages = (1ul << (DIRTY_MEM_BITS -
- vm_get_page_shift(vm))) + 16;
-#ifdef __s390x__
- /* Round up to multiple of 1M (segment size) */
- guest_num_pages = (guest_num_pages + 0xff) & ~0xffUL;
-#endif
+ vm_get_page_shift(vm))) + 3;
+ guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+
host_page_size = getpagesize();
- host_num_pages = (guest_num_pages * guest_page_size) / host_page_size +
- !!((guest_num_pages * guest_page_size) % host_page_size);
+ host_num_pages = vm_num_host_pages(mode, guest_num_pages);
if (!phys_offset) {
guest_test_phys_mem = (vm_get_max_gfn(vm) -
@@ -311,7 +312,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
guest_test_phys_mem &= ~((1 << 20) - 1);
#endif
- DEBUG("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
+ pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
bmap = bitmap_alloc(host_num_pages);
host_bmap_track = bitmap_alloc(host_num_pages);
@@ -320,7 +321,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
struct kvm_enable_cap cap = {};
cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
- cap.args[0] = 1;
+ cap.args[0] = dirty_log_manual_caps;
vm_enable_cap(vm, &cap);
#endif
@@ -332,8 +333,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
KVM_MEM_LOG_DIRTY_PAGES);
/* Do mapping for the dirty track memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem,
- guest_num_pages * guest_page_size, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
/* Cache the HVA pointer of the region */
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
@@ -341,9 +341,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
#ifdef __x86_64__
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
#endif
-#ifdef __aarch64__
ucall_init(vm, NULL);
-#endif
/* Export the shared variables to the guest */
sync_global_to_guest(vm, host_page_size);
@@ -369,7 +367,7 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
host_num_pages);
#endif
- vm_dirty_log_verify(bmap);
+ vm_dirty_log_verify(mode, bmap);
iteration++;
sync_global_to_guest(vm, iteration);
}
@@ -378,9 +376,9 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
host_quit = true;
pthread_join(vcpu_thread, NULL);
- DEBUG("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
- "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
- host_track_next_count);
+ pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
+ "track_next (%"PRIu64")\n", host_dirty_count, host_clear_count,
+ host_track_next_count);
free(bmap);
free(host_bmap_track);
@@ -388,15 +386,14 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
kvm_vm_free(vm);
}
-struct vm_guest_mode_params {
+struct guest_mode {
bool supported;
bool enabled;
};
-struct vm_guest_mode_params vm_guest_mode_params[NUM_VM_MODES];
+static struct guest_mode guest_modes[NUM_VM_MODES];
-#define vm_guest_mode_params_init(mode, supported, enabled) \
-({ \
- vm_guest_mode_params[mode] = (struct vm_guest_mode_params){ supported, enabled }; \
+#define guest_mode_init(mode, supported, enabled) ({ \
+ guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
})
static void help(char *name)
@@ -419,7 +416,7 @@ static void help(char *name)
" Guest mode IDs:\n");
for (i = 0; i < NUM_VM_MODES; ++i) {
printf(" %d: %s%s\n", i, vm_guest_mode_string(i),
- vm_guest_mode_params[i].supported ? " (supported)" : "");
+ guest_modes[i].supported ? " (supported)" : "");
}
puts("");
exit(0);
@@ -433,34 +430,38 @@ int main(int argc, char *argv[])
uint64_t phys_offset = 0;
unsigned int mode;
int opt, i;
-#ifdef __aarch64__
- unsigned int host_ipa_limit;
-#endif
#ifdef USE_CLEAR_DIRTY_LOG
- if (!kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2)) {
- fprintf(stderr, "KVM_CLEAR_DIRTY_LOG not available, skipping tests\n");
+ dirty_log_manual_caps =
+ kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+ if (!dirty_log_manual_caps) {
+ print_skip("KVM_CLEAR_DIRTY_LOG not available");
exit(KSFT_SKIP);
}
+ dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+ KVM_DIRTY_LOG_INITIALLY_SET);
#endif
#ifdef __x86_64__
- vm_guest_mode_params_init(VM_MODE_PXXV48_4K, true, true);
+ guest_mode_init(VM_MODE_PXXV48_4K, true, true);
#endif
#ifdef __aarch64__
- vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
- vm_guest_mode_params_init(VM_MODE_P40V48_64K, true, true);
-
- host_ipa_limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
- if (host_ipa_limit >= 52)
- vm_guest_mode_params_init(VM_MODE_P52V48_64K, true, true);
- if (host_ipa_limit >= 48) {
- vm_guest_mode_params_init(VM_MODE_P48V48_4K, true, true);
- vm_guest_mode_params_init(VM_MODE_P48V48_64K, true, true);
+ guest_mode_init(VM_MODE_P40V48_4K, true, true);
+ guest_mode_init(VM_MODE_P40V48_64K, true, true);
+
+ {
+ unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+
+ if (limit >= 52)
+ guest_mode_init(VM_MODE_P52V48_64K, true, true);
+ if (limit >= 48) {
+ guest_mode_init(VM_MODE_P48V48_4K, true, true);
+ guest_mode_init(VM_MODE_P48V48_64K, true, true);
+ }
}
#endif
#ifdef __s390x__
- vm_guest_mode_params_init(VM_MODE_P40V48_4K, true, true);
+ guest_mode_init(VM_MODE_P40V48_4K, true, true);
#endif
while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) {
@@ -477,13 +478,13 @@ int main(int argc, char *argv[])
case 'm':
if (!mode_selected) {
for (i = 0; i < NUM_VM_MODES; ++i)
- vm_guest_mode_params[i].enabled = false;
+ guest_modes[i].enabled = false;
mode_selected = true;
}
mode = strtoul(optarg, NULL, 10);
TEST_ASSERT(mode < NUM_VM_MODES,
"Guest mode ID %d too big", mode);
- vm_guest_mode_params[mode].enabled = true;
+ guest_modes[mode].enabled = true;
break;
case 'h':
default:
@@ -495,15 +496,15 @@ int main(int argc, char *argv[])
TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
TEST_ASSERT(interval > 0, "Interval must be greater than zero");
- DEBUG("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
- iterations, interval);
+ pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
+ iterations, interval);
srandom(time(0));
for (i = 0; i < NUM_VM_MODES; ++i) {
- if (!vm_guest_mode_params[i].enabled)
+ if (!guest_modes[i].enabled)
continue;
- TEST_ASSERT(vm_guest_mode_params[i].supported,
+ TEST_ASSERT(guest_modes[i].supported,
"Guest mode ID %d (%s) not supported.",
i, vm_guest_mode_string(i));
run_test(i, iterations, interval, phys_offset);
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/evmcs.h
index 4912d23844bc..a034438b6266 100644
--- a/tools/testing/selftests/kvm/include/evmcs.h
+++ b/tools/testing/selftests/kvm/include/evmcs.h
@@ -16,6 +16,8 @@
#define u32 uint32_t
#define u64 uint64_t
+#define EVMCS_VERSION 1
+
extern bool enable_evmcs;
struct hv_vp_assist_page {
@@ -217,8 +219,8 @@ struct hv_enlightened_vmcs {
#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
(~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
-struct hv_enlightened_vmcs *current_evmcs;
-struct hv_vp_assist_page *current_vp_assist;
+extern struct hv_enlightened_vmcs *current_evmcs;
+extern struct hv_vp_assist_page *current_vp_assist;
int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id);
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index ae0d14c2540a..919e161dd289 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -10,13 +10,15 @@
#include "test_util.h"
#include "asm/kvm.h"
+#include "linux/list.h"
#include "linux/kvm.h"
#include <sys/ioctl.h>
#include "sparsebit.h"
-/* Callers of kvm_util only have an incomplete/opaque description of the
+/*
+ * Callers of kvm_util only have an incomplete/opaque description of the
* structure kvm_util is using to maintain the state of a VM.
*/
struct kvm_vm;
@@ -24,12 +26,6 @@ struct kvm_vm;
typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
-#ifndef NDEBUG
-#define DEBUG(...) printf(__VA_ARGS__);
-#else
-#define DEBUG(...)
-#endif
-
/* Minimum allocated guest virtual and physical addresses */
#define KVM_UTIL_MIN_VADDR 0x2000
@@ -84,6 +80,23 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
uint32_t data_memslot, uint32_t pgd_memslot);
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+/*
+ * VM VCPU Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the current state of the VCPU specified by @vcpuid, within the VM
+ * given by @vm, to the FILE stream given by @stream.
+ */
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
uint8_t indent);
@@ -100,25 +113,65 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
uint32_t data_memslot, uint32_t pgd_memslot);
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- size_t size, uint32_t pgd_memslot);
+ unsigned int npages, uint32_t pgd_memslot);
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Returns the VM physical address of the translated VM virtual
+ * address given by @gva.
+ */
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
+void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_guest_debug *debug);
void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_mp_state *mp_state);
void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
+
+/*
+ * VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - VCPU ID
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first @num function input registers of the VCPU with @vcpuid,
+ * per the C calling convention of the architecture, to the values given
+ * as variable args. Each of the variable args is expected to be of type
+ * uint64_t. The maximum @num can be is specific to the architecture.
+ */
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
+
void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_sregs *sregs);
void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
@@ -147,15 +200,57 @@ int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
const char *exit_reason_str(unsigned int exit_reason);
void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within @vm, creates a virtual translation for the page starting
+ * at @vaddr to the page starting at @paddr.
+ */
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot);
+ uint32_t memslot);
+
vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
uint32_t memslot);
vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
vm_paddr_t paddr_min, uint32_t memslot);
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_size,
+/*
+ * Create a VM with reasonable defaults
+ *
+ * Input Args:
+ * vcpuid - The id of the single VCPU to add to the VM.
+ * extra_mem_pages - The number of extra pages to add (this will
+ * decide how much extra space we will need to
+ * setup the page tables using memslot 0)
+ * guest_code - The vCPU's entry point
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ */
+struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
void *guest_code);
+
+/*
+ * Adds a vCPU with reasonable defaults (e.g. a stack)
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpuid - The id of the VCPU to add to the VM.
+ * guest_code - The vCPU's entry point
+ */
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
bool vm_is_unrestricted_guest(struct kvm_vm *vm);
@@ -163,6 +258,22 @@ bool vm_is_unrestricted_guest(struct kvm_vm *vm);
unsigned int vm_get_page_size(struct kvm_vm *vm);
unsigned int vm_get_page_shift(struct kvm_vm *vm);
unsigned int vm_get_max_gfn(struct kvm_vm *vm);
+int vm_get_fd(struct kvm_vm *vm);
+
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
+unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
+unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
+static inline unsigned int
+vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+ unsigned int n;
+ n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
+#ifdef __s390x__
+ /* s390 requires 1M aligned guest sizes */
+ n = (n + 255) & ~255;
+#endif
+ return n;
+}
struct kvm_userspace_memory_region *
kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
@@ -203,13 +314,30 @@ void ucall_uninit(struct kvm_vm *vm);
void ucall(uint64_t cmd, int nargs, ...);
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
+ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define GUEST_ASSERT(_condition) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2, \
- "Failed guest assert: " \
- #_condition, __LINE__); \
+#define __GUEST_ASSERT(_condition, _nargs, _args...) do { \
+ if (!(_condition)) \
+ ucall(UCALL_ABORT, 2 + _nargs, \
+ "Failed guest assert: " \
+ #_condition, __LINE__, _args); \
} while (0)
+#define GUEST_ASSERT(_condition) \
+ __GUEST_ASSERT((_condition), 0, 0)
+
+#define GUEST_ASSERT_1(_condition, arg1) \
+ __GUEST_ASSERT((_condition), 1, (arg1))
+
+#define GUEST_ASSERT_2(_condition, arg1, arg2) \
+ __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
+
+#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
+ __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
+
+#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
+ __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index a41db6fb7e24..5eb01bf51b86 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -19,12 +19,28 @@
#include <fcntl.h>
#include "kselftest.h"
+static inline int _no_printf(const char *format, ...) { return 0; }
+
+#ifdef DEBUG
+#define pr_debug(...) printf(__VA_ARGS__)
+#else
+#define pr_debug(...) _no_printf(__VA_ARGS__)
+#endif
+#ifndef QUIET
+#define pr_info(...) printf(__VA_ARGS__)
+#else
+#define pr_info(...) _no_printf(__VA_ARGS__)
+#endif
+
+void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+
ssize_t test_write(int fd, const void *buf, size_t count);
ssize_t test_read(int fd, void *buf, size_t count);
int test_seq_read(const char *path, char **bufp, size_t *sizep);
void test_assert(bool exp, const char *exp_str,
- const char *file, unsigned int line, const char *fmt, ...);
+ const char *file, unsigned int line, const char *fmt, ...)
+ __attribute__((format(printf, 5, 6)));
#define TEST_ASSERT(e, fmt, ...) \
test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
@@ -39,4 +55,14 @@ void test_assert(bool exp, const char *exp_str,
#a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
} while (0)
+#define TEST_FAIL(fmt, ...) \
+ TEST_ASSERT(false, fmt, ##__VA_ARGS__)
+
+size_t parse_size(const char *size);
+
+int64_t timespec_to_ns(struct timespec ts);
+struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
+struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
+struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
+
#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 7428513a4c68..82b7fe16a824 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -79,13 +79,16 @@ static inline uint64_t get_desc64_base(const struct desc64 *desc)
static inline uint64_t rdtsc(void)
{
uint32_t eax, edx;
-
+ uint64_t tsc_val;
/*
* The lfence is to wait (on Intel CPUs) until all previous
- * instructions have been executed.
+ * instructions have been executed. If software requires RDTSC to be
+ * executed prior to execution of any subsequent instruction, it can
+ * execute LFENCE immediately after RDTSC
*/
- __asm__ __volatile__("lfence; rdtsc" : "=a"(eax), "=d"(edx));
- return ((uint64_t)edx) << 32 | eax;
+ __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
+ tsc_val = ((uint64_t)edx) << 32 | eax;
+ return tsc_val;
}
static inline uint64_t rdtscp(uint32_t *aux)
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
index cd037917fece..674151d24fcf 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -35,4 +35,14 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
void nested_svm_check_supported(void);
+static inline bool cpu_has_svm(void)
+{
+ u32 eax = 0x80000001, ecx;
+
+ asm("cpuid" :
+ "=a" (eax), "=c" (ecx) : "0" (eax) : "ebx", "edx");
+
+ return ecx & CPUID_SVM;
+}
+
#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 3d27069b9ed9..ccff3e6e2704 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -575,6 +575,33 @@ struct vmx_pages {
void *eptp;
};
+union vmx_basic {
+ u64 val;
+ struct {
+ u32 revision;
+ u32 size:13,
+ reserved1:3,
+ width:1,
+ dual:1,
+ type:4,
+ insouts:1,
+ ctrl:1,
+ vm_entry_exception_ctrl:1,
+ reserved2:7;
+ };
+};
+
+union vmx_ctrl_msr {
+ u64 val;
+ struct {
+ u32 set, clr;
+ };
+};
+
+union vmx_basic basic;
+union vmx_ctrl_msr ctrl_pin_rev;
+union vmx_ctrl_msr ctrl_exit_rev;
+
struct vmx_pages *vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva);
bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index 6f38c3dc0d56..0299cd81b8ba 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -24,8 +24,8 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
struct kvm_vm *vm;
int i;
- printf("Testing creating %d vCPUs, with IDs %d...%d.\n",
- num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1);
+ pr_info("Testing creating %d vCPUs, with IDs %d...%d.\n",
+ num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1);
vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
@@ -41,8 +41,8 @@ int main(int argc, char *argv[])
int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
- printf("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
- printf("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
+ pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
+ pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
/*
* Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 86036a59a668..2afa6618b396 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -130,7 +130,7 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
break;
default:
- TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+ TEST_FAIL("Page table levels must be 2, 3, or 4");
}
*ptep = paddr | 3;
@@ -173,20 +173,19 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
goto unmapped_gva;
break;
default:
- TEST_ASSERT(false, "Page table levels must be 2, 3, or 4");
+ TEST_FAIL("Page table levels must be 2, 3, or 4");
}
return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
unmapped_gva:
- TEST_ASSERT(false, "No mapping for vm virtual address, "
- "gva: 0x%lx", gva);
+ TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
exit(1);
}
static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
{
-#ifdef DEBUG_VM
+#ifdef DEBUG
static const char * const type[] = { "", "pud", "pmd", "pte" };
uint64_t pte, *ptep;
@@ -197,7 +196,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p
ptep = addr_gpa2hva(vm, pte);
if (!*ptep)
continue;
- printf("%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "", type[level], pte, *ptep, ptep);
pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level + 1);
}
#endif
@@ -215,7 +214,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
ptep = addr_gpa2hva(vm, pgd);
if (!*ptep)
continue;
- printf("%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
+ fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "", pgd, *ptep, ptep);
pte_dump(stream, vm, indent + 1, pte_addr(vm, *ptep), level);
}
}
@@ -262,11 +261,11 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
switch (vm->mode) {
case VM_MODE_P52V48_4K:
- TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
- "with 52-bit physical address ranges");
+ TEST_FAIL("AArch64 does not support 4K sized pages "
+ "with 52-bit physical address ranges");
case VM_MODE_PXXV48_4K:
- TEST_ASSERT(false, "AArch64 does not support 4K sized pages "
- "with ANY-bit physical address ranges");
+ TEST_FAIL("AArch64 does not support 4K sized pages "
+ "with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
@@ -288,7 +287,7 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
break;
default:
- TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */;
@@ -333,3 +332,21 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code);
}
+
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+ va_list ap;
+ int i;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ " num: %u\n", num);
+
+ va_start(ap, num);
+
+ for (i = 0; i < num; i++) {
+ set_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[i]),
+ va_arg(ap, uint64_t));
+ }
+
+ va_end(ap);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index 6cd91970fbad..c8e0ec20d3bf 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -62,7 +62,7 @@ void ucall_init(struct kvm_vm *vm, void *arg)
if (ucall_mmio_init(vm, start + offset))
return;
}
- TEST_ASSERT(false, "Can't find a ucall mmio address");
+ TEST_FAIL("Can't find a ucall mmio address");
}
void ucall_uninit(struct kvm_vm *vm)
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index d1cf9f6e0e6b..5ebbd0d6b472 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -82,8 +82,10 @@ test_assert(bool exp, const char *exp_str,
}
va_end(ap);
- if (errno == EACCES)
- ksft_exit_skip("Access denied - Exiting.\n");
+ if (errno == EACCES) {
+ print_skip("Access denied - Exiting");
+ exit(KSFT_SKIP);
+ }
exit(254);
}
diff --git a/tools/testing/selftests/kvm/lib/io.c b/tools/testing/selftests/kvm/lib/io.c
index eaf351cc7e7f..fedb2a741f0b 100644
--- a/tools/testing/selftests/kvm/lib/io.c
+++ b/tools/testing/selftests/kvm/lib/io.c
@@ -61,9 +61,9 @@ ssize_t test_write(int fd, const void *buf, size_t count)
continue;
case 0:
- TEST_ASSERT(false, "Unexpected EOF,\n"
- " rc: %zi num_written: %zi num_left: %zu",
- rc, num_written, num_left);
+ TEST_FAIL("Unexpected EOF,\n"
+ " rc: %zi num_written: %zi num_left: %zu",
+ rc, num_written, num_left);
break;
default:
@@ -138,9 +138,9 @@ ssize_t test_read(int fd, void *buf, size_t count)
break;
case 0:
- TEST_ASSERT(false, "Unexpected EOF,\n"
- " rc: %zi num_read: %zi num_left: %zu",
- rc, num_read, num_left);
+ TEST_FAIL("Unexpected EOF,\n"
+ " rc: %zi num_read: %zi num_left: %zu",
+ rc, num_read, num_left);
break;
default:
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index a6dd0401eb50..c9cede5c7d0d 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -92,7 +92,7 @@ static void vm_open(struct kvm_vm *vm, int perm)
exit(KSFT_SKIP);
if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
- fprintf(stderr, "immediate_exit not available, skipping test\n");
+ print_skip("immediate_exit not available");
exit(KSFT_SKIP);
}
@@ -113,6 +113,25 @@ const char * const vm_guest_mode_string[] = {
_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
"Missing new mode strings?");
+struct vm_guest_mode_params {
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+
+static const struct vm_guest_mode_params vm_guest_mode_params[] = {
+ { 52, 48, 0x1000, 12 },
+ { 52, 48, 0x10000, 16 },
+ { 48, 48, 0x1000, 12 },
+ { 48, 48, 0x10000, 16 },
+ { 40, 48, 0x1000, 12 },
+ { 40, 48, 0x10000, 16 },
+ { 0, 0, 0x1000, 12 },
+};
+_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
+ "Missing new mode params?");
+
/*
* VM Create
*
@@ -136,75 +155,57 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
{
struct kvm_vm *vm;
- DEBUG("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+ pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
+ vm_guest_mode_string(mode), phy_pages, perm);
vm = calloc(1, sizeof(*vm));
TEST_ASSERT(vm != NULL, "Insufficient Memory");
+ INIT_LIST_HEAD(&vm->vcpus);
+ INIT_LIST_HEAD(&vm->userspace_mem_regions);
+
vm->mode = mode;
vm->type = 0;
+ vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
+ vm->va_bits = vm_guest_mode_params[mode].va_bits;
+ vm->page_size = vm_guest_mode_params[mode].page_size;
+ vm->page_shift = vm_guest_mode_params[mode].page_shift;
+
/* Setup mode specific traits. */
switch (vm->mode) {
case VM_MODE_P52V48_4K:
vm->pgtable_levels = 4;
- vm->pa_bits = 52;
- vm->va_bits = 48;
- vm->page_size = 0x1000;
- vm->page_shift = 12;
break;
case VM_MODE_P52V48_64K:
vm->pgtable_levels = 3;
- vm->pa_bits = 52;
- vm->va_bits = 48;
- vm->page_size = 0x10000;
- vm->page_shift = 16;
break;
case VM_MODE_P48V48_4K:
vm->pgtable_levels = 4;
- vm->pa_bits = 48;
- vm->va_bits = 48;
- vm->page_size = 0x1000;
- vm->page_shift = 12;
break;
case VM_MODE_P48V48_64K:
vm->pgtable_levels = 3;
- vm->pa_bits = 48;
- vm->va_bits = 48;
- vm->page_size = 0x10000;
- vm->page_shift = 16;
break;
case VM_MODE_P40V48_4K:
vm->pgtable_levels = 4;
- vm->pa_bits = 40;
- vm->va_bits = 48;
- vm->page_size = 0x1000;
- vm->page_shift = 12;
break;
case VM_MODE_P40V48_64K:
vm->pgtable_levels = 3;
- vm->pa_bits = 40;
- vm->va_bits = 48;
- vm->page_size = 0x10000;
- vm->page_shift = 16;
break;
case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
TEST_ASSERT(vm->va_bits == 48, "Linear address width "
"(%d bits) not supported", vm->va_bits);
+ pr_debug("Guest physical address width detected: %d\n",
+ vm->pa_bits);
vm->pgtable_levels = 4;
- vm->page_size = 0x1000;
- vm->page_shift = 12;
- DEBUG("Guest physical address width detected: %d\n",
- vm->pa_bits);
#else
- TEST_ASSERT(false, "VM_MODE_PXXV48_4K not supported on "
- "non-x86 platforms");
+ TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
break;
default:
- TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", mode);
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
}
#ifdef __aarch64__
@@ -260,13 +261,12 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
if (vmp->has_irqchip)
vm_create_irqchip(vmp);
- for (region = vmp->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx",
+ " guest_phys_addr: 0x%llx size: 0x%llx",
ret, errno, region->region.slot,
region->region.flags,
region->region.guest_phys_addr,
@@ -281,7 +281,7 @@ void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
- strerror(-ret));
+ __func__, strerror(-ret));
}
void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
@@ -294,7 +294,7 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
- strerror(-ret));
+ __func__, strerror(-ret));
}
/*
@@ -321,8 +321,7 @@ userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
uint64_t existing_start = region->region.guest_phys_addr;
uint64_t existing_end = region->region.guest_phys_addr
+ region->region.memory_size - 1;
@@ -380,11 +379,11 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
*/
struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
{
- struct vcpu *vcpup;
+ struct vcpu *vcpu;
- for (vcpup = vm->vcpu_head; vcpup; vcpup = vcpup->next) {
- if (vcpup->id == vcpuid)
- return vcpup;
+ list_for_each_entry(vcpu, &vm->vcpus, list) {
+ if (vcpu->id == vcpuid)
+ return vcpu;
}
return NULL;
@@ -394,18 +393,16 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
* VM VCPU Remove
*
* Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
+ * vcpu - VCPU to remove
*
* Output Args: None
*
* Return: None, TEST_ASSERT failures for all error conditions
*
- * Within the VM specified by vm, removes the VCPU given by vcpuid.
+ * Removes a vCPU from a VM and frees its resources.
*/
-static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
+static void vm_vcpu_rm(struct vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
ret = munmap(vcpu->state, sizeof(*vcpu->state));
@@ -415,21 +412,17 @@ static void vm_vcpu_rm(struct kvm_vm *vm, uint32_t vcpuid)
TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
"errno: %i", ret, errno);
- if (vcpu->next)
- vcpu->next->prev = vcpu->prev;
- if (vcpu->prev)
- vcpu->prev->next = vcpu->next;
- else
- vm->vcpu_head = vcpu->next;
+ list_del(&vcpu->list);
free(vcpu);
}
void kvm_vm_release(struct kvm_vm *vmp)
{
+ struct vcpu *vcpu, *tmp;
int ret;
- while (vmp->vcpu_head)
- vm_vcpu_rm(vmp, vmp->vcpu_head->id);
+ list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
+ vm_vcpu_rm(vcpu);
ret = close(vmp->fd);
TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
@@ -440,35 +433,38 @@ void kvm_vm_release(struct kvm_vm *vmp)
" vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
}
+static void __vm_mem_region_delete(struct kvm_vm *vm,
+ struct userspace_mem_region *region)
+{
+ int ret;
+
+ list_del(&region->list);
+
+ region->region.memory_size = 0;
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
+ "rc: %i errno: %i", ret, errno);
+
+ sparsebit_free(&region->unused_phy_pages);
+ ret = munmap(region->mmap_start, region->mmap_size);
+ TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
+
+ free(region);
+}
+
/*
* Destroys and frees the VM pointed to by vmp.
*/
void kvm_vm_free(struct kvm_vm *vmp)
{
- int ret;
+ struct userspace_mem_region *region, *tmp;
if (vmp == NULL)
return;
/* Free userspace_mem_regions. */
- while (vmp->userspace_mem_region_head) {
- struct userspace_mem_region *region
- = vmp->userspace_mem_region_head;
-
- region->region.memory_size = 0;
- ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION,
- &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
-
- vmp->userspace_mem_region_head = region->next;
- sparsebit_free(&region->unused_phy_pages);
- ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i",
- ret, errno);
-
- free(region);
- }
+ list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
+ __vm_mem_region_delete(vmp, region);
/* Free sparsebit arrays. */
sparsebit_free(&vmp->vpages_valid);
@@ -582,6 +578,10 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
size_t alignment;
+ TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
+ "Number of guest pages is not compatible with the host. "
+ "Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
+
TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
"address not on a page boundary.\n"
" guest_paddr: 0x%lx vm->page_size: 0x%x",
@@ -600,7 +600,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
region = (struct userspace_mem_region *) userspace_mem_region_find(
vm, guest_paddr, (guest_paddr + npages * vm->page_size) - 1);
if (region != NULL)
- TEST_ASSERT(false, "overlapping userspace_mem_region already "
+ TEST_FAIL("overlapping userspace_mem_region already "
"exists\n"
" requested guest_paddr: 0x%lx npages: 0x%lx "
"page_size: 0x%x\n"
@@ -610,13 +610,11 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
(uint64_t) region->region.memory_size);
/* Confirm no region with the requested slot already exists. */
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
- if (region->region.slot == slot)
- break;
- }
- if (region != NULL)
- TEST_ASSERT(false, "A mem region with the requested slot "
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ if (region->region.slot != slot)
+ continue;
+
+ TEST_FAIL("A mem region with the requested slot "
"already exists.\n"
" requested slot: %u paddr: 0x%lx npages: 0x%lx\n"
" existing slot: %u paddr: 0x%lx size: 0x%lx",
@@ -624,6 +622,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
region->region.slot,
(uint64_t) region->region.guest_phys_addr,
(uint64_t) region->region.memory_size);
+ }
/* Allocate and initialize new mem region structure. */
region = calloc(1, sizeof(*region));
@@ -684,10 +683,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
guest_paddr, (uint64_t) region->region.memory_size);
/* Add to linked-list of memory regions. */
- if (vm->userspace_mem_region_head)
- vm->userspace_mem_region_head->prev = region;
- region->next = vm->userspace_mem_region_head;
- vm->userspace_mem_region_head = region;
+ list_add(&region->list, &vm->userspace_mem_regions);
}
/*
@@ -710,20 +706,17 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
if (region->region.slot == memslot)
- break;
- }
- if (region == NULL) {
- fprintf(stderr, "No mem region with the requested slot found,\n"
- " requested slot: %u\n", memslot);
- fputs("---- vm dump ----\n", stderr);
- vm_dump(stderr, vm, 2);
- TEST_ASSERT(false, "Mem region not found");
+ return region;
}
- return region;
+ fprintf(stderr, "No mem region with the requested slot found,\n"
+ " requested slot: %u\n", memslot);
+ fputs("---- vm dump ----\n", stderr);
+ vm_dump(stderr, vm, 2);
+ TEST_FAIL("Mem region not found");
+ return NULL;
}
/*
@@ -757,6 +750,54 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
}
/*
+ * VM Memory Region Move
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * slot - Slot of the memory region to move
+ * new_gpa - Starting guest physical address
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Change the gpa of a memory region.
+ */
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
+{
+ struct userspace_mem_region *region;
+ int ret;
+
+ region = memslot2region(vm, slot);
+
+ region->region.guest_phys_addr = new_gpa;
+
+ ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+
+ TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
+ "ret: %i errno: %i slot: %u new_gpa: 0x%lx",
+ ret, errno, slot, new_gpa);
+}
+
+/*
+ * VM Memory Region Delete
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * slot - Slot of the memory region to delete
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Delete a memory region.
+ */
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
+{
+ __vm_mem_region_delete(vm, memslot2region(vm, slot));
+}
+
+/*
* VCPU mmap Size
*
* Input Args: None
@@ -808,7 +849,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
/* Confirm a vcpu with the specified id doesn't already exist. */
vcpu = vcpu_find(vm, vcpuid);
if (vcpu != NULL)
- TEST_ASSERT(false, "vcpu with the specified id "
+ TEST_FAIL("vcpu with the specified id "
"already exists,\n"
" requested vcpuid: %u\n"
" existing vcpuid: %u state: %p",
@@ -831,10 +872,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
"vcpu id: %u errno: %i", vcpuid, errno);
/* Add to linked-list of VCPUs. */
- if (vm->vcpu_head)
- vm->vcpu_head->prev = vcpu;
- vcpu->next = vm->vcpu_head;
- vm->vcpu_head = vcpu;
+ list_add(&vcpu->list, &vm->vcpus);
}
/*
@@ -901,8 +939,7 @@ static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
} while (pgidx_start != 0);
no_va_found:
- TEST_ASSERT(false, "No vaddr of specified pages available, "
- "pages: 0x%lx", pages);
+ TEST_FAIL("No vaddr of specified pages available, pages: 0x%lx", pages);
/* NOT REACHED */
return -1;
@@ -982,21 +1019,21 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
* vm - Virtual Machine
* vaddr - Virtuall address to map
* paddr - VM Physical Address
- * size - The size of the range to map
+ * npages - The number of pages to map
* pgd_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
* Return: None
*
- * Within the VM given by vm, creates a virtual translation for the
- * page range starting at vaddr to the page range starting at paddr.
+ * Within the VM given by @vm, creates a virtual translation for
+ * @npages starting at @vaddr to the page range starting at @paddr.
*/
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- size_t size, uint32_t pgd_memslot)
+ unsigned int npages, uint32_t pgd_memslot)
{
size_t page_size = vm->page_size;
- size_t npages = size / page_size;
+ size_t size = npages * page_size;
TEST_ASSERT(vaddr + size > vaddr, "Vaddr overflow");
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
@@ -1028,8 +1065,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
if ((gpa >= region->region.guest_phys_addr)
&& (gpa <= (region->region.guest_phys_addr
+ region->region.memory_size - 1)))
@@ -1037,7 +1074,7 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
+ (gpa - region->region.guest_phys_addr));
}
- TEST_ASSERT(false, "No vm physical memory at 0x%lx", gpa);
+ TEST_FAIL("No vm physical memory at 0x%lx", gpa);
return NULL;
}
@@ -1061,8 +1098,8 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
struct userspace_mem_region *region;
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
if ((hva >= region->host_mem)
&& (hva <= (region->host_mem
+ region->region.memory_size - 1)))
@@ -1071,8 +1108,7 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
+ (hva - (uintptr_t) region->host_mem));
}
- TEST_ASSERT(false, "No mapping to a guest physical address, "
- "hva: %p", hva);
+ TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
return -1;
}
@@ -1171,6 +1207,15 @@ void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
ret, errno);
}
+void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
+ struct kvm_guest_debug *debug)
+{
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
+
+ TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
+}
+
/*
* VM VCPU Set MP State
*
@@ -1490,8 +1535,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
fprintf(stream, "%*sMem Regions:\n", indent, "");
- for (region = vm->userspace_mem_region_head; region;
- region = region->next) {
+ list_for_each_entry(region, &vm->userspace_mem_regions, list) {
fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
"host_virt: %p\n", indent + 2, "",
(uint64_t) region->region.guest_phys_addr,
@@ -1510,7 +1554,7 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump(stream, vm, indent + 4);
}
fprintf(stream, "%*sVCPUs:\n", indent, "");
- for (vcpu = vm->vcpu_head; vcpu; vcpu = vcpu->next)
+ list_for_each_entry(vcpu, &vm->vcpus, list)
vcpu_dump(stream, vm, vcpu->id, indent + 2);
}
@@ -1703,3 +1747,48 @@ unsigned int vm_get_max_gfn(struct kvm_vm *vm)
{
return vm->max_gfn;
}
+
+int vm_get_fd(struct kvm_vm *vm)
+{
+ return vm->fd;
+}
+
+static unsigned int vm_calc_num_pages(unsigned int num_pages,
+ unsigned int page_shift,
+ unsigned int new_page_shift,
+ bool ceil)
+{
+ unsigned int n = 1 << (new_page_shift - page_shift);
+
+ if (page_shift >= new_page_shift)
+ return num_pages * (1 << (page_shift - new_page_shift));
+
+ return num_pages / n + !!(ceil && num_pages % n);
+}
+
+static inline int getpageshift(void)
+{
+ return __builtin_ffs(getpagesize()) - 1;
+}
+
+unsigned int
+vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+ return vm_calc_num_pages(num_guest_pages,
+ vm_guest_mode_params[mode].page_shift,
+ getpageshift(), true);
+}
+
+unsigned int
+vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages)
+{
+ return vm_calc_num_pages(num_host_pages, getpageshift(),
+ vm_guest_mode_params[mode].page_shift, false);
+}
+
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
+{
+ unsigned int n;
+ n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
+ return vm_adjust_num_guest_pages(mode, n);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index ac50c42750cf..2ef446520748 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -12,19 +12,7 @@
#define KVM_DEV_PATH "/dev/kvm"
-#ifndef BITS_PER_BYTE
-#define BITS_PER_BYTE 8
-#endif
-
-#ifndef BITS_PER_LONG
-#define BITS_PER_LONG (BITS_PER_BYTE * sizeof(long))
-#endif
-
-#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
-#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_LONG)
-
struct userspace_mem_region {
- struct userspace_mem_region *next, *prev;
struct kvm_userspace_memory_region region;
struct sparsebit *unused_phy_pages;
int fd;
@@ -32,10 +20,11 @@ struct userspace_mem_region {
void *host_mem;
void *mmap_start;
size_t mmap_size;
+ struct list_head list;
};
struct vcpu {
- struct vcpu *next, *prev;
+ struct list_head list;
uint32_t id;
int fd;
struct kvm_run *state;
@@ -52,8 +41,8 @@ struct kvm_vm {
unsigned int pa_bits;
unsigned int va_bits;
uint64_t max_gfn;
- struct vcpu *vcpu_head;
- struct userspace_mem_region *userspace_mem_region_head;
+ struct list_head vcpus;
+ struct list_head userspace_mem_regions;
struct sparsebit *vpages_valid;
struct sparsebit *vpages_mapped;
bool has_irqchip;
@@ -64,8 +53,56 @@ struct kvm_vm {
};
struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
+
+/*
+ * Virtual Translation Tables Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by @stream, the contents of all the
+ * virtual translation tables for the VM given by @vm.
+ */
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+/*
+ * Register Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * regs - Registers
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the state of the registers given by @regs, to the FILE stream
+ * given by @stream.
+ */
void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
+
+/*
+ * System Register Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * sregs - System registers
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps the state of the system registers given by @sregs, to the FILE stream
+ * given by @stream.
+ */
void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
struct userspace_mem_region *
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 32a02360b1eb..a88c5d665725 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -51,22 +51,6 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
}
-/*
- * VM Virtual Page Map
- *
- * Input Args:
- * vm - Virtual Machine
- * gva - VM Virtual Address
- * gpa - VM Physical Address
- * memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within the VM given by vm, creates a virtual translation for the page
- * starting at vaddr to the page starting at paddr.
- */
void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
uint32_t memslot)
{
@@ -107,26 +91,6 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
entry[idx] = gpa;
}
-/*
- * Address Guest Virtual to Guest Physical
- *
- * Input Args:
- * vm - Virtual Machine
- * gpa - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- * Equivalent VM physical address
- *
- * Translates the VM virtual address given by gva to a VM physical
- * address and then locates the memory region containing the VM
- * physical address, within the VM given by vm. When found, the host
- * virtual address providing the memory to the vm physical address is
- * returned.
- * A TEST_ASSERT failure occurs if no region containing translated
- * VM virtual address exists.
- */
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
int ri, idx;
@@ -196,21 +160,6 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump_region(stream, vm, indent, vm->pgd);
}
-/*
- * Create a VM with reasonable defaults
- *
- * Input Args:
- * vcpuid - The id of the single VCPU to add to the VM.
- * extra_mem_pages - The size of extra memories to add (this will
- * decide how much extra space we will need to
- * setup the page tables using mem slot 0)
- * guest_code - The vCPU's entry point
- *
- * Output Args: None
- *
- * Return:
- * Pointer to opaque structure that describes the created VM.
- */
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
void *guest_code)
{
@@ -231,13 +180,6 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
return vm;
}
-/*
- * Adds a vCPU with reasonable defaults (i.e. a stack and initial PSW)
- *
- * Input Args:
- * vcpuid - The id of the VCPU to add to the VM.
- * guest_code - The vCPU's entry point
- */
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
size_t stack_size = DEFAULT_STACK_PGS * getpagesize();
@@ -269,9 +211,32 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
run->psw_addr = (uintptr_t)guest_code;
}
+void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+{
+ va_list ap;
+ struct kvm_regs regs;
+ int i;
+
+ TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
+ " num: %u\n",
+ num);
+
+ va_start(ap, num);
+ vcpu_regs_get(vm, vcpuid, &regs);
+
+ for (i = 0; i < num; i++)
+ regs.gprs[i + 2] = va_arg(ap, uint64_t);
+
+ vcpu_regs_set(vm, vcpuid, &regs);
+ va_end(ap);
+}
+
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
- struct vcpu *vcpu = vm->vcpu_head;
+ struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+ if (!vcpu)
+ return;
fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
new file mode 100644
index 000000000000..689e97c27ee2
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * tools/testing/selftests/kvm/lib/test_util.c
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+#include <stdlib.h>
+#include <ctype.h>
+#include <limits.h>
+#include <assert.h>
+#include "test_util.h"
+
+/*
+ * Parses "[0-9]+[kmgt]?".
+ */
+size_t parse_size(const char *size)
+{
+ size_t base;
+ char *scale;
+ int shift = 0;
+
+ TEST_ASSERT(size && isdigit(size[0]), "Need at least one digit in '%s'", size);
+
+ base = strtoull(size, &scale, 0);
+
+ TEST_ASSERT(base != ULLONG_MAX, "Overflow parsing size!");
+
+ switch (tolower(*scale)) {
+ case 't':
+ shift = 40;
+ break;
+ case 'g':
+ shift = 30;
+ break;
+ case 'm':
+ shift = 20;
+ break;
+ case 'k':
+ shift = 10;
+ break;
+ case 'b':
+ case '\0':
+ shift = 0;
+ break;
+ default:
+ TEST_ASSERT(false, "Unknown size letter %c", *scale);
+ }
+
+ TEST_ASSERT((base << shift) >> shift == base, "Overflow scaling size!");
+
+ return base << shift;
+}
+
+int64_t timespec_to_ns(struct timespec ts)
+{
+ return (int64_t)ts.tv_nsec + 1000000000LL * (int64_t)ts.tv_sec;
+}
+
+struct timespec timespec_add_ns(struct timespec ts, int64_t ns)
+{
+ struct timespec res;
+
+ res.tv_nsec = ts.tv_nsec + ns;
+ res.tv_sec = ts.tv_sec + res.tv_nsec / 1000000000LL;
+ res.tv_nsec %= 1000000000LL;
+
+ return res;
+}
+
+struct timespec timespec_add(struct timespec ts1, struct timespec ts2)
+{
+ int64_t ns1 = timespec_to_ns(ts1);
+ int64_t ns2 = timespec_to_ns(ts2);
+ return timespec_add_ns((struct timespec){0}, ns1 + ns2);
+}
+
+struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
+{
+ int64_t ns1 = timespec_to_ns(ts1);
+ int64_t ns2 = timespec_to_ns(ts2);
+ return timespec_add_ns((struct timespec){0}, ns1 - ns2);
+}
+
+void print_skip(const char *fmt, ...)
+{
+ va_list ap;
+
+ assert(fmt);
+ va_start(ap, fmt);
+ vprintf(fmt, ap);
+ va_end(ap);
+ puts(", skipping test");
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 683d3bdb8f6a..f6eb34eaa0d2 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -77,20 +77,6 @@ struct pageTableEntry {
uint64_t execute_disable:1;
};
-/* Register Dump
- *
- * Input Args:
- * indent - Left margin indent amount
- * regs - register
- *
- * Output Args:
- * stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps the state of the registers given by regs, to the FILE stream
- * given by steam.
- */
void regs_dump(FILE *stream, struct kvm_regs *regs,
uint8_t indent)
{
@@ -115,19 +101,20 @@ void regs_dump(FILE *stream, struct kvm_regs *regs,
regs->rip, regs->rflags);
}
-/* Segment Dump
+/*
+ * Segment Dump
*
* Input Args:
- * indent - Left margin indent amount
+ * stream - Output FILE stream
* segment - KVM segment
+ * indent - Left margin indent amount
*
- * Output Args:
- * stream - Output FILE stream
+ * Output Args: None
*
* Return: None
*
- * Dumps the state of the KVM segment given by segment, to the FILE stream
- * given by steam.
+ * Dumps the state of the KVM segment given by @segment, to the FILE stream
+ * given by @stream.
*/
static void segment_dump(FILE *stream, struct kvm_segment *segment,
uint8_t indent)
@@ -146,19 +133,20 @@ static void segment_dump(FILE *stream, struct kvm_segment *segment,
segment->unusable, segment->padding);
}
-/* dtable Dump
+/*
+ * dtable Dump
*
* Input Args:
- * indent - Left margin indent amount
+ * stream - Output FILE stream
* dtable - KVM dtable
+ * indent - Left margin indent amount
*
- * Output Args:
- * stream - Output FILE stream
+ * Output Args: None
*
* Return: None
*
- * Dumps the state of the KVM dtable given by dtable, to the FILE stream
- * given by steam.
+ * Dumps the state of the KVM dtable given by @dtable, to the FILE stream
+ * given by @stream.
*/
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
uint8_t indent)
@@ -169,20 +157,6 @@ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}
-/* System Register Dump
- *
- * Input Args:
- * indent - Left margin indent amount
- * sregs - System registers
- *
- * Output Args:
- * stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps the state of the system registers given by sregs, to the FILE stream
- * given by steam.
- */
void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
uint8_t indent)
{
@@ -240,21 +214,6 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
}
}
-/* VM Virtual Page Map
- *
- * Input Args:
- * vm - Virtual Machine
- * vaddr - VM Virtual Address
- * paddr - VM Physical Address
- * pgd_memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within the VM given by vm, creates a virtual translation for the page
- * starting at vaddr to the page starting at paddr.
- */
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
uint32_t pgd_memslot)
{
@@ -326,20 +285,6 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
pte[index[0]].present = 1;
}
-/* Virtual Translation Tables Dump
- *
- * Input Args:
- * vm - Virtual Machine
- * indent - Left margin indent amount
- *
- * Output Args:
- * stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps to the FILE stream given by stream, the contents of all the
- * virtual translation tables for the VM given by vm.
- */
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
struct pageMapL4Entry *pml4e, *pml4e_start;
@@ -421,7 +366,8 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
-/* Set Unusable Segment
+/*
+ * Set Unusable Segment
*
* Input Args: None
*
@@ -430,7 +376,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
*
* Return: None
*
- * Sets the segment register pointed to by segp to an unusable state.
+ * Sets the segment register pointed to by @segp to an unusable state.
*/
static void kvm_seg_set_unusable(struct kvm_segment *segp)
{
@@ -460,7 +406,8 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
}
-/* Set Long Mode Flat Kernel Code Segment
+/*
+ * Set Long Mode Flat Kernel Code Segment
*
* Input Args:
* vm - VM whose GDT is being filled, or NULL to only write segp
@@ -471,8 +418,8 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
*
* Return: None
*
- * Sets up the KVM segment pointed to by segp, to be a code segment
- * with the selector value given by selector.
+ * Sets up the KVM segment pointed to by @segp, to be a code segment
+ * with the selector value given by @selector.
*/
static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
struct kvm_segment *segp)
@@ -491,7 +438,8 @@ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
kvm_seg_fill_gdt_64bit(vm, segp);
}
-/* Set Long Mode Flat Kernel Data Segment
+/*
+ * Set Long Mode Flat Kernel Data Segment
*
* Input Args:
* vm - VM whose GDT is being filled, or NULL to only write segp
@@ -502,8 +450,8 @@ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
*
* Return: None
*
- * Sets up the KVM segment pointed to by segp, to be a data segment
- * with the selector value given by selector.
+ * Sets up the KVM segment pointed to by @segp, to be a data segment
+ * with the selector value given by @selector.
*/
static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
struct kvm_segment *segp)
@@ -521,24 +469,6 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
kvm_seg_fill_gdt_64bit(vm, segp);
}
-/* Address Guest Virtual to Guest Physical
- *
- * Input Args:
- * vm - Virtual Machine
- * gpa - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- * Equivalent VM physical address
- *
- * Translates the VM virtual address given by gva to a VM physical
- * address and then locates the memory region containing the VM
- * physical address, within the VM given by vm. When found, the host
- * virtual address providing the memory to the vm physical address is returned.
- * A TEST_ASSERT failure occurs if no region containing translated
- * VM virtual address exists.
- */
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint16_t index[4];
@@ -576,8 +506,7 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
unmapped_gva:
- TEST_ASSERT(false, "No mapping for vm virtual address, "
- "gva: 0x%lx", gva);
+ TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
exit(EXIT_FAILURE);
}
@@ -634,18 +563,13 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
break;
default:
- TEST_ASSERT(false, "Unknown guest mode, mode: 0x%x", vm->mode);
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
sregs.cr3 = vm->pgd;
vcpu_sregs_set(vm, vcpuid, &sregs);
}
-/* Adds a vCPU with reasonable defaults (i.e., a stack)
- *
- * Input Args:
- * vcpuid - The id of the VCPU to add to the VM.
- * guest_code - The vCPU's entry point
- */
+
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
struct kvm_mp_state mp_state;
@@ -670,7 +594,8 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
vcpu_set_mp_state(vm, vcpuid, &mp_state);
}
-/* Allocate an instance of struct kvm_cpuid2
+/*
+ * Allocate an instance of struct kvm_cpuid2
*
* Input Args: None
*
@@ -703,7 +628,8 @@ static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
return cpuid;
}
-/* KVM Supported CPUID Get
+/*
+ * KVM Supported CPUID Get
*
* Input Args: None
*
@@ -735,11 +661,12 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
return cpuid;
}
-/* Locate a cpuid entry.
+/*
+ * Locate a cpuid entry.
*
* Input Args:
- * cpuid: The cpuid.
* function: The function of the cpuid entry to find.
+ * index: The index of the cpuid entry.
*
* Output Args: None
*
@@ -766,7 +693,8 @@ kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
return entry;
}
-/* VM VCPU CPUID Set
+/*
+ * VM VCPU CPUID Set
*
* Input Args:
* vm - Virtual Machine
@@ -793,20 +721,6 @@ void vcpu_set_cpuid(struct kvm_vm *vm,
}
-/* Create a VM with reasonable defaults
- *
- * Input Args:
- * vcpuid - The id of the single VCPU to add to the VM.
- * extra_mem_pages - The size of extra memories to add (this will
- * decide how much extra space we will need to
- * setup the page tables using mem slot 0)
- * guest_code - The vCPU's entry point
- *
- * Output Args: None
- *
- * Return:
- * Pointer to opaque structure that describes the created VM.
- */
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
void *guest_code)
{
@@ -837,7 +751,8 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
return vm;
}
-/* VCPU Get MSR
+/*
+ * VCPU Get MSR
*
* Input Args:
* vm - Virtual Machine
@@ -869,7 +784,8 @@ uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
return buffer.entry.data;
}
-/* _VCPU Set MSR
+/*
+ * _VCPU Set MSR
*
* Input Args:
* vm - Virtual Machine
@@ -902,7 +818,8 @@ int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
return r;
}
-/* VCPU Set MSR
+/*
+ * VCPU Set MSR
*
* Input Args:
* vm - Virtual Machine
@@ -926,22 +843,6 @@ void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
" rc: %i errno: %i", r, errno);
}
-/* VM VCPU Args Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * num - number of arguments
- * ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first num function input arguments to the values
- * given as variable args. Each of the variable args is expected to
- * be of type uint64_t.
- */
void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
{
va_list ap;
@@ -976,22 +877,6 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
va_end(ap);
}
-/*
- * VM VCPU Dump
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * indent - Left margin indent amount
- *
- * Output Args:
- * stream - Output FILE stream
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by vcpuid, within the VM
- * given by vm, to the FILE stream given by stream.
- */
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
struct kvm_regs regs;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 6e05a8fc3fe0..c42401068373 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -154,7 +154,7 @@ void nested_svm_check_supported(void)
kvm_get_supported_cpuid_entry(0x80000001);
if (!(entry->ecx & CPUID_SVM)) {
- fprintf(stderr, "nested SVM not enabled, skipping test\n");
+ print_skip("nested SVM not enabled");
exit(KSFT_SKIP);
}
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 7aaa99ca4dbc..4ae104f6ce69 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -17,6 +17,9 @@
bool enable_evmcs;
+struct hv_enlightened_vmcs *current_evmcs;
+struct hv_vp_assist_page *current_vp_assist;
+
struct eptPageTableEntry {
uint64_t readable:1;
uint64_t writable:1;
@@ -191,7 +194,7 @@ bool load_vmcs(struct vmx_pages *vmx)
if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa,
vmx->enlightened_vmcs))
return false;
- current_evmcs->revision_id = vmcs_revision();
+ current_evmcs->revision_id = EVMCS_VERSION;
}
return true;
@@ -381,7 +384,7 @@ void nested_vmx_check_supported(void)
struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
if (!(entry->ecx & CPUID_VMX)) {
- fprintf(stderr, "nested VMX not enabled, skipping test\n");
+ print_skip("nested VMX not enabled");
exit(KSFT_SKIP);
}
}
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 9edaa9a134ce..9f49ead380ab 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -40,7 +40,7 @@ int main(int argc, char *argv[])
maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP);
if (!maxsize) {
- fprintf(stderr, "CAP_S390_MEM_OP not supported -> skip test\n");
+ print_skip("CAP_S390_MEM_OP not supported");
exit(KSFT_SKIP);
}
if (maxsize > sizeof(mem1))
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c
index 1485bc6c8999..b143db6d8693 100644
--- a/tools/testing/selftests/kvm/s390x/resets.c
+++ b/tools/testing/selftests/kvm/s390x/resets.c
@@ -20,29 +20,42 @@ struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS];
struct kvm_vm *vm;
struct kvm_run *run;
-struct kvm_sync_regs *regs;
-static uint64_t regs_null[16];
-
-static uint64_t crs[16] = { 0x40000ULL,
- 0x42000ULL,
- 0, 0, 0, 0, 0,
- 0x43000ULL,
- 0, 0, 0, 0, 0,
- 0x44000ULL,
- 0, 0
-};
+struct kvm_sync_regs *sync_regs;
+static uint8_t regs_null[512];
static void guest_code_initial(void)
{
- /* Round toward 0 */
- uint32_t fpc = 0x11;
+ /* set several CRs to "safe" value */
+ unsigned long cr2_59 = 0x10; /* enable guarded storage */
+ unsigned long cr8_63 = 0x1; /* monitor mask = 1 */
+ unsigned long cr10 = 1; /* PER START */
+ unsigned long cr11 = -1; /* PER END */
+
/* Dirty registers */
asm volatile (
- " lctlg 0,15,%0\n"
- " sfpc %1\n"
- : : "Q" (crs), "d" (fpc));
- GUEST_SYNC(0);
+ " lghi 2,0x11\n" /* Round toward 0 */
+ " sfpc 2\n" /* set fpc to !=0 */
+ " lctlg 2,2,%0\n"
+ " lctlg 8,8,%1\n"
+ " lctlg 10,10,%2\n"
+ " lctlg 11,11,%3\n"
+ /* now clobber some general purpose regs */
+ " llihh 0,0xffff\n"
+ " llihl 1,0x5555\n"
+ " llilh 2,0xaaaa\n"
+ " llill 3,0x0000\n"
+ /* now clobber a floating point reg */
+ " lghi 4,0x1\n"
+ " cdgbr 0,4\n"
+ /* now clobber an access reg */
+ " sar 9,4\n"
+ /* We embed diag 501 here to control register content */
+ " diag 0,0,0x501\n"
+ :
+ : "m" (cr2_59), "m" (cr8_63), "m" (cr10), "m" (cr11)
+ /* no clobber list as this should not return */
+ );
}
static void test_one_reg(uint64_t id, uint64_t value)
@@ -53,7 +66,7 @@ static void test_one_reg(uint64_t id, uint64_t value)
reg.addr = (uintptr_t)&eval_reg;
reg.id = id;
vcpu_get_reg(vm, VCPU_ID, &reg);
- TEST_ASSERT(eval_reg == value, "value == %s", value);
+ TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
}
static void assert_noirq(void)
@@ -87,6 +100,31 @@ static void assert_clear(void)
vcpu_fpu_get(vm, VCPU_ID, &fpu);
TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
+
+ /* sync regs */
+ TEST_ASSERT(!memcmp(sync_regs->gprs, regs_null, sizeof(sync_regs->gprs)),
+ "gprs0-15 == 0 (sync_regs)");
+
+ TEST_ASSERT(!memcmp(sync_regs->acrs, regs_null, sizeof(sync_regs->acrs)),
+ "acrs0-15 == 0 (sync_regs)");
+
+ TEST_ASSERT(!memcmp(sync_regs->vrs, regs_null, sizeof(sync_regs->vrs)),
+ "vrs0-15 == 0 (sync_regs)");
+}
+
+static void assert_initial_noclear(void)
+{
+ TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
+ "gpr0 == 0xffff000000000000 (sync_regs)");
+ TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
+ "gpr1 == 0x0000555500000000 (sync_regs)");
+ TEST_ASSERT(sync_regs->gprs[2] == 0x00000000aaaa0000UL,
+ "gpr2 == 0x00000000aaaa0000 (sync_regs)");
+ TEST_ASSERT(sync_regs->gprs[3] == 0x0000000000000000UL,
+ "gpr3 == 0x0000000000000000 (sync_regs)");
+ TEST_ASSERT(sync_regs->fprs[0] == 0x3ff0000000000000UL,
+ "fpr0 == 0f1 (sync_regs)");
+ TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");
}
static void assert_initial(void)
@@ -94,12 +132,32 @@ static void assert_initial(void)
struct kvm_sregs sregs;
struct kvm_fpu fpu;
+ /* KVM_GET_SREGS */
vcpu_sregs_get(vm, VCPU_ID, &sregs);
- TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0");
- TEST_ASSERT(sregs.crs[14] == 0xC2000000UL, "cr14 == 0xC2000000");
+ TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
+ TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
+ "cr14 == 0xC2000000 (KVM_GET_SREGS)");
TEST_ASSERT(!memcmp(&sregs.crs[1], regs_null, sizeof(sregs.crs[1]) * 12),
- "cr1-13 == 0");
- TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0");
+ "cr1-13 == 0 (KVM_GET_SREGS)");
+ TEST_ASSERT(sregs.crs[15] == 0, "cr15 == 0 (KVM_GET_SREGS)");
+
+ /* sync regs */
+ TEST_ASSERT(sync_regs->crs[0] == 0xE0UL, "cr0 == 0xE0 (sync_regs)");
+ TEST_ASSERT(sync_regs->crs[14] == 0xC2000000UL,
+ "cr14 == 0xC2000000 (sync_regs)");
+ TEST_ASSERT(!memcmp(&sync_regs->crs[1], regs_null, 8 * 12),
+ "cr1-13 == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->crs[15] == 0, "cr15 == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->fpc == 0, "fpc == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->todpr == 0, "todpr == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->cputm == 0, "cputm == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->ckc == 0, "ckc == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->pp == 0, "pp == 0 (sync_regs)");
+ TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
+
+ /* kvm_run */
+ TEST_ASSERT(run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
+ TEST_ASSERT(run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
vcpu_fpu_get(vm, VCPU_ID, &fpu);
TEST_ASSERT(!fpu.fpc, "fpc == 0");
@@ -111,9 +169,19 @@ static void assert_initial(void)
test_one_reg(KVM_REG_S390_CLOCK_COMP, 0);
}
+static void assert_normal_noclear(void)
+{
+ TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 10 (sync_regs)");
+ TEST_ASSERT(sync_regs->crs[8] == 1, "cr10 == 1 (sync_regs)");
+ TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
+ TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
+}
+
static void assert_normal(void)
{
test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
+ TEST_ASSERT(sync_regs->pft == KVM_S390_PFAULT_TOKEN_INVALID,
+ "pft == 0xff..... (sync_regs)");
assert_noirq();
}
@@ -134,53 +202,67 @@ static void inject_irq(int cpu_id)
static void test_normal(void)
{
- printf("Testing normal reset\n");
+ pr_info("Testing normal reset\n");
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
run = vcpu_state(vm, VCPU_ID);
- regs = &run->s.regs;
+ sync_regs = &run->s.regs;
vcpu_run(vm, VCPU_ID);
inject_irq(VCPU_ID);
vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0);
+
+ /* must clears */
assert_normal();
+ /* must not clears */
+ assert_normal_noclear();
+ assert_initial_noclear();
+
kvm_vm_free(vm);
}
static void test_initial(void)
{
- printf("Testing initial reset\n");
+ pr_info("Testing initial reset\n");
vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
run = vcpu_state(vm, VCPU_ID);
- regs = &run->s.regs;
+ sync_regs = &run->s.regs;
vcpu_run(vm, VCPU_ID);
inject_irq(VCPU_ID);
vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0);
+
+ /* must clears */
assert_normal();
assert_initial();
+ /* must not clears */
+ assert_initial_noclear();
+
kvm_vm_free(vm);
}
static void test_clear(void)
{
- printf("Testing clear reset\n");
+ pr_info("Testing clear reset\n");
vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
run = vcpu_state(vm, VCPU_ID);
- regs = &run->s.regs;
+ sync_regs = &run->s.regs;
vcpu_run(vm, VCPU_ID);
inject_irq(VCPU_ID);
vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0);
+
+ /* must clears */
assert_normal();
assert_initial();
assert_clear();
+
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
index b705637ca14b..5731ccf34917 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -42,6 +42,13 @@ static void guest_code(void)
" values did not match: 0x%llx, 0x%llx\n", \
left->reg, right->reg)
+#define REG_COMPARE32(reg) \
+ TEST_ASSERT(left->reg == right->reg, \
+ "Register " #reg \
+ " values did not match: 0x%x, 0x%x\n", \
+ left->reg, right->reg)
+
+
static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
{
int i;
@@ -55,7 +62,7 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
int i;
for (i = 0; i < 16; i++)
- REG_COMPARE(acrs[i]);
+ REG_COMPARE32(acrs[i]);
for (i = 0; i < 16; i++)
REG_COMPARE(crs[i]);
@@ -79,7 +86,7 @@ int main(int argc, char *argv[])
cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
if (!cap) {
- fprintf(stderr, "CAP_SYNC_REGS not supported, skipping test\n");
+ print_skip("CAP_SYNC_REGS not supported");
exit(KSFT_SKIP);
}
@@ -155,7 +162,7 @@ int main(int argc, char *argv[])
"r11 sync regs value incorrect 0x%llx.",
run->s.regs.gprs[11]);
TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11,
- "acr0 sync regs value incorrect 0x%llx.",
+ "acr0 sync regs value incorrect 0x%x.",
run->s.regs.acrs[0]);
vcpu_regs_get(vm, VCPU_ID, &regs);
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
new file mode 100644
index 000000000000..b3ece55a2da6
--- /dev/null
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -0,0 +1,408 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <linux/compiler.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define VCPU_ID 0
+
+/*
+ * s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
+ * 2MB sized and aligned region so that the initial region corresponds to
+ * exactly one large page.
+ */
+#define MEM_REGION_SIZE 0x200000
+
+#ifdef __x86_64__
+/*
+ * Somewhat arbitrary location and slot, intended to not overlap anything.
+ */
+#define MEM_REGION_GPA 0xc0000000
+#define MEM_REGION_SLOT 10
+
+static const uint64_t MMIO_VAL = 0xbeefull;
+
+extern const uint64_t final_rip_start;
+extern const uint64_t final_rip_end;
+
+static sem_t vcpu_ready;
+
+static inline uint64_t guest_spin_on_val(uint64_t spin_val)
+{
+ uint64_t val;
+
+ do {
+ val = READ_ONCE(*((uint64_t *)MEM_REGION_GPA));
+ } while (val == spin_val);
+
+ GUEST_SYNC(0);
+ return val;
+}
+
+static void *vcpu_worker(void *data)
+{
+ struct kvm_vm *vm = data;
+ struct kvm_run *run;
+ struct ucall uc;
+ uint64_t cmd;
+
+ /*
+ * Loop until the guest is done. Re-enter the guest on all MMIO exits,
+ * which will occur if the guest attempts to access a memslot after it
+ * has been deleted or while it is being moved .
+ */
+ run = vcpu_state(vm, VCPU_ID);
+
+ while (1) {
+ vcpu_run(vm, VCPU_ID);
+
+ if (run->exit_reason == KVM_EXIT_IO) {
+ cmd = get_ucall(vm, VCPU_ID, &uc);
+ if (cmd != UCALL_SYNC)
+ break;
+
+ sem_post(&vcpu_ready);
+ continue;
+ }
+
+ if (run->exit_reason != KVM_EXIT_MMIO)
+ break;
+
+ TEST_ASSERT(!run->mmio.is_write, "Unexpected exit mmio write");
+ TEST_ASSERT(run->mmio.len == 8,
+ "Unexpected exit mmio size = %u", run->mmio.len);
+
+ TEST_ASSERT(run->mmio.phys_addr == MEM_REGION_GPA,
+ "Unexpected exit mmio address = 0x%llx",
+ run->mmio.phys_addr);
+ memcpy(run->mmio.data, &MMIO_VAL, 8);
+ }
+
+ if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
+ TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
+ __FILE__, uc.args[1], uc.args[2]);
+
+ return NULL;
+}
+
+static void wait_for_vcpu(void)
+{
+ struct timespec ts;
+
+ TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
+ "clock_gettime() failed: %d\n", errno);
+
+ ts.tv_sec += 2;
+ TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
+ "sem_timedwait() failed: %d\n", errno);
+
+ /* Wait for the vCPU thread to reenter the guest. */
+ usleep(100000);
+}
+
+static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
+{
+ struct kvm_vm *vm;
+ uint64_t *hva;
+ uint64_t gpa;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / getpagesize(), 0);
+
+ /*
+ * Allocate and map two pages so that the GPA accessed by guest_code()
+ * stays valid across the memslot move.
+ */
+ gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
+ TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
+
+ virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+
+ /* Ditto for the host mapping so that both pages can be zeroed. */
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+ memset(hva, 0, 2 * 4096);
+
+ pthread_create(vcpu_thread, NULL, vcpu_worker, vm);
+
+ /* Ensure the guest thread is spun up. */
+ wait_for_vcpu();
+
+ return vm;
+}
+
+
+static void guest_code_move_memory_region(void)
+{
+ uint64_t val;
+
+ GUEST_SYNC(0);
+
+ /*
+ * Spin until the memory region is moved to a misaligned address. This
+ * may or may not trigger MMIO, as the window where the memslot is
+ * invalid is quite small.
+ */
+ val = guest_spin_on_val(0);
+ GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+
+ /* Spin until the memory region is realigned. */
+ val = guest_spin_on_val(MMIO_VAL);
+ GUEST_ASSERT_1(val == 1, val);
+
+ GUEST_DONE();
+}
+
+static void test_move_memory_region(void)
+{
+ pthread_t vcpu_thread;
+ struct kvm_vm *vm;
+ uint64_t *hva;
+
+ vm = spawn_vm(&vcpu_thread, guest_code_move_memory_region);
+
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+
+ /*
+ * Shift the region's base GPA. The guest should not see "2" as the
+ * hva->gpa translation is misaligned, i.e. the guest is accessing a
+ * different host pfn.
+ */
+ vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA - 4096);
+ WRITE_ONCE(*hva, 2);
+
+ /*
+ * The guest _might_ see an invalid memslot and trigger MMIO, but it's
+ * a tiny window. Spin and defer the sync until the memslot is
+ * restored and guest behavior is once again deterministic.
+ */
+ usleep(100000);
+
+ /*
+ * Note, value in memory needs to be changed *before* restoring the
+ * memslot, else the guest could race the update and see "2".
+ */
+ WRITE_ONCE(*hva, 1);
+
+ /* Restore the original base, the guest should see "1". */
+ vm_mem_region_move(vm, MEM_REGION_SLOT, MEM_REGION_GPA);
+ wait_for_vcpu();
+ /* Defered sync from when the memslot was misaligned (above). */
+ wait_for_vcpu();
+
+ pthread_join(vcpu_thread, NULL);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_code_delete_memory_region(void)
+{
+ uint64_t val;
+
+ GUEST_SYNC(0);
+
+ /* Spin until the memory region is deleted. */
+ val = guest_spin_on_val(0);
+ GUEST_ASSERT_1(val == MMIO_VAL, val);
+
+ /* Spin until the memory region is recreated. */
+ val = guest_spin_on_val(MMIO_VAL);
+ GUEST_ASSERT_1(val == 0, val);
+
+ /* Spin until the memory region is deleted. */
+ val = guest_spin_on_val(0);
+ GUEST_ASSERT_1(val == MMIO_VAL, val);
+
+ asm("1:\n\t"
+ ".pushsection .rodata\n\t"
+ ".global final_rip_start\n\t"
+ "final_rip_start: .quad 1b\n\t"
+ ".popsection");
+
+ /* Spin indefinitely (until the code memslot is deleted). */
+ guest_spin_on_val(MMIO_VAL);
+
+ asm("1:\n\t"
+ ".pushsection .rodata\n\t"
+ ".global final_rip_end\n\t"
+ "final_rip_end: .quad 1b\n\t"
+ ".popsection");
+
+ GUEST_ASSERT_1(0, 0);
+}
+
+static void test_delete_memory_region(void)
+{
+ pthread_t vcpu_thread;
+ struct kvm_regs regs;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+
+ vm = spawn_vm(&vcpu_thread, guest_code_delete_memory_region);
+
+ /* Delete the memory region, the guest should not die. */
+ vm_mem_region_delete(vm, MEM_REGION_SLOT);
+ wait_for_vcpu();
+
+ /* Recreate the memory region. The guest should see "0". */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / getpagesize(), 0);
+ wait_for_vcpu();
+
+ /* Delete the region again so that there's only one memslot left. */
+ vm_mem_region_delete(vm, MEM_REGION_SLOT);
+ wait_for_vcpu();
+
+ /*
+ * Delete the primary memslot. This should cause an emulation error or
+ * shutdown due to the page tables getting nuked.
+ */
+ vm_mem_region_delete(vm, 0);
+
+ pthread_join(vcpu_thread, NULL);
+
+ run = vcpu_state(vm, VCPU_ID);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN ||
+ run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+ "Unexpected exit reason = %d", run->exit_reason);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+
+ /*
+ * On AMD, after KVM_EXIT_SHUTDOWN the VMCB has been reinitialized already,
+ * so the instruction pointer would point to the reset vector.
+ */
+ if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR)
+ TEST_ASSERT(regs.rip >= final_rip_start &&
+ regs.rip < final_rip_end,
+ "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n",
+ final_rip_start, final_rip_end, regs.rip);
+
+ kvm_vm_free(vm);
+}
+
+static void test_zero_memory_regions(void)
+{
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+
+ pr_info("Testing KVM_RUN with zero added memory regions\n");
+
+ vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+
+ TEST_ASSERT(!ioctl(vm_get_fd(vm), KVM_SET_NR_MMU_PAGES, 64),
+ "KVM_SET_NR_MMU_PAGES failed, errno = %d\n", errno);
+ vcpu_run(vm, VCPU_ID);
+
+ run = vcpu_state(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+ "Unexpected exit_reason = %u\n", run->exit_reason);
+
+ kvm_vm_free(vm);
+}
+#endif /* __x86_64__ */
+
+/*
+ * Test it can be added memory slots up to KVM_CAP_NR_MEMSLOTS, then any
+ * tentative to add further slots should fail.
+ */
+static void test_add_max_memory_regions(void)
+{
+ int ret;
+ struct kvm_vm *vm;
+ uint32_t max_mem_slots;
+ uint32_t slot;
+ uint64_t guest_addr = 0x0;
+ uint64_t mem_reg_npages;
+ void *mem;
+
+ max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+ TEST_ASSERT(max_mem_slots > 0,
+ "KVM_CAP_NR_MEMSLOTS should be greater than 0");
+ pr_info("Allowed number of memory slots: %i\n", max_mem_slots);
+
+ vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
+
+ mem_reg_npages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, MEM_REGION_SIZE);
+
+ /* Check it can be added memory slots up to the maximum allowed */
+ pr_info("Adding slots 0..%i, each memory region with %dK size\n",
+ (max_mem_slots - 1), MEM_REGION_SIZE >> 10);
+ for (slot = 0; slot < max_mem_slots; slot++) {
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ guest_addr, slot, mem_reg_npages,
+ 0);
+ guest_addr += MEM_REGION_SIZE;
+ }
+
+ /* Check it cannot be added memory slots beyond the limit */
+ mem = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+
+ ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION,
+ &(struct kvm_userspace_memory_region) {slot, 0, guest_addr,
+ MEM_REGION_SIZE, (uint64_t) mem});
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Adding one more memory slot should fail with EINVAL");
+
+ munmap(mem, MEM_REGION_SIZE);
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+#ifdef __x86_64__
+ int i, loops;
+#endif
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+#ifdef __x86_64__
+ /*
+ * FIXME: the zero-memslot test fails on aarch64 and s390x because
+ * KVM_RUN fails with ENOEXEC or EFAULT.
+ */
+ test_zero_memory_regions();
+#endif
+
+ test_add_max_memory_regions();
+
+#ifdef __x86_64__
+ if (argc > 1)
+ loops = atoi(argv[1]);
+ else
+ loops = 10;
+
+ pr_info("Testing MOVE of in-use region, %d loops\n", loops);
+ for (i = 0; i < loops; i++)
+ test_move_memory_region();
+
+ pr_info("Testing DELETE of in-use region, %d loops\n", loops);
+ for (i = 0; i < loops; i++)
+ test_delete_memory_region();
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
new file mode 100644
index 000000000000..fcc840088c91
--- /dev/null
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * steal/stolen time test
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <time.h>
+#include <sched.h>
+#include <pthread.h>
+#include <linux/kernel.h>
+#include <sys/syscall.h>
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define NR_VCPUS 4
+#define ST_GPA_BASE (1 << 30)
+#define MIN_RUN_DELAY_NS 200000UL
+
+static void *st_gva[NR_VCPUS];
+static uint64_t guest_stolen_time[NR_VCPUS];
+
+#if defined(__x86_64__)
+
+/* steal_time must have 64-byte alignment */
+#define STEAL_TIME_SIZE ((sizeof(struct kvm_steal_time) + 63) & ~63)
+
+static void check_status(struct kvm_steal_time *st)
+{
+ GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
+ GUEST_ASSERT(READ_ONCE(st->flags) == 0);
+ GUEST_ASSERT(READ_ONCE(st->preempted) == 0);
+}
+
+static void guest_code(int cpu)
+{
+ struct kvm_steal_time *st = st_gva[cpu];
+ uint32_t version;
+
+ GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
+
+ memset(st, 0, sizeof(*st));
+ GUEST_SYNC(0);
+
+ check_status(st);
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ version = READ_ONCE(st->version);
+ check_status(st);
+ GUEST_SYNC(1);
+
+ check_status(st);
+ GUEST_ASSERT(version < READ_ONCE(st->version));
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ check_status(st);
+ GUEST_DONE();
+}
+
+static void steal_time_init(struct kvm_vm *vm)
+{
+ int i;
+
+ if (!(kvm_get_supported_cpuid_entry(KVM_CPUID_FEATURES)->eax &
+ KVM_FEATURE_STEAL_TIME)) {
+ print_skip("steal-time not supported");
+ exit(KSFT_SKIP);
+ }
+
+ for (i = 0; i < NR_VCPUS; ++i) {
+ int ret;
+
+ vcpu_set_cpuid(vm, i, kvm_get_supported_cpuid());
+
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ sync_global_to_guest(vm, st_gva[i]);
+
+ ret = _vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
+ TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
+
+ vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
+ }
+}
+
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
+ int i;
+
+ pr_info("VCPU%d:\n", vcpuid);
+ pr_info(" steal: %lld\n", st->steal);
+ pr_info(" version: %d\n", st->version);
+ pr_info(" flags: %d\n", st->flags);
+ pr_info(" preempted: %d\n", st->preempted);
+ pr_info(" u8_pad: ");
+ for (i = 0; i < 3; ++i)
+ pr_info("%d", st->u8_pad[i]);
+ pr_info("\n pad: ");
+ for (i = 0; i < 11; ++i)
+ pr_info("%d", st->pad[i]);
+ pr_info("\n");
+}
+
+#elif defined(__aarch64__)
+
+/* PV_TIME_ST must have 64-byte alignment */
+#define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63)
+
+#define SMCCC_ARCH_FEATURES 0x80000001
+#define PV_TIME_FEATURES 0xc5000020
+#define PV_TIME_ST 0xc5000021
+
+struct st_time {
+ uint32_t rev;
+ uint32_t attr;
+ uint64_t st_time;
+};
+
+static int64_t smccc(uint32_t func, uint32_t arg)
+{
+ unsigned long ret;
+
+ asm volatile(
+ "mov x0, %1\n"
+ "mov x1, %2\n"
+ "hvc #0\n"
+ "mov %0, x0\n"
+ : "=r" (ret) : "r" (func), "r" (arg) :
+ "x0", "x1", "x2", "x3");
+
+ return ret;
+}
+
+static void check_status(struct st_time *st)
+{
+ GUEST_ASSERT(READ_ONCE(st->rev) == 0);
+ GUEST_ASSERT(READ_ONCE(st->attr) == 0);
+}
+
+static void guest_code(int cpu)
+{
+ struct st_time *st;
+ int64_t status;
+
+ status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES);
+ GUEST_ASSERT(status == 0);
+ status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES);
+ GUEST_ASSERT(status == 0);
+ status = smccc(PV_TIME_FEATURES, PV_TIME_ST);
+ GUEST_ASSERT(status == 0);
+
+ status = smccc(PV_TIME_ST, 0);
+ GUEST_ASSERT(status != -1);
+ GUEST_ASSERT(status == (ulong)st_gva[cpu]);
+
+ st = (struct st_time *)status;
+ GUEST_SYNC(0);
+
+ check_status(st);
+ WRITE_ONCE(guest_stolen_time[cpu], st->st_time);
+ GUEST_SYNC(1);
+
+ check_status(st);
+ WRITE_ONCE(guest_stolen_time[cpu], st->st_time);
+ GUEST_DONE();
+}
+
+static void steal_time_init(struct kvm_vm *vm)
+{
+ struct kvm_device_attr dev = {
+ .group = KVM_ARM_VCPU_PVTIME_CTRL,
+ .attr = KVM_ARM_VCPU_PVTIME_IPA,
+ };
+ int i, ret;
+
+ ret = _vcpu_ioctl(vm, 0, KVM_HAS_DEVICE_ATTR, &dev);
+ if (ret != 0 && errno == ENXIO) {
+ print_skip("steal-time not supported");
+ exit(KSFT_SKIP);
+ }
+
+ for (i = 0; i < NR_VCPUS; ++i) {
+ uint64_t st_ipa;
+
+ vcpu_ioctl(vm, i, KVM_HAS_DEVICE_ATTR, &dev);
+
+ dev.addr = (uint64_t)&st_ipa;
+
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ sync_global_to_guest(vm, st_gva[i]);
+
+ st_ipa = (ulong)st_gva[i] | 1;
+ ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
+
+ st_ipa = (ulong)st_gva[i];
+ vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+
+ ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
+
+ }
+}
+
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
+
+ pr_info("VCPU%d:\n", vcpuid);
+ pr_info(" rev: %d\n", st->rev);
+ pr_info(" attr: %d\n", st->attr);
+ pr_info(" st_time: %ld\n", st->st_time);
+}
+
+#endif
+
+static long get_run_delay(void)
+{
+ char path[64];
+ long val[2];
+ FILE *fp;
+
+ sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+ fp = fopen(path, "r");
+ fscanf(fp, "%ld %ld ", &val[0], &val[1]);
+ fclose(fp);
+
+ return val[1];
+}
+
+static void *do_steal_time(void *arg)
+{
+ struct timespec ts, stop;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ stop = timespec_add_ns(ts, MIN_RUN_DELAY_NS);
+
+ while (1) {
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ if (timespec_to_ns(timespec_sub(ts, stop)) >= 0)
+ break;
+ }
+
+ return NULL;
+}
+
+static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct ucall uc;
+
+ vcpu_args_set(vm, vcpuid, 1, vcpuid);
+
+ vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);
+
+ switch (get_ucall(vm, vcpuid, &uc)) {
+ case UCALL_SYNC:
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ }
+}
+
+int main(int ac, char **av)
+{
+ struct kvm_vm *vm;
+ pthread_attr_t attr;
+ pthread_t thread;
+ cpu_set_t cpuset;
+ unsigned int gpages;
+ long stolen_time;
+ long run_delay;
+ bool verbose;
+ int i;
+
+ verbose = ac > 1 && (!strncmp(av[1], "-v", 3) || !strncmp(av[1], "--verbose", 10));
+
+ /* Set CPU affinity so we can force preemption of the VCPU */
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ pthread_attr_init(&attr);
+ pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
+
+ /* Create a one VCPU guest and an identity mapped memslot for the steal time structure */
+ vm = vm_create_default(0, 0, guest_code);
+ gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS);
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
+ virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages, 0);
+ ucall_init(vm, NULL);
+
+ /* Add the rest of the VCPUs */
+ for (i = 1; i < NR_VCPUS; ++i)
+ vm_vcpu_add_default(vm, i, guest_code);
+
+ steal_time_init(vm);
+
+ /* Run test on each VCPU */
+ for (i = 0; i < NR_VCPUS; ++i) {
+ /* First VCPU run initializes steal-time */
+ run_vcpu(vm, i);
+
+ /* Second VCPU run, expect guest stolen time to be <= run_delay */
+ run_vcpu(vm, i);
+ sync_global_from_guest(vm, guest_stolen_time[i]);
+ stolen_time = guest_stolen_time[i];
+ run_delay = get_run_delay();
+ TEST_ASSERT(stolen_time <= run_delay,
+ "Expected stolen time <= %ld, got %ld",
+ run_delay, stolen_time);
+
+ /* Steal time from the VCPU. The steal time thread has the same CPU affinity as the VCPUs. */
+ run_delay = get_run_delay();
+ pthread_create(&thread, &attr, do_steal_time, NULL);
+ do
+ pthread_yield();
+ while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS);
+ pthread_join(thread, NULL);
+ run_delay = get_run_delay() - run_delay;
+ TEST_ASSERT(run_delay >= MIN_RUN_DELAY_NS,
+ "Expected run_delay >= %ld, got %ld",
+ MIN_RUN_DELAY_NS, run_delay);
+
+ /* Run VCPU again to confirm stolen time is consistent with run_delay */
+ run_vcpu(vm, i);
+ sync_global_from_guest(vm, guest_stolen_time[i]);
+ stolen_time = guest_stolen_time[i] - stolen_time;
+ TEST_ASSERT(stolen_time >= run_delay,
+ "Expected stolen time >= %ld, got %ld",
+ run_delay, stolen_time);
+
+ if (verbose) {
+ pr_info("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld", i,
+ guest_stolen_time[i], stolen_time);
+ if (stolen_time == run_delay)
+ pr_info(" (BONUS: guest test-stolen-time even exactly matches test-run_delay)");
+ pr_info("\n");
+ steal_time_dump(vm, i);
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 63cc9c3f5ab6..140e91901582 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -72,7 +72,7 @@ int main(int argc, char *argv[])
entry = kvm_get_supported_cpuid_entry(1);
if (!(entry->ecx & X86_FEATURE_XSAVE)) {
- printf("XSAVE feature not supported, skipping test\n");
+ print_skip("XSAVE feature not supported");
return 0;
}
@@ -101,12 +101,12 @@ int main(int argc, char *argv[])
vcpu_sregs_set(vm, VCPU_ID, &sregs);
break;
case UCALL_ABORT:
- TEST_ASSERT(false, "Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+ TEST_FAIL("Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
break;
case UCALL_DONE:
goto done;
default:
- TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
}
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
new file mode 100644
index 000000000000..8162c58a1234
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM guest debug register tests
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 0
+
+#define DR6_BD (1 << 13)
+#define DR7_GD (1 << 13)
+
+/* For testing data access debug BP */
+uint32_t guest_value;
+
+extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
+
+static void guest_code(void)
+{
+ /*
+ * Software BP tests.
+ *
+ * NOTE: sw_bp need to be before the cmd here, because int3 is an
+ * exception rather than a normal trap for KVM_SET_GUEST_DEBUG (we
+ * capture it using the vcpu exception bitmap).
+ */
+ asm volatile("sw_bp: int3");
+
+ /* Hardware instruction BP test */
+ asm volatile("hw_bp: nop");
+
+ /* Hardware data BP test */
+ asm volatile("mov $1234,%%rax;\n\t"
+ "mov %%rax,%0;\n\t write_data:"
+ : "=m" (guest_value) : : "rax");
+
+ /* Single step test, covers 2 basic instructions and 2 emulated */
+ asm volatile("ss_start: "
+ "xor %%rax,%%rax\n\t"
+ "cpuid\n\t"
+ "movl $0x1a0,%%ecx\n\t"
+ "rdmsr\n\t"
+ : : : "rax", "ecx");
+
+ /* DR6.BD test */
+ asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
+ GUEST_DONE();
+}
+
+#define CLEAR_DEBUG() memset(&debug, 0, sizeof(debug))
+#define APPLY_DEBUG() vcpu_set_guest_debug(vm, VCPU_ID, &debug)
+#define CAST_TO_RIP(v) ((unsigned long long)&(v))
+#define SET_RIP(v) do { \
+ vcpu_regs_get(vm, VCPU_ID, &regs); \
+ regs.rip = (v); \
+ vcpu_regs_set(vm, VCPU_ID, &regs); \
+ } while (0)
+#define MOVE_RIP(v) SET_RIP(regs.rip + (v));
+
+int main(void)
+{
+ struct kvm_guest_debug debug;
+ unsigned long long target_dr6, target_rip;
+ struct kvm_regs regs;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ uint64_t cmd;
+ int i;
+ /* Instruction lengths starting at ss_start */
+ int ss_size[4] = {
+ 3, /* xor */
+ 2, /* cpuid */
+ 5, /* mov */
+ 2, /* rdmsr */
+ };
+
+ if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
+ print_skip("KVM_CAP_SET_GUEST_DEBUG not supported");
+ return 0;
+ }
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ run = vcpu_state(vm, VCPU_ID);
+
+ /* Test software BPs - int3 */
+ CLEAR_DEBUG();
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
+ APPLY_DEBUG();
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+ run->debug.arch.exception == BP_VECTOR &&
+ run->debug.arch.pc == CAST_TO_RIP(sw_bp),
+ "INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
+ run->exit_reason, run->debug.arch.exception,
+ run->debug.arch.pc, CAST_TO_RIP(sw_bp));
+ MOVE_RIP(1);
+
+ /* Test instruction HW BP over DR[0-3] */
+ for (i = 0; i < 4; i++) {
+ CLEAR_DEBUG();
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+ debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
+ debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
+ APPLY_DEBUG();
+ vcpu_run(vm, VCPU_ID);
+ target_dr6 = 0xffff0ff0 | (1UL << i);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+ run->debug.arch.exception == DB_VECTOR &&
+ run->debug.arch.pc == CAST_TO_RIP(hw_bp) &&
+ run->debug.arch.dr6 == target_dr6,
+ "INS_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+ "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+ i, run->exit_reason, run->debug.arch.exception,
+ run->debug.arch.pc, CAST_TO_RIP(hw_bp),
+ run->debug.arch.dr6, target_dr6);
+ }
+ /* Skip "nop" */
+ MOVE_RIP(1);
+
+ /* Test data access HW BP over DR[0-3] */
+ for (i = 0; i < 4; i++) {
+ CLEAR_DEBUG();
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+ debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
+ debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
+ (0x000d0000UL << (4*i));
+ APPLY_DEBUG();
+ vcpu_run(vm, VCPU_ID);
+ target_dr6 = 0xffff0ff0 | (1UL << i);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+ run->debug.arch.exception == DB_VECTOR &&
+ run->debug.arch.pc == CAST_TO_RIP(write_data) &&
+ run->debug.arch.dr6 == target_dr6,
+ "DATA_HW_BP (DR%d): exit %d exception %d rip 0x%llx "
+ "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+ i, run->exit_reason, run->debug.arch.exception,
+ run->debug.arch.pc, CAST_TO_RIP(write_data),
+ run->debug.arch.dr6, target_dr6);
+ /* Rollback the 4-bytes "mov" */
+ MOVE_RIP(-7);
+ }
+ /* Skip the 4-bytes "mov" */
+ MOVE_RIP(7);
+
+ /* Test single step */
+ target_rip = CAST_TO_RIP(ss_start);
+ target_dr6 = 0xffff4ff0ULL;
+ vcpu_regs_get(vm, VCPU_ID, &regs);
+ for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
+ target_rip += ss_size[i];
+ CLEAR_DEBUG();
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+ debug.arch.debugreg[7] = 0x00000400;
+ APPLY_DEBUG();
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+ run->debug.arch.exception == DB_VECTOR &&
+ run->debug.arch.pc == target_rip &&
+ run->debug.arch.dr6 == target_dr6,
+ "SINGLE_STEP[%d]: exit %d exception %d rip 0x%llx "
+ "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+ i, run->exit_reason, run->debug.arch.exception,
+ run->debug.arch.pc, target_rip, run->debug.arch.dr6,
+ target_dr6);
+ }
+
+ /* Finally test global disable */
+ CLEAR_DEBUG();
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
+ debug.arch.debugreg[7] = 0x400 | DR7_GD;
+ APPLY_DEBUG();
+ vcpu_run(vm, VCPU_ID);
+ target_dr6 = 0xffff0ff0 | DR6_BD;
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
+ run->debug.arch.exception == DB_VECTOR &&
+ run->debug.arch.pc == CAST_TO_RIP(bd_start) &&
+ run->debug.arch.dr6 == target_dr6,
+ "DR7.GD: exit %d exception %d rip 0x%llx "
+ "(should be 0x%llx) dr6 0x%llx (should be 0x%llx)",
+ run->exit_reason, run->debug.arch.exception,
+ run->debug.arch.pc, target_rip, run->debug.arch.dr6,
+ target_dr6);
+
+ /* Disable all debug controls, run to the end */
+ CLEAR_DEBUG();
+ APPLY_DEBUG();
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "KVM_EXIT_IO");
+ cmd = get_ucall(vm, VCPU_ID, &uc);
+ TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 92915e6408e7..e6e62e5e75b2 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -21,10 +21,10 @@
void l2_guest_code(void)
{
- GUEST_SYNC(6);
-
GUEST_SYNC(7);
+ GUEST_SYNC(8);
+
/* Done, exit to L1 and never come back. */
vmcall();
}
@@ -50,12 +50,17 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_SYNC(5);
GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+ current_evmcs->revision_id = -1u;
+ GUEST_ASSERT(vmlaunch());
+ current_evmcs->revision_id = EVMCS_VERSION;
+ GUEST_SYNC(6);
+
GUEST_ASSERT(!vmlaunch());
GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
- GUEST_SYNC(8);
+ GUEST_SYNC(9);
GUEST_ASSERT(!vmresume());
GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- GUEST_SYNC(9);
+ GUEST_SYNC(10);
}
void guest_code(struct vmx_pages *vmx_pages)
@@ -67,6 +72,10 @@ void guest_code(struct vmx_pages *vmx_pages)
l1_guest_code(vmx_pages);
GUEST_DONE();
+
+ /* Try enlightened vmptrld with an incorrect GPA */
+ evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
+ GUEST_ASSERT(vmlaunch());
}
int main(int argc, char *argv[])
@@ -87,7 +96,7 @@ int main(int argc, char *argv[])
if (!kvm_check_cap(KVM_CAP_NESTED_STATE) ||
!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- printf("capabilities not available, skipping test\n");
+ print_skip("capabilities not available");
exit(KSFT_SKIP);
}
@@ -109,20 +118,20 @@ int main(int argc, char *argv[])
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
- TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
/* NOT REACHED */
case UCALL_SYNC:
break;
case UCALL_DONE:
- goto done;
+ goto part1_done;
default:
- TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
/* UCALL_SYNC is handled here. */
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
stage, (ulong)uc.args[1]);
state = vcpu_save_state(vm, VCPU_ID);
@@ -147,6 +156,10 @@ int main(int argc, char *argv[])
(ulong) regs2.rdi, (ulong) regs2.rsi);
}
-done:
+part1_done:
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
+ "Unexpected successful VMEnter with invalid eVMCS pointer!");
+
kvm_vm_free(vm);
}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 443a2b54645b..4a7967cca281 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -26,18 +26,18 @@ static void guest_code(void)
{
}
-static int smt_possible(void)
+static bool smt_possible(void)
{
char buf[16];
FILE *f;
- bool res = 1;
+ bool res = true;
f = fopen("/sys/devices/system/cpu/smt/control", "r");
if (f) {
if (fread(buf, sizeof(*buf), sizeof(buf), f) > 0) {
if (!strncmp(buf, "forceoff", 8) ||
!strncmp(buf, "notsupported", 12))
- res = 0;
+ res = false;
}
fclose(f);
}
@@ -46,29 +46,31 @@ static int smt_possible(void)
}
static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
- int evmcs_enabled)
+ bool evmcs_enabled)
{
int i;
+ int nent = 9;
+ u32 test_val;
- if (!evmcs_enabled)
- TEST_ASSERT(hv_cpuid_entries->nent == 6,
- "KVM_GET_SUPPORTED_HV_CPUID should return 6 entries"
- " when Enlightened VMCS is disabled (returned %d)",
- hv_cpuid_entries->nent);
- else
- TEST_ASSERT(hv_cpuid_entries->nent == 7,
- "KVM_GET_SUPPORTED_HV_CPUID should return 7 entries"
- " when Enlightened VMCS is enabled (returned %d)",
- hv_cpuid_entries->nent);
+ if (evmcs_enabled)
+ nent += 1; /* 0x4000000A */
+
+ TEST_ASSERT(hv_cpuid_entries->nent == nent,
+ "KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
+ " with evmcs=%d (returned %d)",
+ nent, evmcs_enabled, hv_cpuid_entries->nent);
for (i = 0; i < hv_cpuid_entries->nent; i++) {
struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
TEST_ASSERT((entry->function >= 0x40000000) &&
- (entry->function <= 0x4000000A),
- "function %lx is our of supported range",
+ (entry->function <= 0x40000082),
+ "function %x is our of supported range",
entry->function);
+ TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A),
+ "0x4000000A leaf should not be reported");
+
TEST_ASSERT(entry->index == 0,
".index field should be zero");
@@ -78,12 +80,23 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
TEST_ASSERT(!entry->padding[0] && !entry->padding[1] &&
!entry->padding[2], "padding should be zero");
- if (entry->function == 0x40000004) {
- int nononarchcs = !!(entry->eax & (1UL << 18));
+ switch (entry->function) {
+ case 0x40000000:
+ test_val = 0x40000082;
- TEST_ASSERT(nononarchcs == !smt_possible(),
+ TEST_ASSERT(entry->eax == test_val,
+ "Wrong max leaf report in 0x40000000.EAX: %x"
+ " (evmcs=%d)",
+ entry->eax, evmcs_enabled
+ );
+ break;
+ case 0x40000004:
+ test_val = entry->eax & (1UL << 18);
+
+ TEST_ASSERT(!!test_val == !smt_possible(),
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
+ break;
}
/*
@@ -133,50 +146,44 @@ struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
- int rv;
+ int rv, stage;
struct kvm_cpuid2 *hv_cpuid_entries;
+ bool evmcs_enabled;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID);
if (!rv) {
- fprintf(stderr,
- "KVM_CAP_HYPERV_CPUID not supported, skip test\n");
+ print_skip("KVM_CAP_HYPERV_CPUID not supported");
exit(KSFT_SKIP);
}
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- test_hv_cpuid_e2big(vm);
-
- hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
- if (!hv_cpuid_entries)
- return 1;
-
- test_hv_cpuid(hv_cpuid_entries, 0);
-
- free(hv_cpuid_entries);
+ for (stage = 0; stage < 3; stage++) {
+ evmcs_enabled = false;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ switch (stage) {
+ case 0:
+ test_hv_cpuid_e2big(vm);
+ continue;
+ case 1:
+ break;
+ case 2:
+ if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+ print_skip("Enlightened VMCS is unsupported");
+ continue;
+ }
+ vcpu_enable_evmcs(vm, VCPU_ID);
+ evmcs_enabled = true;
+ break;
+ }
- if (!kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- fprintf(stderr,
- "Enlightened VMCS is unsupported, skip related test\n");
- goto vm_free;
+ hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
+ test_hv_cpuid(hv_cpuid_entries, evmcs_enabled);
+ free(hv_cpuid_entries);
+ kvm_vm_free(vm);
}
- vcpu_enable_evmcs(vm, VCPU_ID);
-
- hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
- if (!hv_cpuid_entries)
- return 1;
-
- test_hv_cpuid(hv_cpuid_entries, 1);
-
- free(hv_cpuid_entries);
-
-vm_free:
- kvm_vm_free(vm);
-
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
index 00bb97d76000..e6480fd5c4bd 100644
--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
@@ -44,7 +44,7 @@ void *thr(void *arg)
struct kvm_run *run = tc->run;
res = ioctl(kvmcpu, KVM_RUN, 0);
- printf("ret1=%d exit_reason=%d suberror=%d\n",
+ pr_info("ret1=%d exit_reason=%d suberror=%d\n",
res, run->exit_reason, run->internal.suberror);
return 0;
@@ -93,12 +93,12 @@ int main(void)
int warnings_before, warnings_after;
if (!is_intel_cpu()) {
- printf("Must be run on an Intel CPU, skipping test\n");
+ print_skip("Must be run on an Intel CPU");
exit(KSFT_SKIP);
}
if (vm_is_unrestricted_guest(NULL)) {
- printf("Unrestricted guest must be disabled, skipping test\n");
+ print_skip("Unrestricted guest must be disabled");
exit(KSFT_SKIP);
}
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index f9334bd3cce9..1e89688cbbbf 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -58,8 +58,7 @@ static void test_msr_platform_info_enabled(struct kvm_vm *vm)
exit_reason_str(run->exit_reason));
get_ucall(vm, VCPU_ID, &uc);
TEST_ASSERT(uc.cmd == UCALL_SYNC,
- "Received ucall other than UCALL_SYNC: %u\n",
- ucall);
+ "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd);
TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
@@ -89,8 +88,7 @@ int main(int argc, char *argv[])
rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
if (!rv) {
- fprintf(stderr,
- "KVM_CAP_MSR_PLATFORM_INFO not supported, skip test\n");
+ print_skip("KVM_CAP_MSR_PLATFORM_INFO not supported");
exit(KSFT_SKIP);
}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 8c063646f2a0..6f8f478b3ceb 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -17,6 +17,7 @@
#include "kvm_util.h"
#include "vmx.h"
+#include "svm_util.h"
#define VCPU_ID 1
@@ -58,7 +59,7 @@ void self_smi(void)
APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
}
-void guest_code(struct vmx_pages *vmx_pages)
+void guest_code(void *arg)
{
uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
@@ -72,8 +73,11 @@ void guest_code(struct vmx_pages *vmx_pages)
sync_with_host(4);
- if (vmx_pages) {
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ if (arg) {
+ if (cpu_has_svm())
+ generic_svm_setup(arg, NULL, NULL);
+ else
+ GUEST_ASSERT(prepare_for_vmx_operation(arg));
sync_with_host(5);
@@ -87,7 +91,7 @@ void guest_code(struct vmx_pages *vmx_pages)
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva = 0;
+ vm_vaddr_t nested_gva = 0;
struct kvm_regs regs;
struct kvm_vm *vm;
@@ -114,10 +118,13 @@ int main(int argc, char *argv[])
vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ if (kvm_get_supported_cpuid_entry(0x80000001)->ecx & CPUID_SVM)
+ vcpu_alloc_svm(vm, &nested_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
} else {
- printf("will skip SMM test with VMX enabled\n");
+ pr_info("will skip SMM test with VMX enabled\n");
vcpu_args_set(vm, VCPU_ID, 1, 0);
}
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 3ab5ec3da9f4..d43b6f99b66c 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -18,14 +18,46 @@
#include "kvm_util.h"
#include "processor.h"
#include "vmx.h"
+#include "svm_util.h"
#define VCPU_ID 5
+#define L2_GUEST_STACK_SIZE 256
-void l2_guest_code(void)
+void svm_l2_guest_code(void)
{
+ GUEST_SYNC(4);
+ /* Exit to L1 */
+ vmcall();
GUEST_SYNC(6);
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
- /* Exit to L1 */
+static void svm_l1_guest_code(struct svm_test_data *svm)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ GUEST_ASSERT(svm->vmcb_gpa);
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, svm_l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(3);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(5);
+ vmcb->save.rip += 3;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(7);
+}
+
+void vmx_l2_guest_code(void)
+{
+ GUEST_SYNC(6);
+
+ /* Exit to L1 */
vmcall();
/* L1 has now set up a shadow VMCS for us. */
@@ -42,10 +74,9 @@ void l2_guest_code(void)
vmcall();
}
-void l1_guest_code(struct vmx_pages *vmx_pages)
+static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
GUEST_ASSERT(vmx_pages->vmcs_gpa);
GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
@@ -56,7 +87,7 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_SYNC(4);
GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
- prepare_vmcs(vmx_pages, l2_guest_code,
+ prepare_vmcs(vmx_pages, vmx_l2_guest_code,
&l2_guest_stack[L2_GUEST_STACK_SIZE]);
GUEST_SYNC(5);
@@ -106,20 +137,24 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
GUEST_ASSERT(vmresume());
}
-void guest_code(struct vmx_pages *vmx_pages)
+static void __attribute__((__flatten__)) guest_code(void *arg)
{
GUEST_SYNC(1);
GUEST_SYNC(2);
- if (vmx_pages)
- l1_guest_code(vmx_pages);
+ if (arg) {
+ if (cpu_has_svm())
+ svm_l1_guest_code(arg);
+ else
+ vmx_l1_guest_code(arg);
+ }
GUEST_DONE();
}
int main(int argc, char *argv[])
{
- vm_vaddr_t vmx_pages_gva = 0;
+ vm_vaddr_t nested_gva = 0;
struct kvm_regs regs1, regs2;
struct kvm_vm *vm;
@@ -136,10 +171,13 @@ int main(int argc, char *argv[])
vcpu_regs_get(vm, VCPU_ID, &regs1);
if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ if (kvm_get_supported_cpuid_entry(0x80000001)->ecx & CPUID_SVM)
+ vcpu_alloc_svm(vm, &nested_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
} else {
- printf("will skip nested state checks\n");
+ pr_info("will skip nested state checks\n");
vcpu_args_set(vm, VCPU_ID, 1, 0);
}
@@ -152,20 +190,20 @@ int main(int argc, char *argv[])
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
- TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
/* NOT REACHED */
case UCALL_SYNC:
break;
case UCALL_DONE:
goto done;
default:
- TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
/* UCALL_SYNC is handled here. */
TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage, "Unexpected register values vmexit #%lx, got %lx",
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
stage, (ulong)uc.args[1]);
state = vcpu_save_state(vm, VCPU_ID);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
index e280f68f6365..0e1adb4e3199 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
@@ -61,16 +61,14 @@ int main(int argc, char *argv[])
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
- TEST_ASSERT(false, "%s",
- (const char *)uc.args[0]);
+ TEST_FAIL("%s", (const char *)uc.args[0]);
/* NOT REACHED */
case UCALL_SYNC:
break;
case UCALL_DONE:
goto done;
default:
- TEST_ASSERT(false,
- "Unknown ucall 0x%x.", uc.cmd);
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
}
}
done:
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index 5c8224256294..d672f0a473f8 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -91,11 +91,11 @@ int main(int argc, char *argv[])
cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
- fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n");
+ print_skip("KVM_CAP_SYNC_REGS not supported");
exit(KSFT_SKIP);
}
if ((cap & INVALID_SYNC_FIELD) != 0) {
- fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n");
+ print_skip("The \"invalid\" field is not invalid");
exit(KSFT_SKIP);
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index 5dfb53546a26..fe40ade06a49 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -78,10 +78,10 @@ int main(int argc, char *argv[])
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
- TEST_ASSERT(false, "%s", (const char *)uc.args[0]);
+ TEST_FAIL("%s", (const char *)uc.args[0]);
/* NOT REACHED */
default:
- TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
}
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index a223a6401258..e894a638a155 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -21,7 +21,7 @@
/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1
-#define TEST_MEM_SIZE 3
+#define TEST_MEM_PAGES 3
/* L1 guest test virtual memory offset */
#define GUEST_TEST_MEM 0xc0000000
@@ -91,15 +91,14 @@ int main(int argc, char *argv[])
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
GUEST_TEST_MEM,
TEST_MEM_SLOT_INDEX,
- TEST_MEM_SIZE,
+ TEST_MEM_PAGES,
KVM_MEM_LOG_DIRTY_PAGES);
/*
* Add an identity map for GVA range [0xc0000000, 0xc0002000). This
* affects both L1 and L2. However...
*/
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM,
- TEST_MEM_SIZE * 4096, 0);
+ virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0);
/*
* ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
@@ -113,11 +112,11 @@ int main(int argc, char *argv[])
nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
- bmap = bitmap_alloc(TEST_MEM_SIZE);
+ bmap = bitmap_alloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
while (!done) {
- memset(host_test_mem, 0xaa, TEST_MEM_SIZE * 4096);
+ memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
_vcpu_run(vm, VCPU_ID);
TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
"Unexpected exit reason: %u (%s),\n",
@@ -126,8 +125,8 @@ int main(int argc, char *argv[])
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
- TEST_ASSERT(false, "%s at %s:%d", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
/* NOT REACHED */
case UCALL_SYNC:
/*
@@ -152,7 +151,7 @@ int main(int argc, char *argv[])
done = true;
break;
default:
- TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
}
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
new file mode 100644
index 000000000000..cc72b6188ca7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX-preemption timer test
+ *
+ * Copyright (C) 2020, Google, LLC.
+ *
+ * Test to ensure the VM-Enter after migration doesn't
+ * incorrectly restarts the timer with the full timer
+ * value instead of partially decayed timer value
+ *
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#define VCPU_ID 5
+#define PREEMPTION_TIMER_VALUE 100000000ull
+#define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull
+
+u32 vmx_pt_rate;
+bool l2_save_restore_done;
+static u64 l2_vmx_pt_start;
+volatile u64 l2_vmx_pt_finish;
+
+void l2_guest_code(void)
+{
+ u64 vmx_pt_delta;
+
+ vmcall();
+ l2_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+ /*
+ * Wait until the 1st threshold has passed
+ */
+ do {
+ l2_vmx_pt_finish = rdtsc();
+ vmx_pt_delta = (l2_vmx_pt_finish - l2_vmx_pt_start) >>
+ vmx_pt_rate;
+ } while (vmx_pt_delta < PREEMPTION_TIMER_VALUE_THRESHOLD1);
+
+ /*
+ * Force L2 through Save and Restore cycle
+ */
+ GUEST_SYNC(1);
+
+ l2_save_restore_done = 1;
+
+ /*
+ * Now wait for the preemption timer to fire and
+ * exit to L1
+ */
+ while ((l2_vmx_pt_finish = rdtsc()))
+ ;
+}
+
+void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ u64 l1_vmx_pt_start;
+ u64 l1_vmx_pt_finish;
+ u64 l1_tsc_deadline, l2_tsc_deadline;
+
+ GUEST_ASSERT(vmx_pages->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * Check for Preemption timer support
+ */
+ basic.val = rdmsr(MSR_IA32_VMX_BASIC);
+ ctrl_pin_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_PINBASED_CTLS
+ : MSR_IA32_VMX_PINBASED_CTLS);
+ ctrl_exit_rev.val = rdmsr(basic.ctrl ? MSR_IA32_VMX_TRUE_EXIT_CTLS
+ : MSR_IA32_VMX_EXIT_CTLS);
+
+ if (!(ctrl_pin_rev.clr & PIN_BASED_VMX_PREEMPTION_TIMER) ||
+ !(ctrl_exit_rev.clr & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
+ return;
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + vmreadz(VM_EXIT_INSTRUCTION_LEN));
+
+ /*
+ * Turn on PIN control and resume the guest
+ */
+ GUEST_ASSERT(!vmwrite(PIN_BASED_VM_EXEC_CONTROL,
+ vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+ PIN_BASED_VMX_PREEMPTION_TIMER));
+
+ GUEST_ASSERT(!vmwrite(VMX_PREEMPTION_TIMER_VALUE,
+ PREEMPTION_TIMER_VALUE));
+
+ vmx_pt_rate = rdmsr(MSR_IA32_VMX_MISC) & 0x1F;
+
+ l2_save_restore_done = 0;
+
+ l1_vmx_pt_start = (rdtsc() >> vmx_pt_rate) << vmx_pt_rate;
+
+ GUEST_ASSERT(!vmresume());
+
+ l1_vmx_pt_finish = rdtsc();
+
+ /*
+ * Ensure exit from L2 happens after L2 goes through
+ * save and restore
+ */
+ GUEST_ASSERT(l2_save_restore_done);
+
+ /*
+ * Ensure the exit from L2 is due to preemption timer expiry
+ */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_PREEMPTION_TIMER);
+
+ l1_tsc_deadline = l1_vmx_pt_start +
+ (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+ l2_tsc_deadline = l2_vmx_pt_start +
+ (PREEMPTION_TIMER_VALUE << vmx_pt_rate);
+
+ /*
+ * Sync with the host and pass the l1|l2 pt_expiry_finish times and
+ * tsc deadlines so that host can verify they are as expected
+ */
+ GUEST_SYNC_ARGS(2, l1_vmx_pt_finish, l1_tsc_deadline,
+ l2_vmx_pt_finish, l2_tsc_deadline);
+}
+
+void guest_code(struct vmx_pages *vmx_pages)
+{
+ if (vmx_pages)
+ l1_guest_code(vmx_pages);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0;
+
+ struct kvm_regs regs1, regs2;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct kvm_x86_state *state;
+ struct ucall uc;
+ int stage;
+
+ /*
+ * AMD currently does not implement any VMX features, so for now we
+ * just early out.
+ */
+ nested_vmx_check_supported();
+
+ /* Create VM */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ run = vcpu_state(vm, VCPU_ID);
+
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ } else {
+ pr_info("will skip vmx preemption timer checks\n");
+ goto done;
+ }
+
+ for (stage = 1;; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Stage %d: unexpected exit reason: %u (%s),\n",
+ stage, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+ /*
+ * If this stage 2 then we should verify the vmx pt expiry
+ * is as expected.
+ * From L1's perspective verify Preemption timer hasn't
+ * expired too early.
+ * From L2's perspective verify Preemption timer hasn't
+ * expired too late.
+ */
+ if (stage == 2) {
+
+ pr_info("Stage %d: L1 PT expiry TSC (%lu) , L1 TSC deadline (%lu)\n",
+ stage, uc.args[2], uc.args[3]);
+
+ pr_info("Stage %d: L2 PT expiry TSC (%lu) , L2 TSC deadline (%lu)\n",
+ stage, uc.args[4], uc.args[5]);
+
+ TEST_ASSERT(uc.args[2] >= uc.args[3],
+ "Stage %d: L1 PT expiry TSC (%lu) < L1 TSC deadline (%lu)",
+ stage, uc.args[2], uc.args[3]);
+
+ TEST_ASSERT(uc.args[4] < uc.args[5],
+ "Stage %d: L2 PT expiry TSC (%lu) > L2 TSC deadline (%lu)",
+ stage, uc.args[4], uc.args[5]);
+ }
+
+ state = vcpu_save_state(vm, VCPU_ID);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vm, VCPU_ID, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ kvm_vm_restart(vm, O_RDWR);
+ vm_vcpu_add(vm, VCPU_ID);
+ vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_load_state(vm, VCPU_ID, state);
+ run = vcpu_state(vm, VCPU_ID);
+ free(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vm, VCPU_ID, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
index 9ef7fab39d48..54cdefdfb49d 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -212,7 +212,7 @@ void test_vmx_nested_state(struct kvm_vm *vm)
test_nested_state(vm, state);
vcpu_nested_state_get(vm, VCPU_ID, state);
TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
- "Size must be between %d and %d. The size returned was %d.",
+ "Size must be between %ld and %d. The size returned was %d.",
sizeof(*state), state_sz, state->size);
TEST_ASSERT(state->hdr.vmx.vmxon_pa == -1ull, "vmxon_pa must be -1ull.");
TEST_ASSERT(state->hdr.vmx.vmcs12_pa == -1ull, "vmcs_pa must be -1ull.");
@@ -228,7 +228,7 @@ int main(int argc, char *argv[])
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- printf("KVM_CAP_NESTED_STATE not available, skipping test\n");
+ print_skip("KVM_CAP_NESTED_STATE not available");
exit(KSFT_SKIP);
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index 69e482a95c47..fbe8417cbc2c 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -121,8 +121,8 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
static void report(int64_t val)
{
- printf("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
- val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
+ pr_info("IA32_TSC_ADJUST is %ld (%lld * TSC_ADJUST_VALUE + %lld).\n",
+ val, val / TSC_ADJUST_VALUE, val % TSC_ADJUST_VALUE);
}
int main(int argc, char *argv[])
@@ -150,7 +150,7 @@ int main(int argc, char *argv[])
switch (get_ucall(vm, VCPU_ID, &uc)) {
case UCALL_ABORT:
- TEST_ASSERT(false, "%s", (const char *)uc.args[0]);
+ TEST_FAIL("%s", (const char *)uc.args[0]);
/* NOT REACHED */
case UCALL_SYNC:
report(uc.args[1]);
@@ -158,7 +158,7 @@ int main(int argc, char *argv[])
case UCALL_DONE:
goto done;
default:
- TEST_ASSERT(false, "Unknown ucall 0x%x.", uc.cmd);
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
}
}
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
index 851ea81b9d9f..3529376747c2 100644
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -51,7 +51,7 @@ int main(int argc, char *argv[])
xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES);
}
if (!xss_supported) {
- printf("IA32_XSS is not supported by the vCPU.\n");
+ print_skip("IA32_XSS is not supported by the vCPU");
exit(KSFT_SKIP);
}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 3ed0134a764d..b0556c752443 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -137,7 +137,8 @@ endif
# Selftest makefiles can override those targets by setting
# OVERRIDE_TARGETS = 1.
ifeq ($(OVERRIDE_TARGETS),)
-$(OUTPUT)/%:%.c
+LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
+$(OUTPUT)/%:%.c $(LOCAL_HDRS)
$(LINK.c) $^ $(LDLIBS) -o $@
$(OUTPUT)/%.o:%.S
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index 14a77ea4a8da..b80ee3f6e265 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -2,3 +2,4 @@ CONFIG_TEST_PRINTF=m
CONFIG_TEST_BITMAP=m
CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_STRSCPY=m
+CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
index dadf819148a4..ee64ff8df8f4 100755
--- a/tools/testing/selftests/lkdtm/run.sh
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -25,13 +25,13 @@ fi
# Figure out which test to run from our script name.
test=$(basename $0 .sh)
# Look up details about the test from master list of LKDTM tests.
-line=$(egrep '^#?'"$test"'\b' tests.txt)
+line=$(grep -E '^#?'"$test"'\b' tests.txt)
if [ -z "$line" ]; then
echo "Skipped: missing test '$test' in tests.txt"
exit $KSELFTEST_SKIP_TEST
fi
# Check that the test is known to LKDTM.
-if ! egrep -q '^'"$test"'$' "$TRIGGER" ; then
+if ! grep -E -q '^'"$test"'$' "$TRIGGER" ; then
echo "Skipped: test '$test' missing in $TRIGGER!"
exit $KSELFTEST_SKIP_TEST
fi
@@ -59,30 +59,32 @@ if [ -z "$expect" ]; then
expect="call trace:"
fi
-# Clear out dmesg for output reporting
-dmesg -c >/dev/null
-
# Prepare log for report checking
-LOG=$(mktemp --tmpdir -t lkdtm-XXXXXX)
+LOG=$(mktemp --tmpdir -t lkdtm-log-XXXXXX)
+DMESG=$(mktemp --tmpdir -t lkdtm-dmesg-XXXXXX)
cleanup() {
- rm -f "$LOG"
+ rm -f "$LOG" "$DMESG"
}
trap cleanup EXIT
+# Save existing dmesg so we can detect new content below
+dmesg > "$DMESG"
+
# Most shells yell about signals and we're expecting the "cat" process
# to usually be killed by the kernel. So we have to run it in a sub-shell
# and silence errors.
($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true
# Record and dump the results
-dmesg -c >"$LOG"
+dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true
+
cat "$LOG"
# Check for expected output
-if egrep -qi "$expect" "$LOG" ; then
+if grep -E -qi "$expect" "$LOG" ; then
echo "$test: saw '$expect': ok"
exit 0
else
- if egrep -qi XFAIL: "$LOG" ; then
+ if grep -E -qi XFAIL: "$LOG" ; then
echo "$test: saw 'XFAIL': [SKIP]"
exit $KSELFTEST_SKIP_TEST
else
diff --git a/tools/testing/selftests/media_tests/.gitignore b/tools/testing/selftests/media_tests/.gitignore
index 8745eba39012..da438e780ffe 100644
--- a/tools/testing/selftests/media_tests/.gitignore
+++ b/tools/testing/selftests/media_tests/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
media_device_test
media_device_open
video_device_test
diff --git a/tools/testing/selftests/membarrier/.gitignore b/tools/testing/selftests/membarrier/.gitignore
index f2f7ec0a99b4..f2fbba178601 100644
--- a/tools/testing/selftests/membarrier/.gitignore
+++ b/tools/testing/selftests/membarrier/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
membarrier_test_multi_thread
membarrier_test_single_thread
diff --git a/tools/testing/selftests/memfd/.gitignore b/tools/testing/selftests/memfd/.gitignore
index afe87c40ac80..dd9a051f608e 100644
--- a/tools/testing/selftests/memfd/.gitignore
+++ b/tools/testing/selftests/memfd/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
fuse_mnt
fuse_test
memfd_test
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index 53a848109f7b..4da8b565fa32 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -6,15 +6,25 @@ CFLAGS += -I../../../../usr/include/
TEST_GEN_PROGS := memfd_test
TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh
-TEST_GEN_FILES := fuse_mnt fuse_test
+TEST_GEN_FILES := fuse_test fuse_mnt
-fuse_mnt.o: CFLAGS += $(shell pkg-config fuse --cflags)
+VAR_CFLAGS := $(shell pkg-config fuse --cflags 2>/dev/null)
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS := -D_FILE_OFFSET_BITS=64 -I/usr/include/fuse
+endif
+
+VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS := -lfuse -pthread
+endif
+
+fuse_mnt.o: CFLAGS += $(VAR_CFLAGS)
include ../lib.mk
-$(OUTPUT)/fuse_mnt: LDLIBS += $(shell pkg-config fuse --libs)
+$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS)
-$(OUTPUT)/memfd_test: memfd_test.c common.o
-$(OUTPUT)/fuse_test: fuse_test.c common.o
+$(OUTPUT)/memfd_test: memfd_test.c common.c
+$(OUTPUT)/fuse_test: fuse_test.c common.c
-EXTRA_CLEAN = common.o
+EXTRA_CLEAN = $(OUTPUT)/common.o
diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore
index 856ad4107eb3..0bc64a6d4c18 100644
--- a/tools/testing/selftests/mount/.gitignore
+++ b/tools/testing/selftests/mount/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
unprivileged-remount-test
diff --git a/tools/testing/selftests/mqueue/.gitignore b/tools/testing/selftests/mqueue/.gitignore
index d8d42377205a..72ad8ca691c9 100644
--- a/tools/testing/selftests/mqueue/.gitignore
+++ b/tools/testing/selftests/mqueue/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
mq_open_tests
mq_perf_tests
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index ecc52d4c034d..742c499328b2 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
msg_zerocopy
socket
psock_fanout
@@ -23,3 +24,8 @@ so_txtime
tcp_fastopen_backup_key
nettest
fin_ack_lat
+reuseaddr_ports_exhausted
+hwtstamp_config
+rxtimestamp
+timestamping
+txtimestamp
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 4c1bd03ffa1c..895ec992b2f1 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -14,6 +14,9 @@ TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
TEST_PROGS += route_localnet.sh
+TEST_PROGS += reuseaddr_ports_exhausted.sh
+TEST_PROGS += txtimestamp.sh
+TEST_PROGS += vrf-xfrm-tests.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -22,6 +25,8 @@ TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_FILES += fin_ack_lat
+TEST_GEN_FILES += reuseaddr_ports_exhausted
+TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index b8503a8119b0..3b42c06b5985 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -12,6 +12,7 @@ CONFIG_IPV6_VTI=y
CONFIG_DUMMY=y
CONFIG_BRIDGE=y
CONFIG_VLAN_8021Q=y
+CONFIG_IFB=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
@@ -27,5 +28,6 @@ CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_NETEM=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_KALLSYMS=y
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 796670ebc65b..dee567f7576a 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,8 +19,8 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime"
+IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
+IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
TESTS="${ALL_TESTS}"
@@ -146,35 +146,36 @@ setup()
create_ns remote
IP="ip -netns me"
+ BRIDGE="bridge -netns me"
set -e
$IP li add veth1 type veth peer name veth2
$IP li set veth1 up
$IP addr add 172.16.1.1/24 dev veth1
- $IP -6 addr add 2001:db8:91::1/64 dev veth1
+ $IP -6 addr add 2001:db8:91::1/64 dev veth1 nodad
$IP li add veth3 type veth peer name veth4
$IP li set veth3 up
$IP addr add 172.16.2.1/24 dev veth3
- $IP -6 addr add 2001:db8:92::1/64 dev veth3
+ $IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad
$IP li set veth2 netns peer up
ip -netns peer addr add 172.16.1.2/24 dev veth2
- ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2
+ ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad
$IP li set veth4 netns peer up
ip -netns peer addr add 172.16.2.2/24 dev veth4
- ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4
+ ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad
ip -netns remote li add veth5 type veth peer name veth6
ip -netns remote li set veth5 up
ip -netns remote addr add dev veth5 172.16.101.1/24
- ip -netns remote addr add dev veth5 2001:db8:101::1/64
+ ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad
ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2
ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
ip -netns remote li set veth6 netns peer up
ip -netns peer addr add dev veth6 172.16.101.2/24
- ip -netns peer addr add dev veth6 2001:db8:101::2/64
+ ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad
set +e
}
@@ -248,11 +249,247 @@ check_route6()
local expected="$2"
local out
- out=$($IP -6 route ls match ${pfx} 2>/dev/null)
+ out=$($IP -6 route ls match ${pfx} 2>/dev/null | sed -e 's/pref medium//')
check_output "${out}" "${expected}"
}
+check_large_grp()
+{
+ local ipv=$1
+ local ecmp=$2
+ local grpnum=100
+ local nhidstart=100
+ local grpidstart=1000
+ local iter=0
+ local nhidstr=""
+ local grpidstr=""
+ local grpstr=""
+ local ipstr=""
+
+ if [ $ipv -eq 4 ]; then
+ ipstr="172.16.1."
+ else
+ ipstr="2001:db8:91::"
+ fi
+
+ #
+ # Create $grpnum groups with specified $ecmp and dump them
+ #
+
+ # create nexthops with different gateways
+ iter=2
+ while [ $iter -le $(($ecmp + 1)) ]
+ do
+ nhidstr="$(($nhidstart + $iter))"
+ run_cmd "$IP nexthop add id $nhidstr via $ipstr$iter dev veth1"
+ check_nexthop "id $nhidstr" "id $nhidstr via $ipstr$iter dev veth1 scope link"
+
+ if [ $iter -le $ecmp ]; then
+ grpstr+="$nhidstr/"
+ else
+ grpstr+="$nhidstr"
+ fi
+ ((iter++))
+ done
+
+ # create duplicate large ecmp groups
+ iter=0
+ while [ $iter -le $grpnum ]
+ do
+ grpidstr="$(($grpidstart + $iter))"
+ run_cmd "$IP nexthop add id $grpidstr group $grpstr"
+ check_nexthop "id $grpidstr" "id $grpidstr group $grpstr"
+ ((iter++))
+ done
+
+ # dump large groups
+ run_cmd "$IP nexthop list"
+ log_test $? 0 "Dump large (x$ecmp) ecmp groups"
+}
+
+start_ip_monitor()
+{
+ local mtype=$1
+
+ # start the monitor in the background
+ tmpfile=`mktemp /var/run/nexthoptestXXX`
+ mpid=`($IP monitor $mtype > $tmpfile & echo $!) 2>/dev/null`
+ sleep 0.2
+ echo "$mpid $tmpfile"
+}
+
+stop_ip_monitor()
+{
+ local mpid=$1
+ local tmpfile=$2
+ local el=$3
+
+ # check the monitor results
+ kill $mpid
+ lines=`wc -l $tmpfile | cut "-d " -f1`
+ test $lines -eq $el
+ rc=$?
+ rm -rf $tmpfile
+
+ return $rc
+}
+
+check_nexthop_fdb_support()
+{
+ $IP nexthop help 2>&1 | grep -q fdb
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing fdb nexthop support"
+ return $ksft_skip
+ fi
+}
+
+ipv6_fdb_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv6 fdb groups functional"
+ echo "--------------------------"
+
+ check_nexthop_fdb_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ # create group with multiple nexthops
+ run_cmd "$IP nexthop add id 61 via 2001:db8:91::2 fdb"
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::3 fdb"
+ run_cmd "$IP nexthop add id 102 group 61/62 fdb"
+ check_nexthop "id 102" "id 102 group 61/62 fdb"
+ log_test $? 0 "Fdb Nexthop group with multiple nexthops"
+
+ ## get nexthop group
+ run_cmd "$IP nexthop get id 102"
+ check_nexthop "id 102" "id 102 group 61/62 fdb"
+ log_test $? 0 "Get Fdb nexthop group by id"
+
+ # fdb nexthop group can only contain fdb nexthops
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::4"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::5"
+ run_cmd "$IP nexthop add id 103 group 63/64 fdb"
+ log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
+
+ # Non fdb nexthop group can not contain fdb nexthops
+ run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 fdb"
+ run_cmd "$IP nexthop add id 66 via 2001:db8:91::6 fdb"
+ run_cmd "$IP nexthop add id 104 group 65/66"
+ log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
+
+ # fdb nexthop cannot have blackhole
+ run_cmd "$IP nexthop add id 67 blackhole fdb"
+ log_test $? 2 "Fdb Nexthop with blackhole"
+
+ # fdb nexthop with oif
+ run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 dev veth1 fdb"
+ log_test $? 2 "Fdb Nexthop with oif"
+
+ # fdb nexthop with onlink
+ run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 onlink fdb"
+ log_test $? 2 "Fdb Nexthop with onlink"
+
+ # fdb nexthop with encap
+ run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb"
+ log_test $? 2 "Fdb Nexthop with encap"
+
+ run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
+ run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
+ log_test $? 0 "Fdb mac add with nexthop group"
+
+ ## fdb nexthops can only reference nexthop groups and not nexthops
+ run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 61 self"
+ log_test $? 255 "Fdb mac add with nexthop"
+
+ run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 66"
+ log_test $? 2 "Route add with fdb nexthop"
+
+ run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103"
+ log_test $? 2 "Route add with fdb nexthop group"
+
+ run_cmd "$IP nexthop del id 102"
+ log_test $? 0 "Fdb nexthop delete"
+
+ $IP link del dev vx10
+}
+
+ipv4_fdb_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv4 fdb groups functional"
+ echo "--------------------------"
+
+ check_nexthop_fdb_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ # create group with multiple nexthops
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 fdb"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 fdb"
+ run_cmd "$IP nexthop add id 102 group 12/13 fdb"
+ check_nexthop "id 102" "id 102 group 12/13 fdb"
+ log_test $? 0 "Fdb Nexthop group with multiple nexthops"
+
+ # get nexthop group
+ run_cmd "$IP nexthop get id 102"
+ check_nexthop "id 102" "id 102 group 12/13 fdb"
+ log_test $? 0 "Get Fdb nexthop group by id"
+
+ # fdb nexthop group can only contain fdb nexthops
+ run_cmd "$IP nexthop add id 14 via 172.16.1.2"
+ run_cmd "$IP nexthop add id 15 via 172.16.1.3"
+ run_cmd "$IP nexthop add id 103 group 14/15 fdb"
+ log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
+
+ # Non fdb nexthop group can not contain fdb nexthops
+ run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb"
+ run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb"
+ run_cmd "$IP nexthop add id 104 group 14/15"
+ log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
+
+ # fdb nexthop cannot have blackhole
+ run_cmd "$IP nexthop add id 18 blackhole fdb"
+ log_test $? 2 "Fdb Nexthop with blackhole"
+
+ # fdb nexthop with oif
+ run_cmd "$IP nexthop add id 16 via 172.16.1.2 dev veth1 fdb"
+ log_test $? 2 "Fdb Nexthop with oif"
+
+ # fdb nexthop with onlink
+ run_cmd "$IP nexthop add id 16 via 172.16.1.2 onlink fdb"
+ log_test $? 2 "Fdb Nexthop with onlink"
+
+ # fdb nexthop with encap
+ run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb"
+ log_test $? 2 "Fdb Nexthop with encap"
+
+ run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
+ run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
+ log_test $? 0 "Fdb mac add with nexthop group"
+
+ # fdb nexthops can only reference nexthop groups and not nexthops
+ run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self"
+ log_test $? 255 "Fdb mac add with nexthop"
+
+ run_cmd "$IP ro add 172.16.0.0/22 nhid 15"
+ log_test $? 2 "Route add with fdb nexthop"
+
+ run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
+ log_test $? 2 "Route add with fdb nexthop group"
+
+ run_cmd "$IP nexthop del id 102"
+ log_test $? 0 "Fdb nexthop delete"
+
+ $IP link del dev vx10
+}
+
################################################################################
# basic operations (add, delete, replace) on nexthops and nexthop groups
#
@@ -423,8 +660,6 @@ ipv6_fcnal_runtime()
echo "IPv6 functional runtime"
echo "-----------------------"
- sleep 5
-
#
# IPv6 - the basics
#
@@ -481,12 +716,12 @@ ipv6_fcnal_runtime()
run_cmd "$IP -6 nexthop add id 85 dev veth1"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85"
log_test $? 0 "IPv6 route with device only nexthop"
- check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024 pref medium"
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024"
run_cmd "$IP nexthop add id 123 group 81/85"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123"
log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw"
- check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1 pref medium"
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1"
#
# IPv6 route with v4 nexthop - not allowed
@@ -519,6 +754,75 @@ ipv6_fcnal_runtime()
# route with src address and using nexthop - not allowed
}
+ipv6_large_grp()
+{
+ local ecmp=32
+
+ echo
+ echo "IPv6 large groups (x$ecmp)"
+ echo "---------------------"
+
+ check_large_grp 6 $ecmp
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
+ipv6_del_add_loop1()
+{
+ while :; do
+ $IP nexthop del id 100
+ $IP nexthop add id 100 via 2001:db8:91::2 dev veth1
+ done >/dev/null 2>&1
+}
+
+ipv6_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101
+ done >/dev/null 2>&1
+}
+
+ipv6_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv6 runtime torture"
+ echo "--------------------"
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101"
+ run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+ run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+ ipv6_del_add_loop1 &
+ pid1=$!
+ ipv6_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+
+ # if we did not crash, success
+ log_test 0 0 "IPv6 torture test"
+}
+
+
ipv4_fcnal()
{
local rc
@@ -749,6 +1053,29 @@ ipv4_fcnal_runtime()
run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
log_test $? 0 "Ping - multipath"
+ run_cmd "$IP ro delete 172.16.101.1/32 nhid 122"
+
+ #
+ # multiple default routes
+ # - tests fib_select_default
+ run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1"
+ run_cmd "$IP ro add default nhid 501"
+ run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 0 "Ping - multiple default routes, nh first"
+
+ # flip the order
+ run_cmd "$IP ro del default nhid 501"
+ run_cmd "$IP ro del default via 172.16.1.3 dev veth1 metric 20"
+ run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20"
+ run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1"
+ run_cmd "$IP ro add default nhid 501 metric 20"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 0 "Ping - multiple default routes, nh second"
+
+ run_cmd "$IP nexthop delete nhid 501"
+ run_cmd "$IP ro del default"
+
#
# IPv4 with blackhole nexthops
#
@@ -843,6 +1170,11 @@ ipv4_fcnal_runtime()
$IP neigh sh | grep 'dev veth1'
fi
+ run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
+ run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1"
+ run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ log_test $? 0 "IPv4 default route with IPv6 gateway"
+
#
# MPLS as an example of LWT encap
#
@@ -857,6 +1189,241 @@ ipv4_fcnal_runtime()
log_test $? 0 "IPv4 route with MPLS encap, v6 gw - check"
}
+ipv4_large_grp()
+{
+ local ecmp=32
+
+ echo
+ echo "IPv4 large groups (x$ecmp)"
+ echo "---------------------"
+
+ check_large_grp 4 $ecmp
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
+sysctl_nexthop_compat_mode_check()
+{
+ local sysctlname="net.ipv4.nexthop_compat_mode"
+ local lprefix=$1
+
+ IPE="ip netns exec me"
+
+ $IPE sysctl -q $sysctlname 2>&1 >/dev/null
+ if [ $? -ne 0 ]; then
+ echo "SKIP: kernel lacks nexthop compat mode sysctl control"
+ return $ksft_skip
+ fi
+
+ out=$($IPE sysctl $sysctlname 2>/dev/null)
+ log_test $? 0 "$lprefix default nexthop compat mode check"
+ check_output "${out}" "$sysctlname = 1"
+}
+
+sysctl_nexthop_compat_mode_set()
+{
+ local sysctlname="net.ipv4.nexthop_compat_mode"
+ local mode=$1
+ local lprefix=$2
+
+ IPE="ip netns exec me"
+
+ out=$($IPE sysctl -w $sysctlname=$mode)
+ log_test $? 0 "$lprefix set compat mode - $mode"
+ check_output "${out}" "net.ipv4.nexthop_compat_mode = $mode"
+}
+
+ipv6_compat_mode()
+{
+ local rc
+
+ echo
+ echo "IPv6 nexthop api compat mode test"
+ echo "--------------------------------"
+
+ sysctl_nexthop_compat_mode_check "IPv6"
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 122 group 62/63"
+ ipmout=$(start_ip_monitor route)
+
+ run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 122"
+ # route add notification should contain expanded nexthops
+ stop_ip_monitor $ipmout 3
+ log_test $? 0 "IPv6 compat mode on - route add notification"
+
+ # route dump should contain expanded nexthops
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop via 2001:db8:91::3 dev veth1 weight 1"
+ log_test $? 0 "IPv6 compat mode on - route dump"
+
+ # change in nexthop group should generate route notification
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop replace id 122 group 62/64"
+ stop_ip_monitor $ipmout 3
+
+ log_test $? 0 "IPv6 compat mode on - nexthop change"
+
+ # set compat mode off
+ sysctl_nexthop_compat_mode_set 0 "IPv6"
+
+ run_cmd "$IP -6 ro del 2001:db8:101::1/128 nhid 122"
+
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 122 group 62/63"
+ ipmout=$(start_ip_monitor route)
+
+ run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 122"
+ # route add notification should not contain expanded nexthops
+ stop_ip_monitor $ipmout 1
+ log_test $? 0 "IPv6 compat mode off - route add notification"
+
+ # route dump should not contain expanded nexthops
+ check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024"
+ log_test $? 0 "IPv6 compat mode off - route dump"
+
+ # change in nexthop group should not generate route notification
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop replace id 122 group 62/64"
+ stop_ip_monitor $ipmout 0
+ log_test $? 0 "IPv6 compat mode off - nexthop change"
+
+ # nexthop delete should not generate route notification
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop del id 122"
+ stop_ip_monitor $ipmout 0
+ log_test $? 0 "IPv6 compat mode off - nexthop delete"
+
+ # set compat mode back on
+ sysctl_nexthop_compat_mode_set 1 "IPv6"
+}
+
+ipv4_compat_mode()
+{
+ local rc
+
+ echo
+ echo "IPv4 nexthop api compat mode"
+ echo "----------------------------"
+
+ sysctl_nexthop_compat_mode_check "IPv4"
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ run_cmd "$IP nexthop add id 21 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 22 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 122 group 21/22"
+ ipmout=$(start_ip_monitor route)
+
+ run_cmd "$IP ro add 172.16.101.1/32 nhid 122"
+ stop_ip_monitor $ipmout 3
+
+ # route add notification should contain expanded nexthops
+ log_test $? 0 "IPv4 compat mode on - route add notification"
+
+ # route dump should contain expanded nexthops
+ check_route "172.16.101.1" "172.16.101.1 nhid 122 nexthop via 172.16.1.2 dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+ log_test $? 0 "IPv4 compat mode on - route dump"
+
+ # change in nexthop group should generate route notification
+ run_cmd "$IP nexthop add id 23 via 172.16.1.3 dev veth1"
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop replace id 122 group 21/23"
+ stop_ip_monitor $ipmout 3
+ log_test $? 0 "IPv4 compat mode on - nexthop change"
+
+ sysctl_nexthop_compat_mode_set 0 "IPv4"
+
+ # cleanup
+ run_cmd "$IP ro del 172.16.101.1/32 nhid 122"
+
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP ro add 172.16.101.1/32 nhid 122"
+ stop_ip_monitor $ipmout 1
+ # route add notification should not contain expanded nexthops
+ log_test $? 0 "IPv4 compat mode off - route add notification"
+
+ # route dump should not contain expanded nexthops
+ check_route "172.16.101.1" "172.16.101.1 nhid 122"
+ log_test $? 0 "IPv4 compat mode off - route dump"
+
+ # change in nexthop group should not generate route notification
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop replace id 122 group 21/22"
+ stop_ip_monitor $ipmout 0
+ log_test $? 0 "IPv4 compat mode off - nexthop change"
+
+ # nexthop delete should not generate route notification
+ ipmout=$(start_ip_monitor route)
+ run_cmd "$IP nexthop del id 122"
+ stop_ip_monitor $ipmout 0
+ log_test $? 0 "IPv4 compat mode off - nexthop delete"
+
+ sysctl_nexthop_compat_mode_set 1 "IPv4"
+}
+
+ipv4_del_add_loop1()
+{
+ while :; do
+ $IP nexthop del id 100
+ $IP nexthop add id 100 via 172.16.1.2 dev veth1
+ done >/dev/null 2>&1
+}
+
+ipv4_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101
+ done >/dev/null 2>&1
+}
+
+ipv4_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv4 runtime torture"
+ echo "--------------------"
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101"
+ run_cmd "$IP route add 172.16.101.1 nhid 102"
+ run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+ ipv4_del_add_loop1 &
+ pid1=$!
+ ipv4_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+
+ # if we did not crash, success
+ log_test 0 0 "IPv4 torture test"
+}
+
basic()
{
echo
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index b7616704b55e..84205c3a55eb 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -618,16 +618,22 @@ fib_nexthop_test()
fib_suppress_test()
{
+ echo
+ echo "FIB rule with suppress_prefixlength"
+ setup
+
$IP link add dummy1 type dummy
$IP link set dummy1 up
$IP -6 route add default dev dummy1
$IP -6 rule add table main suppress_prefixlength 0
- ping -f -c 1000 -W 1 1234::1 || true
+ ping -f -c 1000 -W 1 1234::1 >/dev/null 2>&1
$IP -6 rule del table main suppress_prefixlength 0
$IP link del dummy1
# If we got here without crashing, we're good.
- return 0
+ log_test 0 0 "FIB rule suppress test"
+
+ cleanup
}
################################################################################
diff --git a/tools/testing/selftests/net/forwarding/.gitignore b/tools/testing/selftests/net/forwarding/.gitignore
index a793eef5b876..2dea317f12e7 100644
--- a/tools/testing/selftests/net/forwarding/.gitignore
+++ b/tools/testing/selftests/net/forwarding/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
forwarding.config
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 40b076983239..f0e6be4c09e9 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -35,6 +35,12 @@ if [ $? -ne 0 ]; then
exit 1
fi
+devlink dev help 2>&1 | grep info &> /dev/null
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing devlink dev info support"
+ exit 1
+fi
+
##############################################################################
# Devlink helpers
@@ -359,7 +365,9 @@ devlink_trap_group_stats_idle_test()
devlink_trap_exception_test()
{
local trap_name=$1; shift
- local group_name=$1; shift
+ local group_name
+
+ group_name=$(devlink_trap_group_get $trap_name)
devlink_trap_stats_idle_test $trap_name
check_fail $? "Trap stats idle when packets should have been trapped"
@@ -371,8 +379,11 @@ devlink_trap_exception_test()
devlink_trap_drop_test()
{
local trap_name=$1; shift
- local group_name=$1; shift
local dev=$1; shift
+ local handle=$1; shift
+ local group_name
+
+ group_name=$(devlink_trap_group_get $trap_name)
# This is the common part of all the tests. It checks that stats are
# initially idle, then non-idle after changing the trap action and
@@ -383,7 +394,6 @@ devlink_trap_drop_test()
devlink_trap_group_stats_idle_test $group_name
check_err $? "Trap group stats not idle with initial drop action"
-
devlink_trap_action_set $trap_name "trap"
devlink_trap_stats_idle_test $trap_name
check_fail $? "Trap stats idle after setting action to trap"
@@ -397,7 +407,7 @@ devlink_trap_drop_test()
devlink_trap_group_stats_idle_test $group_name
check_err $? "Trap group stats not idle after setting action to drop"
- tc_check_packets "dev $dev egress" 101 0
+ tc_check_packets "dev $dev egress" $handle 0
check_err $? "Packets were not dropped"
}
@@ -406,7 +416,91 @@ devlink_trap_drop_cleanup()
local mz_pid=$1; shift
local dev=$1; shift
local proto=$1; shift
+ local pref=$1; shift
+ local handle=$1; shift
kill $mz_pid && wait $mz_pid &> /dev/null
- tc filter del dev $dev egress protocol $proto pref 1 handle 101 flower
+ tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower
+}
+
+devlink_trap_stats_test()
+{
+ local test_name=$1; shift
+ local trap_name=$1; shift
+ local send_one="$@"
+ local t0_packets
+ local t1_packets
+
+ RET=0
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ $send_one && sleep 1
+
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ if [[ $t1_packets -eq $t0_packets ]]; then
+ check_err 1 "Trap stats did not increase"
+ fi
+
+ log_test "$test_name"
+}
+
+devlink_trap_policers_num_get()
+{
+ devlink -j -p trap policer show | jq '.[]["'$DEVLINK_DEV'"] | length'
+}
+
+devlink_trap_policer_rate_get()
+{
+ local policer_id=$1; shift
+
+ devlink -j -p trap policer show $DEVLINK_DEV policer $policer_id \
+ | jq '.[][][]["rate"]'
+}
+
+devlink_trap_policer_burst_get()
+{
+ local policer_id=$1; shift
+
+ devlink -j -p trap policer show $DEVLINK_DEV policer $policer_id \
+ | jq '.[][][]["burst"]'
+}
+
+devlink_trap_policer_rx_dropped_get()
+{
+ local policer_id=$1; shift
+
+ devlink -j -p -s trap policer show $DEVLINK_DEV policer $policer_id \
+ | jq '.[][][]["stats"]["rx"]["dropped"]'
+}
+
+devlink_trap_group_policer_get()
+{
+ local group_name=$1; shift
+
+ devlink -j -p trap group show $DEVLINK_DEV group $group_name \
+ | jq '.[][][]["policer"]'
+}
+
+devlink_trap_policer_ids_get()
+{
+ devlink -j -p trap policer show \
+ | jq '.[]["'$DEVLINK_DEV'"][]["policer"]'
+}
+
+devlink_port_by_netdev()
+{
+ local if_name=$1
+
+ devlink -j port show $if_name | jq -e '.[] | keys' | jq -r '.[]'
+}
+
+devlink_cpu_port_get()
+{
+ local cpu_dl_port_num=$(devlink port list | grep "$DEVLINK_DEV" |
+ grep cpu | cut -d/ -f3 | cut -d: -f1 |
+ sed -n '1p')
+
+ echo "$DEVLINK_DEV/$cpu_dl_port_num"
}
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 2f5da414aaa7..977fc2b326a2 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -60,6 +60,15 @@ check_tc_chain_support()
fi
}
+check_tc_action_hw_stats_support()
+{
+ tc actions help 2>&1 | grep -q hw_stats
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
+ exit 1
+ fi
+}
+
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
exit 0
@@ -248,13 +257,40 @@ busywait()
done
}
+not()
+{
+ "$@"
+ [[ $? != 0 ]]
+}
+
+grep_bridge_fdb()
+{
+ local addr=$1; shift
+ local word
+ local flag
+
+ if [ "$1" == "self" ] || [ "$1" == "master" ]; then
+ word=$1; shift
+ if [ "$1" == "-v" ]; then
+ flag=$1; shift
+ fi
+ fi
+
+ $@ | grep $addr | grep $flag "$word"
+}
+
+wait_for_offload()
+{
+ "$@" | grep -q offload
+}
+
until_counter_is()
{
- local value=$1; shift
+ local expr=$1; shift
local current=$("$@")
echo $((current))
- ((current >= value))
+ ((current $expr))
}
busywait_for_counter()
@@ -263,7 +299,7 @@ busywait_for_counter()
local delta=$1; shift
local base=$("$@")
- busywait "$timeout" until_counter_is $((base + delta)) "$@"
+ busywait "$timeout" until_counter_is ">= $((base + delta))" "$@"
}
setup_wait_dev()
@@ -599,6 +635,17 @@ tc_rule_stats_get()
| jq ".[1].options.actions[].stats$selector"
}
+tc_rule_handle_stats_get()
+{
+ local id=$1; shift
+ local handle=$1; shift
+ local selector=${1:-.packets}; shift
+
+ tc -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats$selector"
+}
+
ethtool_stats_get()
{
local dev=$1; shift
@@ -607,6 +654,26 @@ ethtool_stats_get()
ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
}
+qdisc_stats_get()
+{
+ local dev=$1; shift
+ local handle=$1; shift
+ local selector=$1; shift
+
+ tc -j -s qdisc show dev "$dev" \
+ | jq '.[] | select(.handle == "'"$handle"'") | '"$selector"
+}
+
+qdisc_parent_stats_get()
+{
+ local dev=$1; shift
+ local parent=$1; shift
+ local selector=$1; shift
+
+ tc -j -s qdisc show dev "$dev" invisible \
+ | jq '.[] | select(.parent == "'"$parent"'") | '"$selector"
+}
+
humanize()
{
local speed=$1; shift
@@ -1132,18 +1199,29 @@ flood_test()
flood_multicast_test $br_port $host1_if $host2_if
}
-start_traffic()
+__start_traffic()
{
+ local proto=$1; shift
local h_in=$1; shift # Where the traffic egresses the host
local sip=$1; shift
local dip=$1; shift
local dmac=$1; shift
$MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
- -a own -b $dmac -t udp -q &
+ -a own -b $dmac -t "$proto" -q "$@" &
sleep 1
}
+start_traffic()
+{
+ __start_traffic udp "$@"
+}
+
+start_tcp_traffic()
+{
+ __start_traffic tcp "$@"
+}
+
stop_traffic()
{
# Suppress noise from killing mausezahn.
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index 00797597fcf5..c33bfd7ba214 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -29,11 +29,9 @@ mirror_test()
local pref=$1; shift
local expect=$1; shift
- local ping_timeout=$((PING_TIMEOUT * 5))
local t0=$(tc_rule_stats_get $dev $pref)
- ip vrf exec $vrf_name \
- ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.5 -w $ping_timeout \
- &> /dev/null
+ $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
+ -c 10 -d 100ms -t icmp type=8
sleep 0.5
local t1=$(tc_rule_stats_get $dev $pref)
local delta=$((t1 - t0))
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
new file mode 100755
index 000000000000..55eeacf59241
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -0,0 +1,309 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
+# egress of $swp2, the traffic is acted upon by a pedit action. An ingress
+# filter installed on $h2 verifies that the packet looks like expected.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ test_ip_dsfield
+ test_ip_dscp
+ test_ip_ecn
+ test_ip_dscp_ecn
+ test_ip6_dsfield
+ test_ip6_dscp
+ test_ip6_ecn
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+do_test_pedit_dsfield_common()
+{
+ local pedit_locus=$1; shift
+ local pedit_action=$1; shift
+ local mz_flags=$1; shift
+
+ RET=0
+
+ # TOS 125: DSCP 31, ECN 1. Used for testing that the relevant part is
+ # overwritten when zero is selected.
+ $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \
+ -a own -b $h2mac -q -t tcp tos=0x7d,sp=54321,dp=12345
+
+ local pkts
+ pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+ tc_rule_handle_stats_get "dev $h2 ingress" 101)
+ check_err $? "Expected to get 10 packets on test probe, but got $pkts."
+
+ pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+ ((pkts >= 10))
+ check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
+ log_test "$pedit_locus pedit $pedit_action"
+}
+
+do_test_pedit_dsfield()
+{
+ local pedit_locus=$1; shift
+ local pedit_action=$1; shift
+ local match_prot=$1; shift
+ local match_flower=$1; shift
+ local mz_flags=$1; shift
+ local saddr=$1; shift
+ local daddr=$1; shift
+
+ tc filter add $pedit_locus handle 101 pref 1 \
+ flower action pedit ex munge $pedit_action
+ tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+ flower skip_hw $match_flower action pass
+
+ do_test_pedit_dsfield_common "$pedit_locus" "$pedit_action" "$mz_flags"
+
+ tc filter del dev $h2 ingress pref 1
+ tc filter del $pedit_locus pref 1
+}
+
+do_test_ip_dsfield()
+{
+ local locus=$1; shift
+ local dsfield
+
+ for dsfield in 0 1 2 3 128 252 253 254 255; do
+ do_test_pedit_dsfield "$locus" \
+ "ip dsfield set $dsfield" \
+ ip "ip_tos $dsfield" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+ done
+}
+
+test_ip_dsfield()
+{
+ do_test_ip_dsfield "dev $swp1 ingress"
+ do_test_ip_dsfield "dev $swp2 egress"
+}
+
+do_test_ip_dscp()
+{
+ local locus=$1; shift
+ local dscp
+
+ for dscp in 0 1 2 3 32 61 62 63; do
+ do_test_pedit_dsfield "$locus" \
+ "ip dsfield set $((dscp << 2)) retain 0xfc" \
+ ip "ip_tos $(((dscp << 2) | 1))" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+ done
+}
+
+test_ip_dscp()
+{
+ do_test_ip_dscp "dev $swp1 ingress"
+ do_test_ip_dscp "dev $swp2 egress"
+}
+
+do_test_ip_ecn()
+{
+ local locus=$1; shift
+ local ecn
+
+ for ecn in 0 1 2 3; do
+ do_test_pedit_dsfield "$locus" \
+ "ip dsfield set $ecn retain 0x03" \
+ ip "ip_tos $((124 | $ecn))" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+ done
+}
+
+test_ip_ecn()
+{
+ do_test_ip_ecn "dev $swp1 ingress"
+ do_test_ip_ecn "dev $swp2 egress"
+}
+
+do_test_ip_dscp_ecn()
+{
+ local locus=$1; shift
+
+ tc filter add $locus handle 101 pref 1 \
+ flower action pedit ex munge ip dsfield set 124 retain 0xfc \
+ action pedit ex munge ip dsfield set 1 retain 0x03
+ tc filter add dev $h2 ingress handle 101 pref 1 prot ip \
+ flower skip_hw ip_tos 125 action pass
+
+ do_test_pedit_dsfield_common "$locus" "set DSCP + set ECN" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+
+ tc filter del dev $h2 ingress pref 1
+ tc filter del $locus pref 1
+}
+
+test_ip_dscp_ecn()
+{
+ do_test_ip_dscp_ecn "dev $swp1 ingress"
+ do_test_ip_dscp_ecn "dev $swp2 egress"
+}
+
+do_test_ip6_dsfield()
+{
+ local locus=$1; shift
+ local dsfield
+
+ for dsfield in 0 1 2 3 128 252 253 254 255; do
+ do_test_pedit_dsfield "$locus" \
+ "ip6 traffic_class set $dsfield" \
+ ipv6 "ip_tos $dsfield" \
+ "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+ done
+}
+
+test_ip6_dsfield()
+{
+ do_test_ip6_dsfield "dev $swp1 ingress"
+ do_test_ip6_dsfield "dev $swp2 egress"
+}
+
+do_test_ip6_dscp()
+{
+ local locus=$1; shift
+ local dscp
+
+ for dscp in 0 1 2 3 32 61 62 63; do
+ do_test_pedit_dsfield "$locus" \
+ "ip6 traffic_class set $((dscp << 2)) retain 0xfc" \
+ ipv6 "ip_tos $(((dscp << 2) | 1))" \
+ "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+ done
+}
+
+test_ip6_dscp()
+{
+ do_test_ip6_dscp "dev $swp1 ingress"
+ do_test_ip6_dscp "dev $swp2 egress"
+}
+
+do_test_ip6_ecn()
+{
+ local locus=$1; shift
+ local ecn
+
+ for ecn in 0 1 2 3; do
+ do_test_pedit_dsfield "$locus" \
+ "ip6 traffic_class set $ecn retain 0x3" \
+ ipv6 "ip_tos $((124 | $ecn))" \
+ "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+ done
+}
+
+test_ip6_ecn()
+{
+ do_test_ip6_ecn "dev $swp1 ingress"
+ do_test_ip6_ecn "dev $swp2 egress"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh
index 40e0ad1bc4f2..e60c8b4818cc 100755
--- a/tools/testing/selftests/net/forwarding/sch_ets.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets.sh
@@ -34,11 +34,14 @@ switch_destroy()
}
# Callback from sch_ets_tests.sh
-get_stats()
+collect_stats()
{
- local stream=$1; shift
+ local -a streams=("$@")
+ local stream
- link_stats_get $h2.1$stream rx bytes
+ for stream in ${streams[@]}; do
+ qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes
+ done
}
ets_run
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index 3c3b204d47e8..cdf689e99458 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -2,7 +2,7 @@
# Global interface:
# $put -- port under test (e.g. $swp2)
-# get_stats($band) -- A function to collect stats for band
+# collect_stats($streams...) -- A function to get stats for individual streams
# ets_start_traffic($band) -- Start traffic for this band
# ets_change_qdisc($op, $dev, $nstrict, $quanta...) -- Add or change qdisc
@@ -94,15 +94,11 @@ __ets_dwrr_test()
sleep 10
- t0=($(for stream in ${streams[@]}; do
- get_stats $stream
- done))
+ t0=($(collect_stats "${streams[@]}"))
sleep 10
- t1=($(for stream in ${streams[@]}; do
- get_stats $stream
- done))
+ t1=($(collect_stats "${streams[@]}"))
d=($(for ((i = 0; i < ${#streams[@]}; i++)); do
echo $((${t1[$i]} - ${t0[$i]}))
done))
diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
new file mode 100755
index 000000000000..e3bd8a6bb8b4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
+# egress of $swp2, the traffic is acted upon by an action skbedit priority. The
+# new priority should be taken into account when classifying traffic on the PRIO
+# qdisc at $swp2. The test verifies that for different priority values, the
+# traffic ends up in expected PRIO band.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | | PRIO | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_ingress
+ test_egress
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+ tc qdisc add dev $swp2 root handle 10: \
+ prio bands 8 priomap 7 6 5 4 3 2 1 0
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 root
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+test_skbedit_priority_one()
+{
+ local locus=$1; shift
+ local prio=$1; shift
+ local classid=$1; shift
+
+ RET=0
+
+ tc filter add $locus handle 101 pref 1 \
+ flower action skbedit priority $prio
+
+ local pkt0=$(qdisc_parent_stats_get $swp2 $classid .packets)
+ local pkt2=$(tc_rule_handle_stats_get "$locus" 101)
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 10 -d 20msec -p 100 \
+ -a own -b $h2mac -A 192.0.2.1 -B 192.0.2.2 -q
+
+ local pkt1
+ pkt1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((pkt0 + 10))" \
+ qdisc_parent_stats_get $swp2 $classid .packets)
+ check_err $? "Expected to get 10 packets on class $classid, but got $((pkt1 - pkt0))."
+
+ local pkt3=$(tc_rule_handle_stats_get "$locus" 101)
+ ((pkt3 >= pkt2 + 10))
+ check_err $? "Expected to get 10 packets on skbedit rule but got $((pkt3 - pkt2))."
+
+ log_test "$locus skbedit priority $prio -> classid $classid"
+
+ tc filter del $locus pref 1
+}
+
+test_ingress()
+{
+ local prio
+
+ for prio in {0..7}; do
+ test_skbedit_priority_one "dev $swp1 ingress" \
+ $prio 10:$((8 - prio))
+ done
+}
+
+test_egress()
+{
+ local prio
+
+ for prio in {0..7}; do
+ test_skbedit_priority_one "dev $swp2 egress" \
+ $prio 10:$((8 - prio))
+ done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 813d02d1939d..d9eca227136b 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -2,7 +2,8 @@
# SPDX-License-Identifier: GPL-2.0
ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
- mirred_egress_mirror_test gact_trap_test"
+ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
+ gact_trap_test"
NUM_NETIFS=4
source tc_common.sh
source lib.sh
@@ -50,6 +51,9 @@ switch_destroy()
mirred_egress_test()
{
local action=$1
+ local protocol=$2
+ local classifier=$3
+ local classifier_args=$4
RET=0
@@ -62,9 +66,9 @@ mirred_egress_test()
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched without redirect rule inserted"
- tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
- $tcflags dst_ip 192.0.2.2 action mirred egress $action \
- dev $swp2
+ tc filter add dev $swp1 ingress protocol $protocol pref 1 handle 101 \
+ $classifier $tcflags $classifier_args \
+ action mirred egress $action dev $swp2
$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
-t ip -q
@@ -72,10 +76,11 @@ mirred_egress_test()
tc_check_packets "dev $h2 ingress" 101 1
check_err $? "Did not match incoming $action packet"
- tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $swp1 ingress protocol $protocol pref 1 handle 101 \
+ $classifier
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
- log_test "mirred egress $action ($tcflags)"
+ log_test "mirred egress $classifier $action ($tcflags)"
}
gact_drop_and_ok_test()
@@ -187,12 +192,17 @@ cleanup()
mirred_egress_redirect_test()
{
- mirred_egress_test "redirect"
+ mirred_egress_test "redirect" "ip" "flower" "dst_ip 192.0.2.2"
}
mirred_egress_mirror_test()
{
- mirred_egress_test "mirror"
+ mirred_egress_test "mirror" "ip" "flower" "dst_ip 192.0.2.2"
+}
+
+matchall_mirred_egress_mirror_test()
+{
+ mirred_egress_test "mirror" "all" "matchall" ""
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index 64f652633585..0e18e8be6e2a 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -6,39 +6,14 @@ CHECK_TC="yes"
# Can be overridden by the configuration file. See lib.sh
TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms
-__tc_check_packets()
-{
- local id=$1
- local handle=$2
- local count=$3
- local operator=$4
-
- start_time="$(date -u +%s%3N)"
- while true
- do
- cmd_jq "tc -j -s filter show $id" \
- ".[] | select(.options.handle == $handle) | \
- select(.options.actions[0].stats.packets $operator $count)" \
- &> /dev/null
- ret=$?
- if [[ $ret -eq 0 ]]; then
- return $ret
- fi
- current_time="$(date -u +%s%3N)"
- diff=$(expr $current_time - $start_time)
- if [ "$diff" -gt "$TC_HIT_TIMEOUT" ]; then
- return 1
- fi
- done
-}
-
tc_check_packets()
{
local id=$1
local handle=$2
local count=$3
- __tc_check_packets "$id" "$handle" "$count" "=="
+ busywait "$TC_HIT_TIMEOUT" until_counter_is "== $count" \
+ tc_rule_handle_stats_get "$id" "$handle" > /dev/null
}
tc_check_packets_hitting()
@@ -46,5 +21,6 @@ tc_check_packets_hitting()
local id=$1
local handle=$2
- __tc_check_packets "$id" "$handle" 0 ">"
+ busywait "$TC_HIT_TIMEOUT" until_counter_is "> 0" \
+ tc_rule_handle_stats_get "$id" "$handle" > /dev/null
}
diff --git a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c b/tools/testing/selftests/net/hwtstamp_config.c
index e1fdee841021..e1fdee841021 100644
--- a/tools/testing/selftests/networking/timestamping/hwtstamp_config.c
+++ b/tools/testing/selftests/net/hwtstamp_config.c
diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c
index c0c9ecb891e1..f9ed749fd8c7 100644
--- a/tools/testing/selftests/net/ip_defrag.c
+++ b/tools/testing/selftests/net/ip_defrag.c
@@ -192,9 +192,9 @@ static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen,
}
res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
- if (res < 0)
+ if (res < 0 && errno != EPERM)
error(1, errno, "send_fragment");
- if (res != frag_len)
+ if (res >= 0 && res != frag_len)
error(1, 0, "send_fragment: %d vs %d", res, frag_len);
frag_counter++;
@@ -313,9 +313,9 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr,
iphdr->ip_len = htons(frag_len);
}
res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
- if (res < 0)
+ if (res < 0 && errno != EPERM)
error(1, errno, "sendto overlap: %d", frag_len);
- if (res != frag_len)
+ if (res >= 0 && res != frag_len)
error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len);
frag_counter++;
}
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index d72f07642738..260336d5f0b1 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1,2 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
mptcp_connect
+pm_nl_ctl
*.pcap
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index ba450e62dc5b..f50976ee7d44 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -1,12 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
top_srcdir = ../../../../..
+KSFT_KHDR_INSTALL := 1
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
-TEST_PROGS := mptcp_connect.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh
-TEST_GEN_FILES = mptcp_connect
+TEST_GEN_FILES = mptcp_connect pm_nl_ctl
TEST_FILES := settings
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 99579c0223c1..cedee5b952ba 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -34,8 +34,8 @@ extern int optind;
#define TCP_ULP 31
#endif
+static int poll_timeout = 10 * 1000;
static bool listen_mode;
-static int poll_timeout;
enum cfg_mode {
CFG_MODE_POLL,
@@ -50,11 +50,21 @@ static int cfg_sock_proto = IPPROTO_MPTCP;
static bool tcpulp_audit;
static int pf = AF_INET;
static int cfg_sndbuf;
+static int cfg_rcvbuf;
+static bool cfg_join;
static void die_usage(void)
{
- fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] -m mode]"
- "[ -l ] [ -t timeout ] connect_address\n");
+ fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
+ "[-l] connect_address\n");
+ fprintf(stderr, "\t-6 use ipv6\n");
+ fprintf(stderr, "\t-t num -- set poll timeout to num\n");
+ fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
+ fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
+ fprintf(stderr, "\t-p num -- use port num\n");
+ fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
+ fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
+ fprintf(stderr, "\t-u -- check mptcp ulp\n");
exit(1);
}
@@ -97,6 +107,17 @@ static void xgetaddrinfo(const char *node, const char *service,
}
}
+static void set_rcvbuf(int fd, unsigned int size)
+{
+ int err;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size));
+ if (err) {
+ perror("set SO_RCVBUF");
+ exit(1);
+ }
+}
+
static void set_sndbuf(int fd, unsigned int size)
{
int err;
@@ -230,6 +251,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
static size_t do_rnd_write(const int fd, char *buf, const size_t len)
{
+ static bool first = true;
unsigned int do_w;
ssize_t bw;
@@ -237,10 +259,19 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
if (do_w == 0 || do_w > len)
do_w = len;
+ if (cfg_join && first && do_w > 100)
+ do_w = 100;
+
bw = write(fd, buf, do_w);
if (bw < 0)
perror("write");
+ /* let the join handshake complete, before going on */
+ if (cfg_join && first) {
+ usleep(200000);
+ first = false;
+ }
+
return bw;
}
@@ -365,8 +396,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
break;
/* ... but we still receive.
- * Close our write side.
+ * Close our write side, ev. give some time
+ * for address notification
*/
+ if (cfg_join)
+ usleep(400000);
shutdown(peerfd, SHUT_WR);
} else {
if (errno == EINTR)
@@ -383,6 +417,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
}
}
+ /* leave some time for late join/announce */
+ if (cfg_join)
+ usleep(400000);
+
close(peerfd);
return 0;
}
@@ -638,7 +676,7 @@ static void maybe_close(int fd)
{
unsigned int r = rand();
- if (r & 1)
+ if (!cfg_join && (r & 1))
close(fd);
}
@@ -704,6 +742,8 @@ int main_loop(void)
check_getpeername_connect(fd);
+ if (cfg_rcvbuf)
+ set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
@@ -745,7 +785,7 @@ int parse_mode(const char *mode)
return 0;
}
-int parse_sndbuf(const char *size)
+static int parse_int(const char *size)
{
unsigned long s;
@@ -765,17 +805,19 @@ int parse_sndbuf(const char *size)
die_usage();
}
- cfg_sndbuf = s;
-
- return 0;
+ return (int)s;
}
static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6lp:s:hut:m:b:")) != -1) {
+ while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) {
switch (c) {
+ case 'j':
+ cfg_join = true;
+ cfg_mode = CFG_MODE_POLL;
+ break;
case 'l':
listen_mode = true;
break;
@@ -802,8 +844,11 @@ static void parse_opts(int argc, char **argv)
case 'm':
cfg_mode = parse_mode(optarg);
break;
- case 'b':
- cfg_sndbuf = parse_sndbuf(optarg);
+ case 'S':
+ cfg_sndbuf = parse_int(optarg);
+ break;
+ case 'R':
+ cfg_rcvbuf = parse_int(optarg);
break;
}
}
@@ -831,6 +876,8 @@ int main(int argc, char *argv[])
if (fd < 0)
return 1;
+ if (cfg_rcvbuf)
+ set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index d573a0feb98d..acf02e156d20 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
time_start=$(date +%s)
-optstring="b:d:e:l:r:h4cm:"
+optstring="S:R:d:e:l:r:h4cm:"
ret=0
sin=""
sout=""
@@ -19,6 +19,7 @@ tc_loss=$((RANDOM%101))
tc_reorder=""
testmode=""
sndbuf=0
+rcvbuf=0
options_log=true
if [ $tc_loss -eq 100 ];then
@@ -39,7 +40,8 @@ usage() {
echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
- echo -e "\t-b: set sndbuf value (default: use kernel default)"
+ echo -e "\t-S: set sndbuf value (default: use kernel default)"
+ echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)"
}
@@ -73,11 +75,19 @@ while getopts "$optstring" option;do
"c")
capture=true
;;
- "b")
+ "S")
if [ $OPTARG -ge 0 ];then
sndbuf="$OPTARG"
else
- echo "-s requires numeric argument, got \"$OPTARG\"" 1>&2
+ echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2
+ exit 1
+ fi
+ ;;
+ "R")
+ if [ $OPTARG -ge 0 ];then
+ rcvbuf="$OPTARG"
+ else
+ echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2
exit 1
fi
;;
@@ -342,8 +352,12 @@ do_transfer()
port=$((10000+$TEST_COUNT))
TEST_COUNT=$((TEST_COUNT+1))
+ if [ "$rcvbuf" -gt 0 ]; then
+ extra_args="$extra_args -R $rcvbuf"
+ fi
+
if [ "$sndbuf" -gt 0 ]; then
- extra_args="$extra_args -b $sndbuf"
+ extra_args="$extra_args -S $sndbuf"
fi
if [ -n "$testmode" ]; then
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
new file mode 100755
index 000000000000..dd42c2f692d0
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -0,0 +1,357 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ret=0
+sin=""
+sout=""
+cin=""
+cout=""
+ksft_skip=4
+timeout=30
+capture=0
+
+TEST_COUNT=0
+
+init()
+{
+ capout=$(mktemp)
+
+ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+
+ ns1="ns1-$rndh"
+ ns2="ns2-$rndh"
+
+ for netns in "$ns1" "$ns2";do
+ ip netns add $netns || exit $ksft_skip
+ ip -net $netns link set lo up
+ ip netns exec $netns sysctl -q net.mptcp.enabled=1
+ ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
+ ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+ done
+
+ # ns1 ns2
+ # ns1eth1 ns2eth1
+ # ns1eth2 ns2eth2
+ # ns1eth3 ns2eth3
+ # ns1eth4 ns2eth4
+
+ for i in `seq 1 4`; do
+ ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
+ ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
+ ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
+ ip -net "$ns1" link set ns1eth$i up
+
+ ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i
+ ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad
+ ip -net "$ns2" link set ns2eth$i up
+
+ # let $ns2 reach any $ns1 address from any interface
+ ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+ done
+}
+
+cleanup_partial()
+{
+ rm -f "$capout"
+
+ for netns in "$ns1" "$ns2"; do
+ ip netns del $netns
+ done
+}
+
+cleanup()
+{
+ rm -f "$cin" "$cout"
+ rm -f "$sin" "$sout"
+ cleanup_partial
+}
+
+reset()
+{
+ cleanup_partial
+ init
+}
+
+for arg in "$@"; do
+ if [ "$arg" = "-c" ]; then
+ capture=1
+ fi
+done
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+
+check_transfer()
+{
+ in=$1
+ out=$2
+ what=$3
+
+ cmp "$in" "$out" > /dev/null 2>&1
+ if [ $? -ne 0 ] ;then
+ echo "[ FAIL ] $what does not match (in, out):"
+ print_file_err "$in"
+ print_file_err "$out"
+
+ return 1
+ fi
+
+ return 0
+}
+
+do_ping()
+{
+ listener_ns="$1"
+ connector_ns="$2"
+ connect_addr="$3"
+
+ ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null
+ if [ $? -ne 0 ] ; then
+ echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
+ ret=1
+ fi
+}
+
+do_transfer()
+{
+ listener_ns="$1"
+ connector_ns="$2"
+ cl_proto="$3"
+ srv_proto="$4"
+ connect_addr="$5"
+
+ port=$((10000+$TEST_COUNT))
+ TEST_COUNT=$((TEST_COUNT+1))
+
+ :> "$cout"
+ :> "$sout"
+ :> "$capout"
+
+ if [ $capture -eq 1 ]; then
+ if [ -z $SUDO_USER ] ; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ capfile="mp_join-${listener_ns}.pcap"
+
+ echo "Capturing traffic for test $TEST_COUNT into $capfile"
+ ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
+ cappid=$!
+
+ sleep 1
+ fi
+
+ ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
+ spid=$!
+
+ sleep 1
+
+ ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+ cpid=$!
+
+ wait $cpid
+ retc=$?
+ wait $spid
+ rets=$?
+
+ if [ $capture -eq 1 ]; then
+ sleep 1
+ kill $cappid
+ fi
+
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo " client exit code $retc, server $rets" 1>&2
+ echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+ ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
+ echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+ ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
+
+ cat "$capout"
+ return 1
+ fi
+
+ check_transfer $sin $cout "file received by client"
+ retc=$?
+ check_transfer $cin $sout "file received by server"
+ rets=$?
+
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
+ cat "$capout"
+ return 0
+ fi
+
+ cat "$capout"
+ return 1
+}
+
+make_file()
+{
+ name=$1
+ who=$2
+
+ SIZE=1
+
+ dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+ echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+
+ echo "Created $name (size $SIZE KB) containing data sent by $who"
+}
+
+run_tests()
+{
+ listener_ns="$1"
+ connector_ns="$2"
+ connect_addr="$3"
+ lret=0
+
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return
+ fi
+}
+
+chk_join_nr()
+{
+ local msg="$1"
+ local syn_nr=$2
+ local syn_ack_nr=$3
+ local ack_nr=$4
+ local count
+ local dump_stats
+
+ printf "%-36s %s" "$msg" "syn"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$syn_nr" ]; then
+ echo "[fail] got $count JOIN[s] syn expected $syn_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - synack"
+ count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$syn_ack_nr" ]; then
+ echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo -n "[ ok ]"
+ fi
+
+ echo -n " - ack"
+ count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'`
+ [ -z "$count" ] && count=0
+ if [ "$count" != "$ack_nr" ]; then
+ echo "[fail] got $count JOIN[s] ack expected $ack_nr"
+ ret=1
+ dump_stats=1
+ else
+ echo "[ ok ]"
+ fi
+ if [ "${dump_stats}" = 1 ]; then
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep MPTcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep MPTcp
+ fi
+}
+
+sin=$(mktemp)
+sout=$(mktemp)
+cin=$(mktemp)
+cout=$(mktemp)
+init
+make_file "$cin" "client"
+make_file "$sin" "server"
+trap cleanup EXIT
+
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "no JOIN" "0" "0" "0"
+
+# subflow limted by client
+reset
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow, limited by client" 0 0 0
+
+# subflow limted by server
+reset
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow, limited by server" 1 1 0
+
+# subflow
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "single subflow" 1 1 1
+
+# multiple subflows
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows" 2 2 2
+
+# multiple subflows limited by serverf
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows, limited by server" 2 2 1
+
+# add_address, unused
+reset
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "unused signal address" 0 0 0
+
+# accept and use add_addr
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "signal address" 1 1 1
+
+# accept and use add_addr with an additional subflow
+# note: signal address in server ns and local addresses in client ns must
+# belong to different subnets or one of the listed local address could be
+# used for 'add_addr' subflow
+reset
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "subflow and signal" 2 2 2
+
+# accept and use add_addr with additional subflows
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_tests $ns1 $ns2 10.0.1.1
+chk_join_nr "multiple subflows and signal" 3 3 3
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
new file mode 100755
index 000000000000..15f4f46ca3a9
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -0,0 +1,130 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ksft_skip=4
+ret=0
+
+usage() {
+ echo "Usage: $0 [ -h ]"
+}
+
+
+while getopts "$optstring" option;do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "?")
+ usage $0
+ exit 1
+ ;;
+ esac
+done
+
+sec=$(date +%s)
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+err=$(mktemp)
+ret=0
+
+cleanup()
+{
+ rm -f $err
+ ip netns del $ns1
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+ip netns add $ns1 || exit $ksft_skip
+ip -net $ns1 link set lo up
+ip netns exec $ns1 sysctl -q net.mptcp.enabled=1
+
+check()
+{
+ local cmd="$1"
+ local expected="$2"
+ local msg="$3"
+ local out=`$cmd 2>$err`
+ local cmd_ret=$?
+
+ printf "%-50s %s" "$msg"
+ if [ $cmd_ret -ne 0 ]; then
+ echo "[FAIL] command execution '$cmd' stderr "
+ cat $err
+ ret=1
+ elif [ "$out" = "$expected" ]; then
+ echo "[ OK ]"
+ else
+ echo -n "[FAIL] "
+ echo "expected '$expected' got '$out'"
+ ret=1
+ fi
+}
+
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+subflows 0" "defaults limits"
+
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup
+check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1" "simple add/get addr"
+
+check "ip netns exec $ns1 ./pm_nl_ctl dump" \
+"id 1 flags 10.0.1.1
+id 2 flags subflow dev lo 10.0.1.2
+id 3 flags signal,backup 10.0.1.3" "dump addrs"
+
+ip netns exec $ns1 ./pm_nl_ctl del 2
+check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr"
+check "ip netns exec $ns1 ./pm_nl_ctl dump" \
+"id 1 flags 10.0.1.1
+id 3 flags signal,backup 10.0.1.3" "dump addrs after del"
+
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3
+check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
+
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal
+check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment"
+
+for i in `seq 5 9`; do
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
+done
+check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
+check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
+
+for i in `seq 9 256`; do
+ ip netns exec $ns1 ./pm_nl_ctl del $i
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9
+done
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
+id 3 flags signal,backup 10.0.1.3
+id 4 flags signal 10.0.1.4
+id 5 flags signal 10.0.1.5
+id 6 flags signal 10.0.1.6
+id 7 flags signal 10.0.1.7
+id 8 flags signal 10.0.1.8" "id limit"
+
+ip netns exec $ns1 ./pm_nl_ctl flush
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
+
+ip netns exec $ns1 ./pm_nl_ctl limits 9 1
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+subflows 0" "rcv addrs above hard limit"
+
+ip netns exec $ns1 ./pm_nl_ctl limits 1 9
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+subflows 0" "subflows above hard limit"
+
+ip netns exec $ns1 ./pm_nl_ctl limits 8 8
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
+subflows 8" "set limits"
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
new file mode 100644
index 000000000000..b24a2f17d415
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -0,0 +1,616 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <error.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include <linux/rtnetlink.h>
+#include <linux/genetlink.h>
+
+#include "linux/mptcp.h"
+
+#ifndef MPTCP_PM_NAME
+#define MPTCP_PM_NAME "mptcp_pm"
+#endif
+
+static void syntax(char *argv[])
+{
+ fprintf(stderr, "%s add|get|del|flush|dump|accept [<args>]\n", argv[0]);
+ fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
+ fprintf(stderr, "\tdel <id>\n");
+ fprintf(stderr, "\tget <id>\n");
+ fprintf(stderr, "\tflush\n");
+ fprintf(stderr, "\tdump\n");
+ fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
+ exit(0);
+}
+
+static int init_genl_req(char *data, int family, int cmd, int version)
+{
+ struct nlmsghdr *nh = (void *)data;
+ struct genlmsghdr *gh;
+ int off = 0;
+
+ nh->nlmsg_type = family;
+ nh->nlmsg_flags = NLM_F_REQUEST;
+ nh->nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ off += NLMSG_ALIGN(sizeof(*nh));
+
+ gh = (void *)(data + off);
+ gh->cmd = cmd;
+ gh->version = version;
+ off += NLMSG_ALIGN(sizeof(*gh));
+ return off;
+}
+
+static void nl_error(struct nlmsghdr *nh)
+{
+ struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ int len = nh->nlmsg_len - sizeof(*nh);
+ uint32_t off;
+
+ if (len < sizeof(struct nlmsgerr))
+ error(1, 0, "netlink error message truncated %d min %ld", len,
+ sizeof(struct nlmsgerr));
+
+ if (!err->error) {
+ /* check messages from kernel */
+ struct rtattr *attrs = (struct rtattr *)NLMSG_DATA(nh);
+
+ while (RTA_OK(attrs, len)) {
+ if (attrs->rta_type == NLMSGERR_ATTR_MSG)
+ fprintf(stderr, "netlink ext ack msg: %s\n",
+ (char *)RTA_DATA(attrs));
+ if (attrs->rta_type == NLMSGERR_ATTR_OFFS) {
+ memcpy(&off, RTA_DATA(attrs), 4);
+ fprintf(stderr, "netlink err off %d\n",
+ (int)off);
+ }
+ attrs = RTA_NEXT(attrs, len);
+ }
+ } else {
+ fprintf(stderr, "netlink error %d", err->error);
+ }
+}
+
+/* do a netlink command and, if max > 0, fetch the reply */
+static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ socklen_t addr_len;
+ void *data = nh;
+ int rem, ret;
+ int err = 0;
+
+ nh->nlmsg_len = len;
+ ret = sendto(fd, data, len, 0, (void *)&nladdr, sizeof(nladdr));
+ if (ret != len)
+ error(1, errno, "send netlink: %uB != %uB\n", ret, len);
+ if (max == 0)
+ return 0;
+
+ addr_len = sizeof(nladdr);
+ rem = ret = recvfrom(fd, data, max, 0, (void *)&nladdr, &addr_len);
+ if (ret < 0)
+ error(1, errno, "recv netlink: %uB\n", ret);
+
+ /* Beware: the NLMSG_NEXT macro updates the 'rem' argument */
+ for (; NLMSG_OK(nh, rem); nh = NLMSG_NEXT(nh, rem)) {
+ if (nh->nlmsg_type == NLMSG_ERROR) {
+ nl_error(nh);
+ err = 1;
+ }
+ }
+ if (err)
+ error(1, 0, "bailing out due to netlink error[s]");
+ return ret;
+}
+
+static int genl_parse_getfamily(struct nlmsghdr *nlh)
+{
+ struct genlmsghdr *ghdr = NLMSG_DATA(nlh);
+ int len = nlh->nlmsg_len;
+ struct rtattr *attrs;
+
+ if (nlh->nlmsg_type != GENL_ID_CTRL)
+ error(1, errno, "Not a controller message, len=%d type=0x%x\n",
+ nlh->nlmsg_len, nlh->nlmsg_type);
+
+ len -= NLMSG_LENGTH(GENL_HDRLEN);
+
+ if (len < 0)
+ error(1, errno, "wrong controller message len %d\n", len);
+
+ if (ghdr->cmd != CTRL_CMD_NEWFAMILY)
+ error(1, errno, "Unknown controller command %d\n", ghdr->cmd);
+
+ attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+ while (RTA_OK(attrs, len)) {
+ if (attrs->rta_type == CTRL_ATTR_FAMILY_ID)
+ return *(__u16 *)RTA_DATA(attrs);
+ attrs = RTA_NEXT(attrs, len);
+ }
+
+ error(1, errno, "can't find CTRL_ATTR_FAMILY_ID attr");
+ return -1;
+}
+
+static int resolve_mptcp_pm_netlink(int fd)
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ int namelen;
+ int off = 0;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 0);
+
+ rta = (void *)(data + off);
+ namelen = strlen(MPTCP_PM_NAME) + 1;
+ rta->rta_type = CTRL_ATTR_FAMILY_NAME;
+ rta->rta_len = RTA_LENGTH(namelen);
+ memcpy(RTA_DATA(rta), MPTCP_PM_NAME, namelen);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ do_nl_req(fd, nh, off, sizeof(data));
+ return genl_parse_getfamily((void *)data);
+}
+
+int add_addr(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct rtattr *rta, *nest;
+ struct nlmsghdr *nh;
+ u_int16_t family;
+ u_int32_t flags;
+ int nest_start;
+ u_int8_t id;
+ int off = 0;
+ int arg;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ADD_ADDR,
+ MPTCP_PM_VER);
+
+ if (argc < 3)
+ syntax(argv);
+
+ nest_start = off;
+ nest = (void *)(data + off);
+ nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+ nest->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(nest->rta_len);
+
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else
+ error(1, errno, "can't parse ip %s", argv[2]);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ for (arg = 3; arg < argc; arg++) {
+ if (!strcmp(argv[arg], "flags")) {
+ char *tok, *str;
+
+ /* flags */
+ flags = 0;
+ if (++arg >= argc)
+ error(1, 0, " missing flags value");
+
+ /* do not support flag list yet */
+ for (str = argv[arg]; (tok = strtok(str, ","));
+ str = NULL) {
+ if (!strcmp(tok, "subflow"))
+ flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
+ else if (!strcmp(tok, "signal"))
+ flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
+ else if (!strcmp(tok, "backup"))
+ flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else
+ error(1, errno,
+ "unknown flag %s", argv[arg]);
+ }
+
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &flags, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "id")) {
+ if (++arg >= argc)
+ error(1, 0, " missing id value");
+
+ id = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "dev")) {
+ int32_t ifindex;
+
+ if (++arg >= argc)
+ error(1, 0, " missing dev name");
+
+ ifindex = if_nametoindex(argv[arg]);
+ if (!ifindex)
+ error(1, errno, "unknown device %s", argv[arg]);
+
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &ifindex, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else
+ error(1, 0, "unknown keyword %s", argv[arg]);
+ }
+ nest->rta_len = off - nest_start;
+
+ do_nl_req(fd, nh, off, 0);
+ return 0;
+}
+
+int del_addr(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct rtattr *rta, *nest;
+ struct nlmsghdr *nh;
+ int nest_start;
+ u_int8_t id;
+ int off = 0;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR,
+ MPTCP_PM_VER);
+
+ /* the only argument is the address id */
+ if (argc != 3)
+ syntax(argv);
+
+ id = atoi(argv[2]);
+
+ nest_start = off;
+ nest = (void *)(data + off);
+ nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+ nest->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(nest->rta_len);
+
+ /* build a dummy addr with only the ID set */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ nest->rta_len = off - nest_start;
+
+ do_nl_req(fd, nh, off, 0);
+ return 0;
+}
+
+static void print_addr(struct rtattr *attrs, int len)
+{
+ uint16_t family = 0;
+ char str[1024];
+ uint32_t flags;
+ uint8_t id;
+
+ while (RTA_OK(attrs, len)) {
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FAMILY)
+ memcpy(&family, RTA_DATA(attrs), 2);
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) {
+ if (family != AF_INET)
+ error(1, errno, "wrong IP (v4) for family %d",
+ family);
+ inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str));
+ printf("%s", str);
+ }
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) {
+ if (family != AF_INET6)
+ error(1, errno, "wrong IP (v6) for family %d",
+ family);
+ inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str));
+ printf("%s", str);
+ }
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) {
+ memcpy(&id, RTA_DATA(attrs), 1);
+ printf("id %d ", id);
+ }
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FLAGS) {
+ memcpy(&flags, RTA_DATA(attrs), 4);
+
+ printf("flags ");
+ if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL) {
+ printf("signal");
+ flags &= ~MPTCP_PM_ADDR_FLAG_SIGNAL;
+ if (flags)
+ printf(",");
+ }
+
+ if (flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) {
+ printf("subflow");
+ flags &= ~MPTCP_PM_ADDR_FLAG_SUBFLOW;
+ if (flags)
+ printf(",");
+ }
+
+ if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) {
+ printf("backup");
+ flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ if (flags)
+ printf(",");
+ }
+
+ /* bump unknown flags, if any */
+ if (flags)
+ printf("0x%x", flags);
+ printf(" ");
+ }
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_IF_IDX) {
+ char name[IF_NAMESIZE], *ret;
+ int32_t ifindex;
+
+ memcpy(&ifindex, RTA_DATA(attrs), 4);
+ ret = if_indextoname(ifindex, name);
+ if (ret)
+ printf("dev %s ", ret);
+ else
+ printf("dev unknown/%d", ifindex);
+ }
+
+ attrs = RTA_NEXT(attrs, len);
+ }
+ printf("\n");
+}
+
+static void print_addrs(struct nlmsghdr *nh, int pm_family, int total_len)
+{
+ struct rtattr *attrs;
+
+ for (; NLMSG_OK(nh, total_len); nh = NLMSG_NEXT(nh, total_len)) {
+ int len = nh->nlmsg_len;
+
+ if (nh->nlmsg_type == NLMSG_DONE)
+ break;
+ if (nh->nlmsg_type == NLMSG_ERROR)
+ nl_error(nh);
+ if (nh->nlmsg_type != pm_family)
+ continue;
+
+ len -= NLMSG_LENGTH(GENL_HDRLEN);
+ attrs = (struct rtattr *) ((char *) NLMSG_DATA(nh) +
+ GENL_HDRLEN);
+ while (RTA_OK(attrs, len)) {
+ if (attrs->rta_type ==
+ (MPTCP_PM_ATTR_ADDR | NLA_F_NESTED))
+ print_addr((void *)RTA_DATA(attrs),
+ attrs->rta_len);
+ attrs = RTA_NEXT(attrs, len);
+ }
+ }
+}
+
+int get_addr(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct rtattr *rta, *nest;
+ struct nlmsghdr *nh;
+ int nest_start;
+ u_int8_t id;
+ int off = 0;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR,
+ MPTCP_PM_VER);
+
+ /* the only argument is the address id */
+ if (argc != 3)
+ syntax(argv);
+
+ id = atoi(argv[2]);
+
+ nest_start = off;
+ nest = (void *)(data + off);
+ nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+ nest->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(nest->rta_len);
+
+ /* build a dummy addr with only the ID set */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ nest->rta_len = off - nest_start;
+
+ print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
+ return 0;
+}
+
+int dump_addrs(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ pid_t pid = getpid();
+ struct nlmsghdr *nh;
+ int off = 0;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR,
+ MPTCP_PM_VER);
+ nh->nlmsg_flags |= NLM_F_DUMP;
+ nh->nlmsg_seq = 1;
+ nh->nlmsg_pid = pid;
+ nh->nlmsg_len = off;
+
+ print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
+ return 0;
+}
+
+int flush_addrs(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct nlmsghdr *nh;
+ int off = 0;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_FLUSH_ADDRS,
+ MPTCP_PM_VER);
+
+ do_nl_req(fd, nh, off, 0);
+ return 0;
+}
+
+static void print_limits(struct nlmsghdr *nh, int pm_family, int total_len)
+{
+ struct rtattr *attrs;
+ uint32_t max;
+
+ for (; NLMSG_OK(nh, total_len); nh = NLMSG_NEXT(nh, total_len)) {
+ int len = nh->nlmsg_len;
+
+ if (nh->nlmsg_type == NLMSG_DONE)
+ break;
+ if (nh->nlmsg_type == NLMSG_ERROR)
+ nl_error(nh);
+ if (nh->nlmsg_type != pm_family)
+ continue;
+
+ len -= NLMSG_LENGTH(GENL_HDRLEN);
+ attrs = (struct rtattr *) ((char *) NLMSG_DATA(nh) +
+ GENL_HDRLEN);
+ while (RTA_OK(attrs, len)) {
+ int type = attrs->rta_type;
+
+ if (type != MPTCP_PM_ATTR_RCV_ADD_ADDRS &&
+ type != MPTCP_PM_ATTR_SUBFLOWS)
+ goto next;
+
+ memcpy(&max, RTA_DATA(attrs), 4);
+ printf("%s %u\n", type == MPTCP_PM_ATTR_SUBFLOWS ?
+ "subflows" : "accept", max);
+
+next:
+ attrs = RTA_NEXT(attrs, len);
+ }
+ }
+}
+
+int get_set_limits(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ uint32_t rcv_addr = 0, subflows = 0;
+ int cmd, len = sizeof(data);
+ struct nlmsghdr *nh;
+ int off = 0;
+
+ /* limit */
+ if (argc == 4) {
+ rcv_addr = atoi(argv[2]);
+ subflows = atoi(argv[3]);
+ cmd = MPTCP_PM_CMD_SET_LIMITS;
+ } else {
+ cmd = MPTCP_PM_CMD_GET_LIMITS;
+ }
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, cmd, MPTCP_PM_VER);
+
+ /* limit */
+ if (cmd == MPTCP_PM_CMD_SET_LIMITS) {
+ struct rtattr *rta = (void *)(data + off);
+
+ rta->rta_type = MPTCP_PM_ATTR_RCV_ADD_ADDRS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &rcv_addr, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_SUBFLOWS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &subflows, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* do not expect a reply */
+ len = 0;
+ }
+
+ len = do_nl_req(fd, nh, off, len);
+ if (cmd == MPTCP_PM_CMD_GET_LIMITS)
+ print_limits(nh, pm_family, len);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int fd, pm_family;
+
+ if (argc < 2)
+ syntax(argv);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (fd == -1)
+ error(1, errno, "socket netlink");
+
+ pm_family = resolve_mptcp_pm_netlink(fd);
+
+ if (!strcmp(argv[1], "add"))
+ return add_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "del"))
+ return del_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "flush"))
+ return flush_addrs(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "get"))
+ return get_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "dump"))
+ return dump_addrs(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "limits"))
+ return get_set_limits(fd, pm_family, argc, argv);
+
+ fprintf(stderr, "unknown sub-command: %s", argv[1]);
+ syntax(argv);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 71a62e7e35b1..77c09cd339c3 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -67,6 +67,10 @@
# Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
# encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
#
+# - pmtu_ipv{4,6}_ipv{4,6}_exception
+# Same as pmtu_ipv4_vxlan4, but using a IPv4/IPv6 tunnel over IPv4/IPv6,
+# instead of VXLAN
+#
# - pmtu_vti4_exception
# Set up vti tunnel on top of veth, with xfrm states and policies, in two
# namespaces with matching endpoints. Check that route exception is not
@@ -151,6 +155,10 @@ tests="
pmtu_ipv6_gue4_exception IPv6 over gue4: PMTU exceptions 1
pmtu_ipv4_gue6_exception IPv4 over gue6: PMTU exceptions 1
pmtu_ipv6_gue6_exception IPv6 over gue6: PMTU exceptions 1
+ pmtu_ipv4_ipv4_exception IPv4 over IPv4: PMTU exceptions 1
+ pmtu_ipv6_ipv4_exception IPv6 over IPv4: PMTU exceptions 1
+ pmtu_ipv4_ipv6_exception IPv4 over IPv6: PMTU exceptions 1
+ pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1
pmtu_vti6_exception vti6: PMTU exceptions 0
pmtu_vti4_exception vti4: PMTU exceptions 0
pmtu_vti4_default_mtu vti4: default MTU assignment 0
@@ -363,6 +371,62 @@ setup_gue66() {
setup_fou_or_gue 6 6 gue
}
+setup_ipvX_over_ipvY() {
+ inner=${1}
+ outer=${2}
+
+ if [ "${outer}" -eq 4 ]; then
+ a_addr="${prefix4}.${a_r1}.1"
+ b_addr="${prefix4}.${b_r1}.1"
+ if [ "${inner}" -eq 4 ]; then
+ type="ipip"
+ mode="ipip"
+ else
+ type="sit"
+ mode="ip6ip"
+ fi
+ else
+ a_addr="${prefix6}:${a_r1}::1"
+ b_addr="${prefix6}:${b_r1}::1"
+ type="ip6tnl"
+ if [ "${inner}" -eq 4 ]; then
+ mode="ipip6"
+ else
+ mode="ip6ip6"
+ fi
+ fi
+
+ run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2
+ run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}
+
+ run_cmd ${ns_a} ip link set ip_a up
+ run_cmd ${ns_b} ip link set ip_b up
+
+ if [ "${inner}" = "4" ]; then
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ip_a
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ip_b
+ else
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ip_a
+ run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ip_b
+ fi
+}
+
+setup_ip4ip4() {
+ setup_ipvX_over_ipvY 4 4
+}
+
+setup_ip6ip4() {
+ setup_ipvX_over_ipvY 6 4
+}
+
+setup_ip4ip6() {
+ setup_ipvX_over_ipvY 4 6
+}
+
+setup_ip6ip6() {
+ setup_ipvX_over_ipvY 6 6
+}
+
setup_namespaces() {
for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
ip netns add ${n} || return 1
@@ -908,6 +972,64 @@ test_pmtu_ipv6_gue6_exception() {
test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue
}
+test_pmtu_ipvX_over_ipvY_exception() {
+ inner=${1}
+ outer=${2}
+ ll_mtu=4000
+
+ setup namespaces routing ip${inner}ip${outer} || return 2
+
+ trace "${ns_a}" ip_a "${ns_b}" ip_b \
+ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B
+
+ if [ ${inner} -eq 4 ]; then
+ ping=ping
+ dst=${tunnel4_b_addr}
+ else
+ ping=${ping6}
+ dst=${tunnel6_b_addr}
+ fi
+
+ if [ ${outer} -eq 4 ]; then
+ # IPv4 header
+ exp_mtu=$((${ll_mtu} - 20))
+ else
+ # IPv6 header Option 4
+ exp_mtu=$((${ll_mtu} - 40 - 8))
+ fi
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
+ mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+ mtu "${ns_b}" veth_B-R1 ${ll_mtu}
+ mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+ mtu "${ns_a}" ip_a $((${ll_mtu} + 1000)) || return
+ mtu "${ns_b}" ip_b $((${ll_mtu} + 1000)) || return
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
+
+ # Check that exception was created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ip${inner}ip${outer} interface"
+}
+
+test_pmtu_ipv4_ipv4_exception() {
+ test_pmtu_ipvX_over_ipvY_exception 4 4
+}
+
+test_pmtu_ipv6_ipv4_exception() {
+ test_pmtu_ipvX_over_ipvY_exception 6 4
+}
+
+test_pmtu_ipv4_ipv6_exception() {
+ test_pmtu_ipvX_over_ipvY_exception 4 6
+}
+
+test_pmtu_ipv6_ipv6_exception() {
+ test_pmtu_ipvX_over_ipvY_exception 6 6
+}
+
test_pmtu_vti4_exception() {
setup namespaces veth vti4 xfrm4 || return 2
trace "${ns_a}" veth_a "${ns_b}" veth_b \
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
new file mode 100644
index 000000000000..7b01b7c2ec10
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Check if we can fully utilize 4-tuples for connect().
+ *
+ * Rules to bind sockets to the same port when all ephemeral ports are
+ * exhausted.
+ *
+ * 1. if there are TCP_LISTEN sockets on the port, fail to bind.
+ * 2. if there are sockets without SO_REUSEADDR, fail to bind.
+ * 3. if SO_REUSEADDR is disabled, fail to bind.
+ * 4. if SO_REUSEADDR is enabled and SO_REUSEPORT is disabled,
+ * succeed to bind.
+ * 5. if SO_REUSEADDR and SO_REUSEPORT are enabled and
+ * there is no socket having the both options and the same EUID,
+ * succeed to bind.
+ * 6. fail to bind.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+
+struct reuse_opts {
+ int reuseaddr[2];
+ int reuseport[2];
+};
+
+struct reuse_opts unreusable_opts[12] = {
+ {0, 0, 0, 0},
+ {0, 0, 0, 1},
+ {0, 0, 1, 0},
+ {0, 0, 1, 1},
+ {0, 1, 0, 0},
+ {0, 1, 0, 1},
+ {0, 1, 1, 0},
+ {0, 1, 1, 1},
+ {1, 0, 0, 0},
+ {1, 0, 0, 1},
+ {1, 0, 1, 0},
+ {1, 0, 1, 1},
+};
+
+struct reuse_opts reusable_opts[4] = {
+ {1, 1, 0, 0},
+ {1, 1, 0, 1},
+ {1, 1, 1, 0},
+ {1, 1, 1, 1},
+};
+
+int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport)
+{
+ struct sockaddr_in local_addr;
+ int len = sizeof(local_addr);
+ int fd, ret;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_NE(-1, fd) TH_LOG("failed to open socket.");
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(int));
+ ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEADDR.");
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &reuseport, sizeof(int));
+ ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEPORT.");
+
+ local_addr.sin_family = AF_INET;
+ local_addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ local_addr.sin_port = 0;
+
+ if (bind(fd, (struct sockaddr *)&local_addr, len) == -1) {
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+TEST(reuseaddr_ports_exhausted_unreusable)
+{
+ struct reuse_opts *opts;
+ int i, j, fd[2];
+
+ for (i = 0; i < 12; i++) {
+ opts = &unreusable_opts[i];
+
+ for (j = 0; j < 2; j++)
+ fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+ ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+ EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind.");
+
+ for (j = 0; j < 2; j++)
+ if (fd[j] != -1)
+ close(fd[j]);
+ }
+}
+
+TEST(reuseaddr_ports_exhausted_reusable_same_euid)
+{
+ struct reuse_opts *opts;
+ int i, j, fd[2];
+
+ for (i = 0; i < 4; i++) {
+ opts = &reusable_opts[i];
+
+ for (j = 0; j < 2; j++)
+ fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+ ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+
+ if (opts->reuseport[0] && opts->reuseport[1]) {
+ EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened.");
+ } else {
+ EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations.");
+ }
+
+ for (j = 0; j < 2; j++)
+ if (fd[j] != -1)
+ close(fd[j]);
+ }
+}
+
+TEST(reuseaddr_ports_exhausted_reusable_different_euid)
+{
+ struct reuse_opts *opts;
+ int i, j, ret, fd[2];
+ uid_t euid[2] = {10, 20};
+
+ for (i = 0; i < 4; i++) {
+ opts = &reusable_opts[i];
+
+ for (j = 0; j < 2; j++) {
+ ret = seteuid(euid[j]);
+ ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: %d.", euid[j]);
+
+ fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+ ret = seteuid(0);
+ ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: 0.");
+ }
+
+ ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+ EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind because one socket can be bound in each euid.");
+
+ if (fd[1] != -1) {
+ ret = listen(fd[0], 5);
+ ASSERT_EQ(0, ret) TH_LOG("failed to listen.");
+
+ ret = listen(fd[1], 5);
+ EXPECT_EQ(-1, ret) TH_LOG("should fail to listen because only one uid reserves the port in TCP_LISTEN.");
+ }
+
+ for (j = 0; j < 2; j++)
+ if (fd[j] != -1)
+ close(fd[j]);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh
new file mode 100755
index 000000000000..20e3a2913d06
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run tests when all ephemeral ports are exhausted.
+#
+# Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+ ip netns exec "${NETNS}" \
+ sysctl -w net.ipv4.ip_local_port_range="32768 32768" \
+ > /dev/null 2>&1
+ ip netns exec "${NETNS}" \
+ sysctl -w net.ipv4.ip_autobind_reuse=1 > /dev/null 2>&1
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+do_test() {
+ ip netns exec "${NETNS}" ./reuseaddr_ports_exhausted
+}
+
+do_test
+echo "tests done"
diff --git a/tools/testing/selftests/networking/timestamping/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 6dee9e636a95..6dee9e636a95 100644
--- a/tools/testing/selftests/networking/timestamping/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index 35505b31e5cc..4555f88252ba 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -165,9 +165,10 @@ void *child_thread(void *arg)
socklen_t zc_len = sizeof(zc);
int res;
+ memset(&zc, 0, sizeof(zc));
zc.address = (__u64)((unsigned long)addr);
zc.length = chunk_size;
- zc.recv_skip_hint = 0;
+
res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
&zc, &zc_len);
if (res == -1)
@@ -281,12 +282,14 @@ static void setup_sockaddr(int domain, const char *str_addr,
static void do_accept(int fdlisten)
{
pthread_attr_t attr;
+ int rcvlowat;
pthread_attr_init(&attr);
pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+ rcvlowat = chunk_size;
if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
- &chunk_size, sizeof(chunk_size)) == -1) {
+ &rcvlowat, sizeof(rcvlowat)) == -1) {
perror("setsockopt SO_RCVLOWAT");
}
diff --git a/tools/testing/selftests/networking/timestamping/timestamping.c b/tools/testing/selftests/net/timestamping.c
index aca3491174a1..aca3491174a1 100644
--- a/tools/testing/selftests/networking/timestamping/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 0ea44d975b6c..c5282e62df75 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -101,6 +101,21 @@ FIXTURE(tls)
bool notls;
};
+FIXTURE_VARIANT(tls)
+{
+ unsigned int tls_version;
+};
+
+FIXTURE_VARIANT_ADD(tls, 12)
+{
+ .tls_version = TLS_1_2_VERSION,
+};
+
+FIXTURE_VARIANT_ADD(tls, 13)
+{
+ .tls_version = TLS_1_3_VERSION,
+};
+
FIXTURE_SETUP(tls)
{
struct tls12_crypto_info_aes_gcm_128 tls12;
@@ -112,7 +127,7 @@ FIXTURE_SETUP(tls)
len = sizeof(addr);
memset(&tls12, 0, sizeof(tls12));
- tls12.info.version = TLS_1_3_VERSION;
+ tls12.info.version = variant->tls_version;
tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
addr.sin_family = AF_INET;
@@ -733,7 +748,7 @@ TEST_F(tls, bidir)
struct tls12_crypto_info_aes_gcm_128 tls12;
memset(&tls12, 0, sizeof(tls12));
- tls12.info.version = TLS_1_3_VERSION;
+ tls12.info.version = variant->tls_version;
tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
@@ -1258,78 +1273,4 @@ TEST(keysizes) {
close(cfd);
}
-TEST(tls12) {
- int fd, cfd;
- bool notls;
-
- struct tls12_crypto_info_aes_gcm_128 tls12;
- struct sockaddr_in addr;
- socklen_t len;
- int sfd, ret;
-
- notls = false;
- len = sizeof(addr);
-
- memset(&tls12, 0, sizeof(tls12));
- tls12.info.version = TLS_1_2_VERSION;
- tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
-
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_ANY);
- addr.sin_port = 0;
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- sfd = socket(AF_INET, SOCK_STREAM, 0);
-
- ret = bind(sfd, &addr, sizeof(addr));
- ASSERT_EQ(ret, 0);
- ret = listen(sfd, 10);
- ASSERT_EQ(ret, 0);
-
- ret = getsockname(sfd, &addr, &len);
- ASSERT_EQ(ret, 0);
-
- ret = connect(fd, &addr, sizeof(addr));
- ASSERT_EQ(ret, 0);
-
- ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
- if (ret != 0) {
- notls = true;
- printf("Failure setting TCP_ULP, testing without tls\n");
- }
-
- if (!notls) {
- ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
- sizeof(tls12));
- ASSERT_EQ(ret, 0);
- }
-
- cfd = accept(sfd, &addr, &len);
- ASSERT_GE(cfd, 0);
-
- if (!notls) {
- ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls",
- sizeof("tls"));
- ASSERT_EQ(ret, 0);
-
- ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
- sizeof(tls12));
- ASSERT_EQ(ret, 0);
- }
-
- close(sfd);
-
- char const *test_str = "test_read";
- int send_len = 10;
- char buf[10];
-
- send_len = strlen(test_str) + 1;
- EXPECT_EQ(send(fd, test_str, send_len, 0), send_len);
- EXPECT_NE(recv(cfd, buf, send_len, 0), -1);
- EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
-
- close(fd);
- close(cfd);
-}
-
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index 7e386be47120..011b0da6b033 100644
--- a/tools/testing/selftests/networking/timestamping/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -41,6 +41,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/epoll.h>
#include <sys/ioctl.h>
#include <sys/select.h>
#include <sys/socket.h>
@@ -49,6 +50,10 @@
#include <time.h>
#include <unistd.h>
+#define NSEC_PER_USEC 1000L
+#define USEC_PER_SEC 1000000L
+#define NSEC_PER_SEC 1000000000LL
+
/* command line parameters */
static int cfg_proto = SOCK_STREAM;
static int cfg_ipproto = IPPROTO_TCP;
@@ -61,12 +66,16 @@ static int cfg_delay_snd;
static int cfg_delay_ack;
static bool cfg_show_payload;
static bool cfg_do_pktinfo;
+static bool cfg_busy_poll;
+static int cfg_sleep_usec = 50 * 1000;
static bool cfg_loop_nodata;
-static bool cfg_no_delay;
static bool cfg_use_cmsg;
static bool cfg_use_pf_packet;
+static bool cfg_use_epoll;
+static bool cfg_epollet;
static bool cfg_do_listen;
static uint16_t dest_port = 9000;
+static bool cfg_print_nsec;
static struct sockaddr_in daddr;
static struct sockaddr_in6 daddr6;
@@ -75,11 +84,48 @@ static struct timespec ts_usr;
static int saved_tskey = -1;
static int saved_tskey_type = -1;
+struct timing_event {
+ int64_t min;
+ int64_t max;
+ int64_t total;
+ int count;
+};
+
+static struct timing_event usr_enq;
+static struct timing_event usr_snd;
+static struct timing_event usr_ack;
+
static bool test_failed;
+static int64_t timespec_to_ns64(struct timespec *ts)
+{
+ return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
static int64_t timespec_to_us64(struct timespec *ts)
{
- return ts->tv_sec * 1000 * 1000 + ts->tv_nsec / 1000;
+ return ts->tv_sec * USEC_PER_SEC + ts->tv_nsec / NSEC_PER_USEC;
+}
+
+static void init_timing_event(struct timing_event *te)
+{
+ te->min = INT64_MAX;
+ te->max = 0;
+ te->total = 0;
+ te->count = 0;
+}
+
+static void add_timing_event(struct timing_event *te,
+ struct timespec *t_start, struct timespec *t_end)
+{
+ int64_t ts_delta = timespec_to_ns64(t_end) - timespec_to_ns64(t_start);
+
+ te->count++;
+ if (ts_delta < te->min)
+ te->min = ts_delta;
+ if (ts_delta > te->max)
+ te->max = ts_delta;
+ te->total += ts_delta;
}
static void validate_key(int tskey, int tstype)
@@ -113,25 +159,43 @@ static void validate_timestamp(struct timespec *cur, int min_delay)
start64 = timespec_to_us64(&ts_usr);
if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
- fprintf(stderr, "ERROR: delay %lu expected between %d and %d\n",
+ fprintf(stderr, "ERROR: %lu us expected between %d and %d\n",
cur64 - start64, min_delay, max_delay);
test_failed = true;
}
}
+static void __print_ts_delta_formatted(int64_t ts_delta)
+{
+ if (cfg_print_nsec)
+ fprintf(stderr, "%lu ns", ts_delta);
+ else
+ fprintf(stderr, "%lu us", ts_delta / NSEC_PER_USEC);
+}
+
static void __print_timestamp(const char *name, struct timespec *cur,
uint32_t key, int payload_len)
{
+ int64_t ts_delta;
+
if (!(cur->tv_sec | cur->tv_nsec))
return;
- fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)",
- name, cur->tv_sec, cur->tv_nsec / 1000,
- key, payload_len);
-
- if (cur != &ts_usr)
- fprintf(stderr, " (USR %+" PRId64 " us)",
- timespec_to_us64(cur) - timespec_to_us64(&ts_usr));
+ if (cfg_print_nsec)
+ fprintf(stderr, " %s: %lu s %lu ns (seq=%u, len=%u)",
+ name, cur->tv_sec, cur->tv_nsec,
+ key, payload_len);
+ else
+ fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)",
+ name, cur->tv_sec, cur->tv_nsec / NSEC_PER_USEC,
+ key, payload_len);
+
+ if (cur != &ts_usr) {
+ ts_delta = timespec_to_ns64(cur) - timespec_to_ns64(&ts_usr);
+ fprintf(stderr, " (USR +");
+ __print_ts_delta_formatted(ts_delta);
+ fprintf(stderr, ")");
+ }
fprintf(stderr, "\n");
}
@@ -155,14 +219,17 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype,
case SCM_TSTAMP_SCHED:
tsname = " ENQ";
validate_timestamp(&tss->ts[0], 0);
+ add_timing_event(&usr_enq, &ts_usr, &tss->ts[0]);
break;
case SCM_TSTAMP_SND:
tsname = " SND";
validate_timestamp(&tss->ts[0], cfg_delay_snd);
+ add_timing_event(&usr_snd, &ts_usr, &tss->ts[0]);
break;
case SCM_TSTAMP_ACK:
tsname = " ACK";
validate_timestamp(&tss->ts[0], cfg_delay_ack);
+ add_timing_event(&usr_ack, &ts_usr, &tss->ts[0]);
break;
default:
error(1, 0, "unknown timestamp type: %u",
@@ -171,6 +238,21 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype,
__print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
}
+static void print_timing_event(char *name, struct timing_event *te)
+{
+ if (!te->count)
+ return;
+
+ fprintf(stderr, " %s: count=%d", name, te->count);
+ fprintf(stderr, ", avg=");
+ __print_ts_delta_formatted((int64_t)(te->total / te->count));
+ fprintf(stderr, ", min=");
+ __print_ts_delta_formatted(te->min);
+ fprintf(stderr, ", max=");
+ __print_ts_delta_formatted(te->max);
+ fprintf(stderr, "\n");
+}
+
/* TODO: convert to check_and_print payload once API is stable */
static void print_payload(char *data, int len)
{
@@ -198,6 +280,17 @@ static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
daddr ? inet_ntop(family, daddr, da, sizeof(da)) : "unknown");
}
+static void __epoll(int epfd)
+{
+ struct epoll_event events;
+ int ret;
+
+ memset(&events, 0, sizeof(events));
+ ret = epoll_wait(epfd, &events, 1, cfg_poll_timeout);
+ if (ret != 1)
+ error(1, errno, "epoll_wait");
+}
+
static void __poll(int fd)
{
struct pollfd pollfd;
@@ -391,7 +484,11 @@ static void do_test(int family, unsigned int report_opt)
struct msghdr msg;
struct iovec iov;
char *buf;
- int fd, i, val = 1, total_len;
+ int fd, i, val = 1, total_len, epfd = 0;
+
+ init_timing_event(&usr_enq);
+ init_timing_event(&usr_snd);
+ init_timing_event(&usr_ack);
total_len = cfg_payload_len;
if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) {
@@ -418,6 +515,20 @@ static void do_test(int family, unsigned int report_opt)
if (fd < 0)
error(1, errno, "socket");
+ if (cfg_use_epoll) {
+ struct epoll_event ev;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.data.fd = fd;
+ if (cfg_epollet)
+ ev.events |= EPOLLET;
+ epfd = epoll_create(1);
+ if (epfd <= 0)
+ error(1, errno, "epoll_create");
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev))
+ error(1, errno, "epoll_ctl");
+ }
+
/* reset expected key on each new socket */
saved_tskey = -1;
@@ -525,19 +636,28 @@ static void do_test(int family, unsigned int report_opt)
error(1, errno, "send");
/* wait for all errors to be queued, else ACKs arrive OOO */
- if (!cfg_no_delay)
- usleep(50 * 1000);
+ if (cfg_sleep_usec)
+ usleep(cfg_sleep_usec);
- __poll(fd);
+ if (!cfg_busy_poll) {
+ if (cfg_use_epoll)
+ __epoll(epfd);
+ else
+ __poll(fd);
+ }
while (!recv_errmsg(fd)) {}
}
+ print_timing_event("USR-ENQ", &usr_enq);
+ print_timing_event("USR-SND", &usr_snd);
+ print_timing_event("USR-ACK", &usr_ack);
+
if (close(fd))
error(1, errno, "close");
free(buf);
- usleep(100 * 1000);
+ usleep(100 * NSEC_PER_USEC);
}
static void __attribute__((noreturn)) usage(const char *filepath)
@@ -547,18 +667,22 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -4: only IPv4\n"
" -6: only IPv6\n"
" -h: show this message\n"
+ " -b: busy poll to read from error queue\n"
" -c N: number of packets for each test\n"
" -C: use cmsg to set tstamp recording options\n"
- " -D: no delay between packets\n"
- " -F: poll() waits forever for an event\n"
+ " -e: use level-triggered epoll() instead of poll()\n"
+ " -E: use event-triggered epoll() instead of poll()\n"
+ " -F: poll()/epoll() waits forever for an event\n"
" -I: request PKTINFO\n"
" -l N: send N bytes at a time\n"
" -L listen on hostname and port\n"
" -n: set no-payload option\n"
+ " -N: print timestamps and durations in nsec (instead of usec)\n"
" -p N: connect to port N\n"
" -P: use PF_PACKET\n"
" -r: use raw\n"
" -R: use raw (IP_HDRINCL)\n"
+ " -S N: usec to sleep before reading error queue\n"
" -u: use udp\n"
" -v: validate SND delay (usec)\n"
" -V: validate ACK delay (usec)\n"
@@ -572,7 +696,8 @@ static void parse_opt(int argc, char **argv)
int proto_count = 0;
int c;
- while ((c = getopt(argc, argv, "46c:CDFhIl:Lnp:PrRuv:V:x")) != -1) {
+ while ((c = getopt(argc, argv,
+ "46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
@@ -580,15 +705,21 @@ static void parse_opt(int argc, char **argv)
case '6':
do_ipv4 = 0;
break;
+ case 'b':
+ cfg_busy_poll = true;
+ break;
case 'c':
cfg_num_pkts = strtoul(optarg, NULL, 10);
break;
case 'C':
cfg_use_cmsg = true;
break;
- case 'D':
- cfg_no_delay = true;
+ case 'e':
+ cfg_use_epoll = true;
break;
+ case 'E':
+ cfg_use_epoll = true;
+ cfg_epollet = true;
case 'F':
cfg_poll_timeout = -1;
break;
@@ -604,6 +735,9 @@ static void parse_opt(int argc, char **argv)
case 'n':
cfg_loop_nodata = true;
break;
+ case 'N':
+ cfg_print_nsec = true;
+ break;
case 'p':
dest_port = strtoul(optarg, NULL, 10);
break;
@@ -623,6 +757,9 @@ static void parse_opt(int argc, char **argv)
cfg_proto = SOCK_RAW;
cfg_ipproto = IPPROTO_RAW;
break;
+ case 'S':
+ cfg_sleep_usec = strtoul(optarg, NULL, 10);
+ break;
case 'u':
proto_count++;
cfg_proto = SOCK_DGRAM;
@@ -653,6 +790,8 @@ static void parse_opt(int argc, char **argv)
error(1, 0, "pass -P, -r, -R or -u, not multiple");
if (cfg_do_pktinfo && cfg_use_pf_packet)
error(1, 0, "cannot ask for pktinfo over pf_packet");
+ if (cfg_busy_poll && cfg_use_epoll)
+ error(1, 0, "pass epoll or busy_poll, not both");
if (optind != argc - 1)
error(1, 0, "missing required hostname argument");
diff --git a/tools/testing/selftests/networking/timestamping/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh
index df0d86ca72b7..eea6f5193693 100755
--- a/tools/testing/selftests/networking/timestamping/txtimestamp.sh
+++ b/tools/testing/selftests/net/txtimestamp.sh
@@ -43,15 +43,40 @@ run_test_tcpudpraw() {
}
run_test_all() {
+ setup
run_test_tcpudpraw # setsockopt
run_test_tcpudpraw -C # cmsg
run_test_tcpudpraw -n # timestamp w/o data
+ echo "OK. All tests passed"
+}
+
+run_test_one() {
+ setup
+ ./txtimestamp $@
+}
+
+usage() {
+ echo "Usage: $0 [ -r | --run ] <txtimestamp args> | [ -h | --help ]"
+ echo " (no args) Run all tests"
+ echo " -r|--run Run an individual test with arguments"
+ echo " -h|--help Help"
+}
+
+main() {
+ if [[ $# -eq 0 ]]; then
+ run_test_all
+ else
+ if [[ "$1" = "-r" || "$1" == "--run" ]]; then
+ shift
+ run_test_one $@
+ else
+ usage
+ fi
+ fi
}
if [[ "$(ip netns identify)" == "root" ]]; then
- ../../net/in_netns.sh $0 $@
+ ./in_netns.sh $0 $@
else
- setup
- run_test_all
- echo "OK. All tests passed"
+ main $@
fi
diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh
new file mode 100755
index 000000000000..184da81f554f
--- /dev/null
+++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh
@@ -0,0 +1,436 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Various combinations of VRF with xfrms and qdisc.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+ret=0
+
+HOST1_4=192.168.1.1
+HOST2_4=192.168.1.2
+HOST1_6=2001:db8:1::1
+HOST2_6=2001:db8:1::2
+
+XFRM1_4=10.0.1.1
+XFRM2_4=10.0.1.2
+XFRM1_6=fc00:1000::1
+XFRM2_6=fc00:1000::2
+IF_ID=123
+
+VRF=red
+TABLE=300
+
+AUTH_1=0xd94fcfea65fddf21dc6e0d24a0253508
+AUTH_2=0xdc6e0d24a0253508d94fcfea65fddf21
+ENC_1=0xfc46c20f8048be9725930ff3fb07ac2a91f0347dffeacf62
+ENC_2=0x3fb07ac2a91f0347dffeacf62fc46c20f8048be9725930ff
+SPI_1=0x02122b77
+SPI_2=0x2b770212
+
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+################################################################################
+#
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+run_cmd_host1()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: $cmd\n"
+ fi
+
+ out=$(eval ip netns exec host1 $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" ]; then
+ if [ -n "$out" ]; then
+ echo
+ echo " $out"
+ fi
+ echo
+ fi
+
+ return $rc
+}
+
+################################################################################
+# create namespaces for hosts and sws
+
+create_vrf()
+{
+ local ns=$1
+ local vrf=$2
+ local table=$3
+
+ if [ -n "${ns}" ]; then
+ ns="-netns ${ns}"
+ fi
+
+ ip ${ns} link add ${vrf} type vrf table ${table}
+ ip ${ns} link set ${vrf} up
+ ip ${ns} route add vrf ${vrf} unreachable default metric 8192
+ ip ${ns} -6 route add vrf ${vrf} unreachable default metric 8192
+
+ ip ${ns} addr add 127.0.0.1/8 dev ${vrf}
+ ip ${ns} -6 addr add ::1 dev ${vrf} nodad
+
+ ip ${ns} ru del pref 0
+ ip ${ns} ru add pref 32765 from all lookup local
+ ip ${ns} -6 ru del pref 0
+ ip ${ns} -6 ru add pref 32765 from all lookup local
+}
+
+create_ns()
+{
+ local ns=$1
+ local addr=$2
+ local addr6=$3
+
+ [ -z "${addr}" ] && addr="-"
+ [ -z "${addr6}" ] && addr6="-"
+
+ ip netns add ${ns}
+
+ ip -netns ${ns} link set lo up
+ if [ "${addr}" != "-" ]; then
+ ip -netns ${ns} addr add dev lo ${addr}
+ fi
+ if [ "${addr6}" != "-" ]; then
+ ip -netns ${ns} -6 addr add dev lo ${addr6}
+ fi
+
+ ip -netns ${ns} ro add unreachable default metric 8192
+ ip -netns ${ns} -6 ro add unreachable default metric 8192
+
+ ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+# create veth pair to connect namespaces and apply addresses.
+connect_ns()
+{
+ local ns1=$1
+ local ns1_dev=$2
+ local ns1_addr=$3
+ local ns1_addr6=$4
+ local ns2=$5
+ local ns2_dev=$6
+ local ns2_addr=$7
+ local ns2_addr6=$8
+ local ns1arg
+ local ns2arg
+
+ if [ -n "${ns1}" ]; then
+ ns1arg="-netns ${ns1}"
+ fi
+ if [ -n "${ns2}" ]; then
+ ns2arg="-netns ${ns2}"
+ fi
+
+ ip ${ns1arg} li add ${ns1_dev} type veth peer name tmp
+ ip ${ns1arg} li set ${ns1_dev} up
+ ip ${ns1arg} li set tmp netns ${ns2} name ${ns2_dev}
+ ip ${ns2arg} li set ${ns2_dev} up
+
+ if [ "${ns1_addr}" != "-" ]; then
+ ip ${ns1arg} addr add dev ${ns1_dev} ${ns1_addr}
+ ip ${ns2arg} addr add dev ${ns2_dev} ${ns2_addr}
+ fi
+
+ if [ "${ns1_addr6}" != "-" ]; then
+ ip ${ns1arg} addr add dev ${ns1_dev} ${ns1_addr6} nodad
+ ip ${ns2arg} addr add dev ${ns2_dev} ${ns2_addr6} nodad
+ fi
+}
+
+################################################################################
+
+cleanup()
+{
+ ip netns del host1
+ ip netns del host2
+}
+
+setup()
+{
+ create_ns "host1"
+ create_ns "host2"
+
+ connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \
+ "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64
+
+ create_vrf "host1" ${VRF} ${TABLE}
+ ip -netns host1 link set dev eth0 master ${VRF}
+}
+
+cleanup_xfrm()
+{
+ for ns in host1 host2
+ do
+ for x in state policy
+ do
+ ip -netns ${ns} xfrm ${x} flush
+ ip -6 -netns ${ns} xfrm ${x} flush
+ done
+ done
+}
+
+setup_xfrm()
+{
+ local h1_4=$1
+ local h2_4=$2
+ local h1_6=$3
+ local h2_6=$4
+ local devarg="$5"
+
+ #
+ # policy
+ #
+
+ # host1 - IPv4 out
+ ip -netns host1 xfrm policy add \
+ src ${h1_4} dst ${h2_4} ${devarg} dir out \
+ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
+
+ # host2 - IPv4 in
+ ip -netns host2 xfrm policy add \
+ src ${h1_4} dst ${h2_4} dir in \
+ tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
+
+ # host1 - IPv4 in
+ ip -netns host1 xfrm policy add \
+ src ${h2_4} dst ${h1_4} ${devarg} dir in \
+ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
+
+ # host2 - IPv4 out
+ ip -netns host2 xfrm policy add \
+ src ${h2_4} dst ${h1_4} dir out \
+ tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
+
+
+ # host1 - IPv6 out
+ ip -6 -netns host1 xfrm policy add \
+ src ${h1_6} dst ${h2_6} ${devarg} dir out \
+ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
+
+ # host2 - IPv6 in
+ ip -6 -netns host2 xfrm policy add \
+ src ${h1_6} dst ${h2_6} dir in \
+ tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
+
+ # host1 - IPv6 in
+ ip -6 -netns host1 xfrm policy add \
+ src ${h2_6} dst ${h1_6} ${devarg} dir in \
+ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
+
+ # host2 - IPv6 out
+ ip -6 -netns host2 xfrm policy add \
+ src ${h2_6} dst ${h1_6} dir out \
+ tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
+
+ #
+ # state
+ #
+ ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+ proto esp spi ${SPI_1} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+ enc 'cbc(des3_ede)' ${ENC_1} \
+ sel src ${h1_4} dst ${h2_4} ${devarg}
+
+ ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+ proto esp spi ${SPI_1} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+ enc 'cbc(des3_ede)' ${ENC_1} \
+ sel src ${h1_4} dst ${h2_4}
+
+
+ ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+ proto esp spi ${SPI_2} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+ enc 'cbc(des3_ede)' ${ENC_2} \
+ sel src ${h2_4} dst ${h1_4} ${devarg}
+
+ ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+ proto esp spi ${SPI_2} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+ enc 'cbc(des3_ede)' ${ENC_2} \
+ sel src ${h2_4} dst ${h1_4}
+
+
+ ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+ proto esp spi ${SPI_1} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+ enc 'cbc(des3_ede)' ${ENC_1} \
+ sel src ${h1_6} dst ${h2_6} ${devarg}
+
+ ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+ proto esp spi ${SPI_1} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+ enc 'cbc(des3_ede)' ${ENC_1} \
+ sel src ${h1_6} dst ${h2_6}
+
+
+ ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+ proto esp spi ${SPI_2} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+ enc 'cbc(des3_ede)' ${ENC_2} \
+ sel src ${h2_6} dst ${h1_6} ${devarg}
+
+ ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+ proto esp spi ${SPI_2} reqid 0 mode tunnel \
+ replay-window 4 replay-oseq 0x4 \
+ auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+ enc 'cbc(des3_ede)' ${ENC_2} \
+ sel src ${h2_6} dst ${h1_6}
+}
+
+cleanup_xfrm_dev()
+{
+ ip -netns host1 li del xfrm0
+ ip -netns host2 addr del ${XFRM2_4}/24 dev eth0
+ ip -netns host2 addr del ${XFRM2_6}/64 dev eth0
+}
+
+setup_xfrm_dev()
+{
+ local vrfarg="vrf ${VRF}"
+
+ ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID}
+ ip -netns host1 li set xfrm0 ${vrfarg} up
+ ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0
+ ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0
+
+ ip -netns host2 addr add ${XFRM2_4}/24 dev eth0
+ ip -netns host2 addr add ${XFRM2_6}/64 dev eth0
+
+ setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}"
+}
+
+run_tests()
+{
+ cleanup_xfrm
+
+ # no IPsec
+ run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+ log_test $? 0 "IPv4 no xfrm policy"
+ run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+ log_test $? 0 "IPv6 no xfrm policy"
+
+ # xfrm without VRF in sel
+ setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6}
+ run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+ log_test $? 0 "IPv4 xfrm policy based on address"
+ run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+ log_test $? 0 "IPv6 xfrm policy based on address"
+ cleanup_xfrm
+
+ # xfrm with VRF in sel
+ # Known failure: ipv4 resets the flow oif after the lookup. Fix is
+ # not straightforward.
+ # setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6} "dev ${VRF}"
+ # run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+ # log_test $? 0 "IPv4 xfrm policy with VRF in selector"
+ run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+ log_test $? 0 "IPv6 xfrm policy with VRF in selector"
+ cleanup_xfrm
+
+ # xfrm with enslaved device in sel
+ # Known failures: combined with the above, __xfrm{4,6}_selector_match
+ # needs to consider both l3mdev and enslaved device index.
+ # setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6} "dev eth0"
+ # run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+ # log_test $? 0 "IPv4 xfrm policy with enslaved device in selector"
+ # run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+ # log_test $? 0 "IPv6 xfrm policy with enslaved device in selector"
+ # cleanup_xfrm
+
+ # xfrm device
+ setup_xfrm_dev
+ run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${XFRM2_4}
+ log_test $? 0 "IPv4 xfrm policy with xfrm device"
+ run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${XFRM2_6}
+ log_test $? 0 "IPv6 xfrm policy with xfrm device"
+ cleanup_xfrm_dev
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -p Pause on fail
+ -v verbose mode (show commands and output)
+
+done
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :pv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+cleanup 2>/dev/null
+setup
+
+echo
+echo "No qdisc on VRF device"
+run_tests
+
+run_cmd_host1 tc qdisc add dev ${VRF} root netem delay 100ms
+echo
+echo "netem qdisc on VRF device"
+run_tests
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/networking/timestamping/.gitignore b/tools/testing/selftests/networking/timestamping/.gitignore
deleted file mode 100644
index d9355035e746..000000000000
--- a/tools/testing/selftests/networking/timestamping/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-timestamping
-rxtimestamp
-txtimestamp
-hwtstamp_config
diff --git a/tools/testing/selftests/networking/timestamping/Makefile b/tools/testing/selftests/networking/timestamping/Makefile
deleted file mode 100644
index 1de8bd8ccf5d..000000000000
--- a/tools/testing/selftests/networking/timestamping/Makefile
+++ /dev/null
@@ -1,11 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include
-
-TEST_GEN_FILES := hwtstamp_config rxtimestamp timestamping txtimestamp
-TEST_PROGS := txtimestamp.sh
-
-all: $(TEST_PROGS)
-
-top_srcdir = ../../../../..
-KSFT_KHDR_INSTALL := 1
-include ../../lib.mk
diff --git a/tools/testing/selftests/networking/timestamping/config b/tools/testing/selftests/networking/timestamping/config
deleted file mode 100644
index a13e3169b0a4..000000000000
--- a/tools/testing/selftests/networking/timestamping/config
+++ /dev/null
@@ -1,2 +0,0 @@
-CONFIG_IFB=y
-CONFIG_NET_SCH_NETEM=y
diff --git a/tools/testing/selftests/nsfs/.gitignore b/tools/testing/selftests/nsfs/.gitignore
index 2ab2c824ce86..ed79ebdf286e 100644
--- a/tools/testing/selftests/nsfs/.gitignore
+++ b/tools/testing/selftests/nsfs/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
owner
pidns
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/nsfs/pidns.c
index e0d86e1668c0..e3c772c6a7c7 100644
--- a/tools/testing/selftests/nsfs/pidns.c
+++ b/tools/testing/selftests/nsfs/pidns.c
@@ -27,7 +27,7 @@
#define __stack_aligned__ __attribute__((aligned(16)))
struct cr_clone_arg {
char stack[128] __stack_aligned__;
- char stack_ptr[0];
+ char stack_ptr[];
};
static int child(void *args)
diff --git a/tools/testing/selftests/ntb/ntb_test.sh b/tools/testing/selftests/ntb/ntb_test.sh
index 9c60337317c6..020137b61407 100755
--- a/tools/testing/selftests/ntb/ntb_test.sh
+++ b/tools/testing/selftests/ntb/ntb_test.sh
@@ -241,7 +241,7 @@ function get_files_count()
split_remote $LOC
if [[ "$REMOTE" == "" ]]; then
- echo $(ls -1 "$LOC"/${NAME}* 2>/dev/null | wc -l)
+ echo $(ls -1 "$VPATH"/${NAME}* 2>/dev/null | wc -l)
else
echo $(ssh "$REMOTE" "ls -1 \"$VPATH\"/${NAME}* | \
wc -l" 2> /dev/null)
diff --git a/tools/testing/selftests/openat2/.gitignore b/tools/testing/selftests/openat2/.gitignore
index bd68f6c3fd07..82a4846cbc4b 100644
--- a/tools/testing/selftests/openat2/.gitignore
+++ b/tools/testing/selftests/openat2/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
/*_test
diff --git a/tools/testing/selftests/pid_namespace/.gitignore b/tools/testing/selftests/pid_namespace/.gitignore
new file mode 100644
index 000000000000..93ab9d7e5b7e
--- /dev/null
+++ b/tools/testing/selftests/pid_namespace/.gitignore
@@ -0,0 +1 @@
+regression_enomem
diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile
new file mode 100644
index 000000000000..dcaefa224ca0
--- /dev/null
+++ b/tools/testing/selftests/pid_namespace/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -g -I../../../../usr/include/
+
+TEST_GEN_PROGS := regression_enomem
+
+include ../lib.mk
+
+$(OUTPUT)/regression_enomem: regression_enomem.c ../pidfd/pidfd.h
diff --git a/tools/testing/selftests/pid_namespace/config b/tools/testing/selftests/pid_namespace/config
new file mode 100644
index 000000000000..26cdb27e7dbb
--- /dev/null
+++ b/tools/testing/selftests/pid_namespace/config
@@ -0,0 +1,2 @@
+CONFIG_PID_NS=y
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/pid_namespace/regression_enomem.c b/tools/testing/selftests/pid_namespace/regression_enomem.c
new file mode 100644
index 000000000000..73d532556d17
--- /dev/null
+++ b/tools/testing/selftests/pid_namespace/regression_enomem.c
@@ -0,0 +1,45 @@
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+#include "../pidfd/pidfd.h"
+
+/*
+ * Regression test for:
+ * 35f71bc0a09a ("fork: report pid reservation failure properly")
+ * b26ebfe12f34 ("pid: Fix error return value in some cases")
+ */
+TEST(regression_enomem)
+{
+ pid_t pid;
+
+ if (geteuid())
+ EXPECT_EQ(0, unshare(CLONE_NEWUSER));
+
+ EXPECT_EQ(0, unshare(CLONE_NEWPID));
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0)
+ exit(EXIT_SUCCESS);
+
+ EXPECT_EQ(0, wait_for_pid(pid));
+
+ pid = fork();
+ ASSERT_LT(pid, 0);
+ ASSERT_EQ(errno, ENOMEM);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 39559d723c41..973198a3ec3d 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -1,6 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0-only
pidfd_open_test
pidfd_poll_test
pidfd_test
pidfd_wait
pidfd_fdinfo_test
pidfd_getfd_test
+pidfd_setns_test
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index 75a545861375..f4a2f28f926b 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -g -I../../../../usr/include/ -pthread
-TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test pidfd_poll_test pidfd_wait pidfd_getfd_test
+TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
+ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test
include ../lib.mk
diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config
new file mode 100644
index 000000000000..bb11de90c0c9
--- /dev/null
+++ b/tools/testing/selftests/pidfd/config
@@ -0,0 +1,6 @@
+CONFIG_UTS_NS=y
+CONFIG_IPC_NS=y
+CONFIG_USER_NS=y
+CONFIG_PID_NS=y
+CONFIG_NET_NS=y
+CONFIG_CGROUPS=y
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index d482515604db..c1921a53dbed 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -13,6 +13,8 @@
#include <string.h>
#include <syscall.h>
#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/wait.h>
#include "../kselftest.h"
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
new file mode 100644
index 000000000000..133ec5b6cda8
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <linux/kcmp.h>
+
+#include "pidfd.h"
+#include "../clone3/clone3_selftests.h"
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+enum {
+ PIDFD_NS_USER,
+ PIDFD_NS_MNT,
+ PIDFD_NS_PID,
+ PIDFD_NS_UTS,
+ PIDFD_NS_IPC,
+ PIDFD_NS_NET,
+ PIDFD_NS_CGROUP,
+ PIDFD_NS_PIDCLD,
+ PIDFD_NS_MAX
+};
+
+const struct ns_info {
+ const char *name;
+ int flag;
+} ns_info[] = {
+ [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, },
+ [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, },
+ [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, },
+ [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, },
+ [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, },
+ [PIDFD_NS_NET] = { "net", CLONE_NEWNET, },
+ [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, },
+ [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, },
+};
+
+FIXTURE(current_nsset)
+{
+ pid_t pid;
+ int pidfd;
+ int nsfds[PIDFD_NS_MAX];
+
+ pid_t child_pid_exited;
+ int child_pidfd_exited;
+
+ pid_t child_pid1;
+ int child_pidfd1;
+ int child_nsfds1[PIDFD_NS_MAX];
+
+ pid_t child_pid2;
+ int child_pidfd2;
+ int child_nsfds2[PIDFD_NS_MAX];
+};
+
+static int sys_waitid(int which, pid_t pid, int options)
+{
+ return syscall(__NR_waitid, which, pid, NULL, options, NULL);
+}
+
+pid_t create_child(int *pidfd, unsigned flags)
+{
+ struct clone_args args = {
+ .flags = CLONE_PIDFD | flags,
+ .exit_signal = SIGCHLD,
+ .pidfd = ptr_to_u64(pidfd),
+ };
+
+ return sys_clone3(&args, sizeof(struct clone_args));
+}
+
+FIXTURE_SETUP(current_nsset)
+{
+ int i, proc_fd, ret;
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ self->nsfds[i] = -EBADF;
+ self->child_nsfds1[i] = -EBADF;
+ self->child_nsfds2[i] = -EBADF;
+ }
+
+ proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
+ ASSERT_GE(proc_fd, 0) {
+ TH_LOG("%m - Failed to open /proc/self/ns");
+ }
+
+ self->pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
+ if (self->nsfds[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->pid);
+ }
+ }
+ }
+
+ self->pidfd = sys_pidfd_open(self->pid, 0);
+ EXPECT_GT(self->pidfd, 0) {
+ TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+ }
+
+ /* Create task that exits right away. */
+ self->child_pid_exited = create_child(&self->child_pidfd_exited,
+ CLONE_NEWUSER | CLONE_NEWNET);
+ EXPECT_GT(self->child_pid_exited, 0);
+
+ if (self->child_pid_exited == 0)
+ _exit(EXIT_SUCCESS);
+
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
+
+ self->pidfd = sys_pidfd_open(self->pid, 0);
+ EXPECT_GE(self->pidfd, 0) {
+ TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+ }
+
+ /* Create tasks that will be stopped. */
+ self->child_pid1 = create_child(&self->child_pidfd1,
+ CLONE_NEWUSER | CLONE_NEWNS |
+ CLONE_NEWCGROUP | CLONE_NEWIPC |
+ CLONE_NEWUTS | CLONE_NEWPID |
+ CLONE_NEWNET);
+ EXPECT_GE(self->child_pid1, 0);
+
+ if (self->child_pid1 == 0) {
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ self->child_pid2 = create_child(&self->child_pidfd2,
+ CLONE_NEWUSER | CLONE_NEWNS |
+ CLONE_NEWCGROUP | CLONE_NEWIPC |
+ CLONE_NEWUTS | CLONE_NEWPID |
+ CLONE_NEWNET);
+ EXPECT_GE(self->child_pid2, 0);
+
+ if (self->child_pid2 == 0) {
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ char p[100];
+
+ const struct ns_info *info = &ns_info[i];
+
+ self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
+ if (self->nsfds[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->pid);
+ }
+ }
+
+ ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
+ self->child_pid1, info->name);
+ EXPECT_GT(ret, 0);
+ EXPECT_LT(ret, sizeof(p));
+
+ self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
+ if (self->child_nsfds1[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->child_pid1);
+ }
+ }
+
+ ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
+ self->child_pid2, info->name);
+ EXPECT_GT(ret, 0);
+ EXPECT_LT(ret, sizeof(p));
+
+ self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
+ if (self->child_nsfds2[i] < 0) {
+ EXPECT_EQ(errno, ENOENT) {
+ TH_LOG("%m - Failed to open %s namespace for process %d",
+ info->name, self->child_pid1);
+ }
+ }
+ }
+
+ close(proc_fd);
+}
+
+FIXTURE_TEARDOWN(current_nsset)
+{
+ int i;
+
+ ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
+ SIGKILL, NULL, 0), 0);
+ ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
+ SIGKILL, NULL, 0), 0);
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ if (self->nsfds[i] >= 0)
+ close(self->nsfds[i]);
+ if (self->child_nsfds1[i] >= 0)
+ close(self->child_nsfds1[i]);
+ if (self->child_nsfds2[i] >= 0)
+ close(self->child_nsfds2[i]);
+ }
+
+ if (self->child_pidfd1 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd1));
+ if (self->child_pidfd2 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd2));
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
+}
+
+static int preserve_ns(const int pid, const char *ns)
+{
+ int ret;
+ char path[50];
+
+ ret = snprintf(path, sizeof(path), "/proc/%d/ns/%s", pid, ns);
+ if (ret < 0 || (size_t)ret >= sizeof(path))
+ return -EIO;
+
+ return open(path, O_RDONLY | O_CLOEXEC);
+}
+
+static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
+{
+ int ns_fd2 = -EBADF;
+ int ret = -1;
+ struct stat ns_st1, ns_st2;
+
+ ret = fstat(ns_fd1, &ns_st1);
+ if (ret < 0)
+ return -1;
+
+ ns_fd2 = preserve_ns(pid2, ns);
+ if (ns_fd2 < 0)
+ return -1;
+
+ ret = fstat(ns_fd2, &ns_st2);
+ close(ns_fd2);
+ if (ret < 0)
+ return -1;
+
+ /* processes are in the same namespace */
+ if ((ns_st1.st_dev == ns_st2.st_dev) &&
+ (ns_st1.st_ino == ns_st2.st_ino))
+ return 1;
+
+ /* processes are in different namespaces */
+ return 0;
+}
+
+/* Test that we can't pass garbage to the kernel. */
+TEST_F(current_nsset, invalid_flags)
+{
+ ASSERT_NE(setns(self->pidfd, 0), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ ASSERT_NE(setns(self->pidfd, -1), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+/* Test that we can't attach to a task that has already exited. */
+TEST_F(current_nsset, pidfd_exited_child)
+{
+ int i;
+ pid_t pid;
+
+ ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
+ 0);
+ EXPECT_EQ(errno, ESRCH);
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ /* Verify that we haven't changed any namespaces. */
+ if (self->nsfds[i] >= 0)
+ ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
+ }
+}
+
+TEST_F(current_nsset, pidfd_incremental_setns)
+{
+ int i;
+ pid_t pid;
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ int nsfd;
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ if (info->flag) {
+ ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
+ TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid1,
+ self->child_pidfd1);
+ }
+ }
+
+ /* Verify that we have changed to the correct namespaces. */
+ if (info->flag == CLONE_NEWPID)
+ nsfd = self->nsfds[i];
+ else
+ nsfd = self->child_nsfds1[i];
+ ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+ TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
+ info->name, self->child_pid1,
+ self->child_pidfd1);
+ }
+ TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid1, self->child_pidfd1);
+ }
+}
+
+TEST_F(current_nsset, nsfd_incremental_setns)
+{
+ int i;
+ pid_t pid;
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ int nsfd;
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ if (info->flag) {
+ ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
+ TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1,
+ self->child_nsfds1[i]);
+ }
+ }
+
+ /* Verify that we have changed to the correct namespaces. */
+ if (info->flag == CLONE_NEWPID)
+ nsfd = self->nsfds[i];
+ else
+ nsfd = self->child_nsfds1[i];
+ ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+ TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1,
+ self->child_nsfds1[i]);
+ }
+ TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1, self->child_nsfds1[i]);
+ }
+}
+
+TEST_F(current_nsset, pidfd_one_shot_setns)
+{
+ unsigned flags = 0;
+ int i;
+ pid_t pid;
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ flags |= info->flag;
+ TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+ info->name, self->child_pid1);
+ }
+
+ ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
+ TH_LOG("%m - Failed to setns to namespaces of %d",
+ self->child_pid1);
+ }
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ int nsfd;
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ /* Verify that we have changed to the correct namespaces. */
+ if (info->flag == CLONE_NEWPID)
+ nsfd = self->nsfds[i];
+ else
+ nsfd = self->child_nsfds1[i];
+ ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+ TH_LOG("setns failed to place us correctly into %s namespace of %d",
+ info->name, self->child_pid1);
+ }
+ TH_LOG("Managed to correctly setns to %s namespace of %d",
+ info->name, self->child_pid1);
+ }
+}
+
+TEST_F(current_nsset, no_foul_play)
+{
+ unsigned flags = 0;
+ int i;
+
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+
+ if (self->child_nsfds1[i] < 0)
+ continue;
+
+ flags |= info->flag;
+ if (info->flag) /* No use logging pid_for_children. */
+ TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
+ info->name, self->child_pid1);
+ }
+
+ ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
+ TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
+ self->child_pid1, self->child_pidfd1);
+ }
+
+ /*
+ * Can't setns to a user namespace outside of our hierarchy since we
+ * don't have caps in there and didn't create it. That means that under
+ * no circumstances should we be able to setns to any of the other
+ * ones since they aren't owned by our user namespace.
+ */
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+
+ if (self->child_nsfds2[i] < 0 || !info->flag)
+ continue;
+
+ ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
+ TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid2,
+ self->child_pidfd2);
+ }
+ TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
+ info->name, self->child_pid2,
+ self->child_pidfd2);
+
+ ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
+ TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid2,
+ self->child_nsfds2[i]);
+ }
+ TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid2,
+ self->child_nsfds2[i]);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 644770c3b754..0830e63818c1 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -19,6 +19,7 @@ SUB_DIRS = alignment \
copyloops \
dscr \
mm \
+ nx-gzip \
pmu \
signal \
primitives \
diff --git a/tools/testing/selftests/powerpc/alignment/.gitignore b/tools/testing/selftests/powerpc/alignment/.gitignore
index 6d4fd014511c..28bc6ca13cc6 100644
--- a/tools/testing/selftests/powerpc/alignment/.gitignore
+++ b/tools/testing/selftests/powerpc/alignment/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
copy_first_unaligned
alignment_handler
diff --git a/tools/testing/selftests/powerpc/benchmarks/.gitignore b/tools/testing/selftests/powerpc/benchmarks/.gitignore
index 9161679b1e1a..c9ce13983c99 100644
--- a/tools/testing/selftests/powerpc/benchmarks/.gitignore
+++ b/tools/testing/selftests/powerpc/benchmarks/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
gettimeofday
context_switch
fork
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index d40300a65b42..a32a6ab89914 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -2,6 +2,8 @@
TEST_GEN_PROGS := gettimeofday context_switch fork mmap_bench futex_bench null_syscall
TEST_GEN_FILES := exec_target
+TEST_FILES := settings
+
CFLAGS += -O2
top_srcdir = ../../../../..
diff --git a/tools/testing/selftests/powerpc/benchmarks/settings b/tools/testing/selftests/powerpc/benchmarks/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/benchmarks/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/powerpc/cache_shape/.gitignore b/tools/testing/selftests/powerpc/cache_shape/.gitignore
index ec1848434be5..b385eee3012c 100644
--- a/tools/testing/selftests/powerpc/cache_shape/.gitignore
+++ b/tools/testing/selftests/powerpc/cache_shape/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
cache_shape
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index 12ef5b031974..ddaf140b8255 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
copyuser_64_t0
copyuser_64_t1
copyuser_64_t2
diff --git a/tools/testing/selftests/powerpc/dscr/.gitignore b/tools/testing/selftests/powerpc/dscr/.gitignore
index b585c6c1564a..1d08b15af697 100644
--- a/tools/testing/selftests/powerpc/dscr/.gitignore
+++ b/tools/testing/selftests/powerpc/dscr/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
dscr_default_test
dscr_explicit_test
dscr_inherit_exec_test
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index 5df476364b4d..cfa6eedcb66c 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -3,6 +3,8 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \
dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \
dscr_sysfs_thread_test
+TEST_FILES := settings
+
top_srcdir = ../../../../..
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/powerpc/dscr/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dscr/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
index f988d2f42e8f..8a8d0f456946 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
@@ -41,6 +41,11 @@ for dev in `ls -1 /sys/bus/pci/devices/ | grep '\.0$'` ; do
continue;
fi
+ if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then
+ echo "$dev, Skipped: ahci doesn't support recovery"
+ continue
+ fi
+
# Don't inject errosr into an already-frozen PE. This happens with
# PEs that contain multiple PCI devices (e.g. multi-function cards)
# and injecting new errors during the recovery process will probably
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
index 50ded63e25b7..e31ca6f453ed 100644
--- a/tools/testing/selftests/powerpc/math/.gitignore
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
fpu_syscall
vmx_syscall
fpu_preempt
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 0ebeaea22641..2ca523255b1b 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
hugetlb_vs_thp_test
subpage_prot
tempfile
@@ -6,3 +7,4 @@ segv_errors
wild_bctr
large_vm_fork_separation
bad_accesses
+tlbie_test
diff --git a/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
new file mode 100644
index 000000000000..5a7118495cb3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/99-nx-gzip.rules
@@ -0,0 +1 @@
+SUBSYSTEM=="nxgzip", KERNEL=="nx-gzip", MODE="0666"
diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
new file mode 100644
index 000000000000..640fad6cc2c7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -0,0 +1,8 @@
+CFLAGS = -O3 -m64 -I./include
+
+TEST_GEN_FILES := gzfht_test gunz_test
+TEST_PROGS := nx-gzip-test.sh
+
+include ../../lib.mk
+
+$(TEST_GEN_FILES): gzip_vas.c
diff --git a/tools/testing/selftests/powerpc/nx-gzip/README b/tools/testing/selftests/powerpc/nx-gzip/README
new file mode 100644
index 000000000000..9809dbaa1905
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/README
@@ -0,0 +1,45 @@
+Test the nx-gzip function:
+=========================
+
+Verify that following device exists:
+ /dev/crypto/nx-gzip
+If you get a permission error run as sudo or set the device permissions:
+ sudo chmod go+rw /dev/crypto/nx-gzip
+However, chmod may not survive across boots. You may create a udev file such
+as:
+ /etc/udev/rules.d/99-nx-gzip.rules
+
+
+To manually build and run:
+$ gcc -O3 -I./include -o gzfht_test gzfht_test.c gzip_vas.c
+$ gcc -O3 -I./include -o gunz_test gunz_test.c gzip_vas.c
+
+
+Compress any file using Fixed Huffman mode. Output will have a .nx.gz suffix:
+$ ./gzfht_test gzip_vas.c
+file gzip_vas.c read, 6413 bytes
+compressed 6413 to 3124 bytes total, crc32 checksum = abd15e8a
+
+
+Uncompress the previous output. Output will have a .nx.gunzip suffix:
+./gunz_test gzip_vas.c.nx.gz
+gzHeader FLG 0
+00 00 00 00 04 03
+gzHeader MTIME, XFL, OS ignored
+computed checksum abd15e8a isize 0000190d
+stored checksum abd15e8a isize 0000190d
+decomp is complete: fclose
+
+
+Compare two files:
+$ sha1sum gzip_vas.c.nx.gz.nx.gunzip gzip_vas.c
+bf43e3c0c3651f5f22b6f9784cd9b1eeab4120b6 gzip_vas.c.nx.gz.nx.gunzip
+bf43e3c0c3651f5f22b6f9784cd9b1eeab4120b6 gzip_vas.c
+
+
+Note that the code here are intended for testing the nx-gzip hardware function.
+They are not intended for demonstrating performance or compression ratio.
+By being simplistic these selftests expect to allocate the entire set of source
+and target pages in the memory so it needs enough memory to work.
+For more information and source code consider using:
+https://github.com/libnxz/power-gzip
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
new file mode 100644
index 000000000000..6ee0fded0391
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
@@ -0,0 +1,1028 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gunzip sample code for demonstrating the P9 NX hardware
+ * interface. Not intended for productive uses or for performance or
+ * compression ratio measurements. Note also that /dev/crypto/gzip,
+ * VAS and skiboot support are required
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce: completion extension
+ * cpb: coprocessor parameter block (metadata)
+ * crb: coprocessor request block (command)
+ * csb: coprocessor status block (status)
+ * dht: dynamic huffman table
+ * dde: data descriptor element (address, length)
+ * ddl: list of ddes
+ * dh/fh: dynamic and fixed huffman types
+ * fc: coprocessor function code
+ * histlen: history/dictionary length
+ * history: sliding window of up to 32KB of data
+ * lzcount: Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt: source final block type; last block's type during decomp
+ * spbc: source processed byte count
+ * subc: source unprocessed bit count
+ * tebc: target ending bit count; valid bits in the last byte
+ * tpbc: target processed byte count
+ * vas: virtual accelerator switch; the user mode interface
+ */
+
+#define _ISOC11_SOURCE // For aligned_alloc()
+#define _DEFAULT_SOURCE // For endian.h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+#include "crb.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+
+#define NX_MIN(X, Y) (((X) < (Y))?(X):(Y))
+#define NX_MAX(X, Y) (((X) > (Y))?(X):(Y))
+
+#define GETINPC(X) fgetc(X)
+#define FNAME_MAX 1024
+
+/* fifo queue management */
+#define fifo_used_bytes(used) (used)
+#define fifo_free_bytes(used, len) ((len)-(used))
+/* amount of free bytes in the first and last parts */
+#define fifo_free_first_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? (len)-((cur)+(used)) : 0)
+#define fifo_free_last_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? (cur) : (len)-(used))
+/* amount of used bytes in the first and last parts */
+#define fifo_used_first_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? (used) : (len)-(cur))
+#define fifo_used_last_bytes(cur, used, len) ((((cur)+(used)) <= (len)) \
+ ? 0 : ((used)+(cur))-(len))
+/* first and last free parts start here */
+#define fifo_free_first_offset(cur, used) ((cur)+(used))
+#define fifo_free_last_offset(cur, used, len) \
+ fifo_used_last_bytes(cur, used, len)
+/* first and last used parts start here */
+#define fifo_used_first_offset(cur) (cur)
+#define fifo_used_last_offset(cur) (0)
+
+const int fifo_in_len = 1<<24;
+const int fifo_out_len = 1<<24;
+const int page_sz = 1<<16;
+const int line_sz = 1<<7;
+const int window_max = 1<<15;
+
+/*
+ * Adds an (address, len) pair to the list of ddes (ddl) and updates
+ * the base dde. ddl[0] is the only dde in a direct dde which
+ * contains a single (addr,len) pair. For more pairs, ddl[0] becomes
+ * the indirect (base) dde that points to a list of direct ddes.
+ * See Section 6.4 of the NX-gzip user manual for DDE description.
+ * Addr=NULL, len=0 clears the ddl[0]. Returns the total number of
+ * bytes in ddl. Caller is responsible for allocting the array of
+ * nx_dde_t *ddl. If N addresses are required in the scatter-gather
+ * list, the ddl array must have N+1 entries minimum.
+ */
+static inline uint32_t nx_append_dde(struct nx_dde_t *ddl, void *addr,
+ uint32_t len)
+{
+ uint32_t ddecnt;
+ uint32_t bytes;
+
+ if (addr == NULL && len == 0) {
+ clearp_dde(ddl);
+ return 0;
+ }
+
+ NXPRT(fprintf(stderr, "%d: %s addr %p len %x\n", __LINE__, addr,
+ __func__, len));
+
+ /* Number of ddes in the dde list ; == 0 when it is a direct dde */
+ ddecnt = getpnn(ddl, dde_count);
+ bytes = getp32(ddl, ddebc);
+
+ if (ddecnt == 0 && bytes == 0) {
+ /* First dde is unused; make it a direct dde */
+ bytes = len;
+ putp32(ddl, ddebc, bytes);
+ putp64(ddl, ddead, (uint64_t) addr);
+ } else if (ddecnt == 0) {
+ /* Converting direct to indirect dde
+ * ddl[0] becomes head dde of ddl
+ * copy direct to indirect first.
+ */
+ ddl[1] = ddl[0];
+
+ /* Add the new dde next */
+ clear_dde(ddl[2]);
+ put32(ddl[2], ddebc, len);
+ put64(ddl[2], ddead, (uint64_t) addr);
+
+ /* Ddl head points to 2 direct ddes */
+ ddecnt = 2;
+ putpnn(ddl, dde_count, ddecnt);
+ bytes = bytes + len;
+ putp32(ddl, ddebc, bytes);
+ /* Pointer to the first direct dde */
+ putp64(ddl, ddead, (uint64_t) &ddl[1]);
+ } else {
+ /* Append a dde to an existing indirect ddl */
+ ++ddecnt;
+ clear_dde(ddl[ddecnt]);
+ put64(ddl[ddecnt], ddead, (uint64_t) addr);
+ put32(ddl[ddecnt], ddebc, len);
+
+ putpnn(ddl, dde_count, ddecnt);
+ bytes = bytes + len;
+ putp32(ddl, ddebc, bytes); /* byte sum of all dde */
+ }
+ return bytes;
+}
+
+/*
+ * Touch specified number of pages represented in number bytes
+ * beginning from the first buffer in a dde list.
+ * Do not touch the pages past buf_sz-th byte's page.
+ *
+ * Set buf_sz = 0 to touch all pages described by the ddep.
+ */
+static int nx_touch_pages_dde(struct nx_dde_t *ddep, long buf_sz, long page_sz,
+ int wr)
+{
+ uint32_t indirect_count;
+ uint32_t buf_len;
+ long total;
+ uint64_t buf_addr;
+ struct nx_dde_t *dde_list;
+ int i;
+
+ assert(!!ddep);
+
+ indirect_count = getpnn(ddep, dde_count);
+
+ NXPRT(fprintf(stderr, "%s dde_count %d request len ", __func__,
+ indirect_count));
+ NXPRT(fprintf(stderr, "0x%lx\n", buf_sz));
+
+ if (indirect_count == 0) {
+ /* Direct dde */
+ buf_len = getp32(ddep, ddebc);
+ buf_addr = getp64(ddep, ddead);
+
+ NXPRT(fprintf(stderr, "touch direct ddebc 0x%x ddead %p\n",
+ buf_len, (void *)buf_addr));
+
+ if (buf_sz == 0)
+ nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+ else
+ nxu_touch_pages((void *)buf_addr, NX_MIN(buf_len,
+ buf_sz), page_sz, wr);
+
+ return ERR_NX_OK;
+ }
+
+ /* Indirect dde */
+ if (indirect_count > MAX_DDE_COUNT)
+ return ERR_NX_EXCESSIVE_DDE;
+
+ /* First address of the list */
+ dde_list = (struct nx_dde_t *) getp64(ddep, ddead);
+
+ if (buf_sz == 0)
+ buf_sz = getp32(ddep, ddebc);
+
+ total = 0;
+ for (i = 0; i < indirect_count; i++) {
+ buf_len = get32(dde_list[i], ddebc);
+ buf_addr = get64(dde_list[i], ddead);
+ total += buf_len;
+
+ NXPRT(fprintf(stderr, "touch loop len 0x%x ddead %p total ",
+ buf_len, (void *)buf_addr));
+ NXPRT(fprintf(stderr, "0x%lx\n", total));
+
+ /* Touching fewer pages than encoded in the ddebc */
+ if (total > buf_sz) {
+ buf_len = NX_MIN(buf_len, total - buf_sz);
+ nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+ NXPRT(fprintf(stderr, "touch loop break len 0x%x ",
+ buf_len));
+ NXPRT(fprintf(stderr, "ddead %p\n", (void *)buf_addr));
+ break;
+ }
+ nxu_touch_pages((void *)buf_addr, buf_len, page_sz, wr);
+ }
+ return ERR_NX_OK;
+}
+
+/*
+ * Src and dst buffers are supplied in scatter gather lists.
+ * NX function code and other parameters supplied in cmdp.
+ */
+static int nx_submit_job(struct nx_dde_t *src, struct nx_dde_t *dst,
+ struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ uint64_t csbaddr;
+
+ memset((void *)&cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+ cmdp->crb.source_dde = *src;
+ cmdp->crb.target_dde = *dst;
+
+ /* Status, output byte count in tpbc */
+ csbaddr = ((uint64_t) &cmdp->crb.csb) & csb_address_mask;
+ put64(cmdp->crb, csb_address, csbaddr);
+
+ /* NX reports input bytes in spbc; cleared */
+ cmdp->cpb.out_spbc_comp_wrap = 0;
+ cmdp->cpb.out_spbc_comp_with_count = 0;
+ cmdp->cpb.out_spbc_decomp = 0;
+
+ /* Clear output */
+ put32(cmdp->cpb, out_crc, INIT_CRC);
+ put32(cmdp->cpb, out_adler, INIT_ADLER);
+
+ /* Submit the crb, the job descriptor, to the accelerator. */
+ return nxu_submit_job(cmdp, handle);
+}
+
+int decompress_file(int argc, char **argv, void *devhandle)
+{
+ FILE *inpf = NULL;
+ FILE *outf = NULL;
+
+ int c, expect, i, cc, rc = 0;
+ char gzfname[FNAME_MAX];
+
+ /* Queuing, file ops, byte counting */
+ char *fifo_in, *fifo_out;
+ int used_in, cur_in, used_out, cur_out, read_sz, n;
+ int first_free, last_free, first_used, last_used;
+ int first_offset, last_offset;
+ int write_sz, free_space, source_sz;
+ int source_sz_estimate, target_sz_estimate;
+ uint64_t last_comp_ratio = 0; /* 1000 max */
+ uint64_t total_out = 0;
+ int is_final, is_eof;
+
+ /* nx hardware */
+ int sfbt, subc, spbc, tpbc, nx_ce, fc, resuming = 0;
+ int history_len = 0;
+ struct nx_gzip_crb_cpb_t cmd, *cmdp;
+ struct nx_dde_t *ddl_in;
+ struct nx_dde_t dde_in[6] __aligned(128);
+ struct nx_dde_t *ddl_out;
+ struct nx_dde_t dde_out[6] __aligned(128);
+ int pgfault_retries;
+
+ /* when using mmap'ed files */
+ off_t input_file_offset;
+
+ if (argc > 2) {
+ fprintf(stderr, "usage: %s <fname> or stdin\n", argv[0]);
+ fprintf(stderr, " writes to stdout or <fname>.nx.gunzip\n");
+ return -1;
+ }
+
+ if (argc == 1) {
+ inpf = stdin;
+ outf = stdout;
+ } else if (argc == 2) {
+ char w[1024];
+ char *wp;
+
+ inpf = fopen(argv[1], "r");
+ if (inpf == NULL) {
+ perror(argv[1]);
+ return -1;
+ }
+
+ /* Make a new file name to write to. Ignoring '.gz' */
+ wp = (NULL != (wp = strrchr(argv[1], '/'))) ? (wp+1) : argv[1];
+ strcpy(w, wp);
+ strcat(w, ".nx.gunzip");
+
+ outf = fopen(w, "w");
+ if (outf == NULL) {
+ perror(w);
+ return -1;
+ }
+ }
+
+ /* Decode the gzip header */
+ c = GETINPC(inpf); expect = 0x1f; /* ID1 */
+ if (c != expect)
+ goto err1;
+
+ c = GETINPC(inpf); expect = 0x8b; /* ID2 */
+ if (c != expect)
+ goto err1;
+
+ c = GETINPC(inpf); expect = 0x08; /* CM */
+ if (c != expect)
+ goto err1;
+
+ int flg = GETINPC(inpf); /* FLG */
+
+ if (flg & 0xE0 || flg & 0x4 || flg == EOF)
+ goto err2;
+
+ fprintf(stderr, "gzHeader FLG %x\n", flg);
+
+ /* Read 6 bytes; ignoring the MTIME, XFL, OS fields in this
+ * sample code.
+ */
+ for (i = 0; i < 6; i++) {
+ char tmp[10];
+
+ tmp[i] = GETINPC(inpf);
+ if (tmp[i] == EOF)
+ goto err3;
+ fprintf(stderr, "%02x ", tmp[i]);
+ if (i == 5)
+ fprintf(stderr, "\n");
+ }
+ fprintf(stderr, "gzHeader MTIME, XFL, OS ignored\n");
+
+ /* FNAME */
+ if (flg & 0x8) {
+ int k = 0;
+
+ do {
+ c = GETINPC(inpf);
+ if (c == EOF || k >= FNAME_MAX)
+ goto err3;
+ gzfname[k++] = c;
+ } while (c);
+ fprintf(stderr, "gzHeader FNAME: %s\n", gzfname);
+ }
+
+ /* FHCRC */
+ if (flg & 0x2) {
+ c = GETINPC(inpf);
+ if (c == EOF)
+ goto err3;
+ c = GETINPC(inpf);
+ if (c == EOF)
+ goto err3;
+ fprintf(stderr, "gzHeader FHCRC: ignored\n");
+ }
+
+ used_in = cur_in = used_out = cur_out = 0;
+ is_final = is_eof = 0;
+
+ /* Allocate one page larger to prevent page faults due to NX
+ * overfetching.
+ * Either do this (char*)(uintptr_t)aligned_alloc or use
+ * -std=c11 flag to make the int-to-pointer warning go away.
+ */
+ assert((fifo_in = (char *)(uintptr_t)aligned_alloc(line_sz,
+ fifo_in_len + page_sz)) != NULL);
+ assert((fifo_out = (char *)(uintptr_t)aligned_alloc(line_sz,
+ fifo_out_len + page_sz + line_sz)) != NULL);
+ /* Leave unused space due to history rounding rules */
+ fifo_out = fifo_out + line_sz;
+ nxu_touch_pages(fifo_out, fifo_out_len, page_sz, 1);
+
+ ddl_in = &dde_in[0];
+ ddl_out = &dde_out[0];
+ cmdp = &cmd;
+ memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+read_state:
+
+ /* Read from .gz file */
+
+ NXPRT(fprintf(stderr, "read_state:\n"));
+
+ if (is_eof != 0)
+ goto write_state;
+
+ /* We read in to fifo_in in two steps: first: read in to from
+ * cur_in to the end of the buffer. last: if free space wrapped
+ * around, read from fifo_in offset 0 to offset cur_in.
+ */
+
+ /* Reset fifo head to reduce unnecessary wrap arounds */
+ cur_in = (used_in == 0) ? 0 : cur_in;
+
+ /* Free space total is reduced by a gap */
+ free_space = NX_MAX(0, fifo_free_bytes(used_in, fifo_in_len)
+ - line_sz);
+
+ /* Free space may wrap around as first and last */
+ first_free = fifo_free_first_bytes(cur_in, used_in, fifo_in_len);
+ last_free = fifo_free_last_bytes(cur_in, used_in, fifo_in_len);
+
+ /* Start offsets of the free memory */
+ first_offset = fifo_free_first_offset(cur_in, used_in);
+ last_offset = fifo_free_last_offset(cur_in, used_in, fifo_in_len);
+
+ /* Reduce read_sz because of the line_sz gap */
+ read_sz = NX_MIN(free_space, first_free);
+ n = 0;
+ if (read_sz > 0) {
+ /* Read in to offset cur_in + used_in */
+ n = fread(fifo_in + first_offset, 1, read_sz, inpf);
+ used_in = used_in + n;
+ free_space = free_space - n;
+ assert(n <= read_sz);
+ if (n != read_sz) {
+ /* Either EOF or error; exit the read loop */
+ is_eof = 1;
+ goto write_state;
+ }
+ }
+
+ /* If free space wrapped around */
+ if (last_free > 0) {
+ /* Reduce read_sz because of the line_sz gap */
+ read_sz = NX_MIN(free_space, last_free);
+ n = 0;
+ if (read_sz > 0) {
+ n = fread(fifo_in + last_offset, 1, read_sz, inpf);
+ used_in = used_in + n; /* Increase used space */
+ free_space = free_space - n; /* Decrease free space */
+ assert(n <= read_sz);
+ if (n != read_sz) {
+ /* Either EOF or error; exit the read loop */
+ is_eof = 1;
+ goto write_state;
+ }
+ }
+ }
+
+ /* At this point we have used_in bytes in fifo_in with the
+ * data head starting at cur_in and possibly wrapping around.
+ */
+
+write_state:
+
+ /* Write decompressed data to output file */
+
+ NXPRT(fprintf(stderr, "write_state:\n"));
+
+ if (used_out == 0)
+ goto decomp_state;
+
+ /* If fifo_out has data waiting, write it out to the file to
+ * make free target space for the accelerator used bytes in
+ * the first and last parts of fifo_out.
+ */
+
+ first_used = fifo_used_first_bytes(cur_out, used_out, fifo_out_len);
+ last_used = fifo_used_last_bytes(cur_out, used_out, fifo_out_len);
+
+ write_sz = first_used;
+
+ n = 0;
+ if (write_sz > 0) {
+ n = fwrite(fifo_out + cur_out, 1, write_sz, outf);
+ used_out = used_out - n;
+ /* Move head of the fifo */
+ cur_out = (cur_out + n) % fifo_out_len;
+ assert(n <= write_sz);
+ if (n != write_sz) {
+ fprintf(stderr, "error: write\n");
+ rc = -1;
+ goto err5;
+ }
+ }
+
+ if (last_used > 0) { /* If more data available in the last part */
+ write_sz = last_used; /* Keep it here for later */
+ n = 0;
+ if (write_sz > 0) {
+ n = fwrite(fifo_out, 1, write_sz, outf);
+ used_out = used_out - n;
+ cur_out = (cur_out + n) % fifo_out_len;
+ assert(n <= write_sz);
+ if (n != write_sz) {
+ fprintf(stderr, "error: write\n");
+ rc = -1;
+ goto err5;
+ }
+ }
+ }
+
+decomp_state:
+
+ /* NX decompresses input data */
+
+ NXPRT(fprintf(stderr, "decomp_state:\n"));
+
+ if (is_final)
+ goto finish_state;
+
+ /* Address/len lists */
+ clearp_dde(ddl_in);
+ clearp_dde(ddl_out);
+
+ /* FC, CRC, HistLen, Table 6-6 */
+ if (resuming) {
+ /* Resuming a partially decompressed input.
+ * The key to resume is supplying the 32KB
+ * dictionary (history) to NX, which is basically
+ * the last 32KB of output produced.
+ */
+ fc = GZIP_FC_DECOMPRESS_RESUME;
+
+ cmdp->cpb.in_crc = cmdp->cpb.out_crc;
+ cmdp->cpb.in_adler = cmdp->cpb.out_adler;
+
+ /* Round up the history size to quadword. Section 2.10 */
+ history_len = (history_len + 15) / 16;
+ putnn(cmdp->cpb, in_histlen, history_len);
+ history_len = history_len * 16; /* bytes */
+
+ if (history_len > 0) {
+ /* Chain in the history buffer to the DDE list */
+ if (cur_out >= history_len) {
+ nx_append_dde(ddl_in, fifo_out
+ + (cur_out - history_len),
+ history_len);
+ } else {
+ nx_append_dde(ddl_in, fifo_out
+ + ((fifo_out_len + cur_out)
+ - history_len),
+ history_len - cur_out);
+ /* Up to 32KB history wraps around fifo_out */
+ nx_append_dde(ddl_in, fifo_out, cur_out);
+ }
+
+ }
+ } else {
+ /* First decompress job */
+ fc = GZIP_FC_DECOMPRESS;
+
+ history_len = 0;
+ /* Writing 0 clears out subc as well */
+ cmdp->cpb.in_histlen = 0;
+ total_out = 0;
+
+ put32(cmdp->cpb, in_crc, INIT_CRC);
+ put32(cmdp->cpb, in_adler, INIT_ADLER);
+ put32(cmdp->cpb, out_crc, INIT_CRC);
+ put32(cmdp->cpb, out_adler, INIT_ADLER);
+
+ /* Assuming 10% compression ratio initially; use the
+ * most recently measured compression ratio as a
+ * heuristic to estimate the input and output
+ * sizes. If we give too much input, the target buffer
+ * overflows and NX cycles are wasted, and then we
+ * must retry with smaller input size. 1000 is 100%.
+ */
+ last_comp_ratio = 100UL;
+ }
+ cmdp->crb.gzip_fc = 0;
+ putnn(cmdp->crb, gzip_fc, fc);
+
+ /*
+ * NX source buffers
+ */
+ first_used = fifo_used_first_bytes(cur_in, used_in, fifo_in_len);
+ last_used = fifo_used_last_bytes(cur_in, used_in, fifo_in_len);
+
+ if (first_used > 0)
+ nx_append_dde(ddl_in, fifo_in + cur_in, first_used);
+
+ if (last_used > 0)
+ nx_append_dde(ddl_in, fifo_in, last_used);
+
+ /*
+ * NX target buffers
+ */
+ first_free = fifo_free_first_bytes(cur_out, used_out, fifo_out_len);
+ last_free = fifo_free_last_bytes(cur_out, used_out, fifo_out_len);
+
+ /* Reduce output free space amount not to overwrite the history */
+ int target_max = NX_MAX(0, fifo_free_bytes(used_out, fifo_out_len)
+ - (1<<16));
+
+ NXPRT(fprintf(stderr, "target_max %d (0x%x)\n", target_max,
+ target_max));
+
+ first_free = NX_MIN(target_max, first_free);
+ if (first_free > 0) {
+ first_offset = fifo_free_first_offset(cur_out, used_out);
+ nx_append_dde(ddl_out, fifo_out + first_offset, first_free);
+ }
+
+ if (last_free > 0) {
+ last_free = NX_MIN(target_max - first_free, last_free);
+ if (last_free > 0) {
+ last_offset = fifo_free_last_offset(cur_out, used_out,
+ fifo_out_len);
+ nx_append_dde(ddl_out, fifo_out + last_offset,
+ last_free);
+ }
+ }
+
+ /* Target buffer size is used to limit the source data size
+ * based on previous measurements of compression ratio.
+ */
+
+ /* source_sz includes history */
+ source_sz = getp32(ddl_in, ddebc);
+ assert(source_sz > history_len);
+ source_sz = source_sz - history_len;
+
+ /* Estimating how much source is needed to 3/4 fill a
+ * target_max size target buffer. If we overshoot, then NX
+ * must repeat the job with smaller input and we waste
+ * bandwidth. If we undershoot then we use more NX calls than
+ * necessary.
+ */
+
+ source_sz_estimate = ((uint64_t)target_max * last_comp_ratio * 3UL)
+ / 4000;
+
+ if (source_sz_estimate < source_sz) {
+ /* Target might be small, therefore limiting the
+ * source data.
+ */
+ source_sz = source_sz_estimate;
+ target_sz_estimate = target_max;
+ } else {
+ /* Source file might be small, therefore limiting target
+ * touch pages to a smaller value to save processor cycles.
+ */
+ target_sz_estimate = ((uint64_t)source_sz * 1000UL)
+ / (last_comp_ratio + 1);
+ target_sz_estimate = NX_MIN(2 * target_sz_estimate,
+ target_max);
+ }
+
+ source_sz = source_sz + history_len;
+
+ /* Some NX condition codes require submitting the NX job again.
+ * Kernel doesn't handle NX page faults. Expects user code to
+ * touch pages.
+ */
+ pgfault_retries = NX_MAX_FAULTS;
+
+restart_nx:
+
+ putp32(ddl_in, ddebc, source_sz);
+
+ /* Fault in pages */
+ nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), page_sz, 1);
+ nx_touch_pages_dde(ddl_in, 0, page_sz, 0);
+ nx_touch_pages_dde(ddl_out, target_sz_estimate, page_sz, 1);
+
+ /* Send job to NX */
+ cc = nx_submit_job(ddl_in, ddl_out, cmdp, devhandle);
+
+ switch (cc) {
+
+ case ERR_NX_TRANSLATION:
+
+ /* We touched the pages ahead of time. In the most common case
+ * we shouldn't be here. But may be some pages were paged out.
+ * Kernel should have placed the faulting address to fsaddr.
+ */
+ NXPRT(fprintf(stderr, "ERR_NX_TRANSLATION %p\n",
+ (void *)cmdp->crb.csb.fsaddr));
+
+ if (pgfault_retries == NX_MAX_FAULTS) {
+ /* Try once with exact number of pages */
+ --pgfault_retries;
+ goto restart_nx;
+ } else if (pgfault_retries > 0) {
+ /* If still faulting try fewer input pages
+ * assuming memory outage
+ */
+ if (source_sz > page_sz)
+ source_sz = NX_MAX(source_sz / 2, page_sz);
+ --pgfault_retries;
+ goto restart_nx;
+ } else {
+ fprintf(stderr, "cannot make progress; too many ");
+ fprintf(stderr, "page fault retries cc= %d\n", cc);
+ rc = -1;
+ goto err5;
+ }
+
+ case ERR_NX_DATA_LENGTH:
+
+ NXPRT(fprintf(stderr, "ERR_NX_DATA_LENGTH; "));
+ NXPRT(fprintf(stderr, "stream may have trailing data\n"));
+
+ /* Not an error in the most common case; it just says
+ * there is trailing data that we must examine.
+ *
+ * CC=3 CE(1)=0 CE(0)=1 indicates partial completion
+ * Fig.6-7 and Table 6-8.
+ */
+ nx_ce = get_csb_ce_ms3b(cmdp->crb.csb);
+
+ if (!csb_ce_termination(nx_ce) &&
+ csb_ce_partial_completion(nx_ce)) {
+ /* Check CPB for more information
+ * spbc and tpbc are valid
+ */
+ sfbt = getnn(cmdp->cpb, out_sfbt); /* Table 6-4 */
+ subc = getnn(cmdp->cpb, out_subc); /* Table 6-4 */
+ spbc = get32(cmdp->cpb, out_spbc_decomp);
+ tpbc = get32(cmdp->crb.csb, tpbc);
+ assert(target_max >= tpbc);
+
+ goto ok_cc3; /* not an error */
+ } else {
+ /* History length error when CE(1)=1 CE(0)=0. */
+ rc = -1;
+ fprintf(stderr, "history length error cc= %d\n", cc);
+ goto err5;
+ }
+
+ case ERR_NX_TARGET_SPACE:
+
+ /* Target buffer not large enough; retry smaller input
+ * data; give at least 1 byte. SPBC/TPBC are not valid.
+ */
+ assert(source_sz > history_len);
+ source_sz = ((source_sz - history_len + 2) / 2) + history_len;
+ NXPRT(fprintf(stderr, "ERR_NX_TARGET_SPACE; retry with "));
+ NXPRT(fprintf(stderr, "smaller input data src %d hist %d\n",
+ source_sz, history_len));
+ goto restart_nx;
+
+ case ERR_NX_OK:
+
+ /* This should not happen for gzip formatted data;
+ * we need trailing crc and isize
+ */
+ fprintf(stderr, "ERR_NX_OK\n");
+ spbc = get32(cmdp->cpb, out_spbc_decomp);
+ tpbc = get32(cmdp->crb.csb, tpbc);
+ assert(target_max >= tpbc);
+ assert(spbc >= history_len);
+ source_sz = spbc - history_len;
+ goto offsets_state;
+
+ default:
+ fprintf(stderr, "error: cc= %d\n", cc);
+ rc = -1;
+ goto err5;
+ }
+
+ok_cc3:
+
+ NXPRT(fprintf(stderr, "cc3: sfbt: %x\n", sfbt));
+
+ assert(spbc > history_len);
+ source_sz = spbc - history_len;
+
+ /* Table 6-4: Source Final Block Type (SFBT) describes the
+ * last processed deflate block and clues the software how to
+ * resume the next job. SUBC indicates how many input bits NX
+ * consumed but did not process. SPBC indicates how many
+ * bytes of source were given to the accelerator including
+ * history bytes.
+ */
+
+ switch (sfbt) {
+ int dhtlen;
+
+ case 0x0: /* Deflate final EOB received */
+
+ /* Calculating the checksum start position. */
+
+ source_sz = source_sz - subc / 8;
+ is_final = 1;
+ break;
+
+ /* Resume decompression cases are below. Basically
+ * indicates where NX has suspended and how to resume
+ * the input stream.
+ */
+
+ case 0x8: /* Within a literal block; use rembytecount */
+ case 0x9: /* Within a literal block; use rembytecount; bfinal=1 */
+
+ /* Supply the partially processed source byte again */
+ source_sz = source_sz - ((subc + 7) / 8);
+
+ /* SUBC LS 3bits: number of bits in the first source byte need
+ * to be processed.
+ * 000 means all 8 bits; Table 6-3
+ * Clear subc, histlen, sfbt, rembytecnt, dhtlen
+ */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+ putnn(cmdp->cpb, in_rembytecnt, getnn(cmdp->cpb,
+ out_rembytecnt));
+ break;
+
+ case 0xA: /* Within a FH block; */
+ case 0xB: /* Within a FH block; bfinal=1 */
+
+ source_sz = source_sz - ((subc + 7) / 8);
+
+ /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+ break;
+
+ case 0xC: /* Within a DH block; */
+ case 0xD: /* Within a DH block; bfinal=1 */
+
+ source_sz = source_sz - ((subc + 7) / 8);
+
+ /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+
+ dhtlen = getnn(cmdp->cpb, out_dhtlen);
+ putnn(cmdp->cpb, in_dhtlen, dhtlen);
+ assert(dhtlen >= 42);
+
+ /* Round up to a qword */
+ dhtlen = (dhtlen + 127) / 128;
+
+ while (dhtlen > 0) { /* Copy dht from cpb.out to cpb.in */
+ --dhtlen;
+ cmdp->cpb.in_dht[dhtlen] = cmdp->cpb.out_dht[dhtlen];
+ }
+ break;
+
+ case 0xE: /* Within a block header; bfinal=0; */
+ /* Also given if source data exactly ends (SUBC=0) with
+ * EOB code with BFINAL=0. Means the next byte will
+ * contain a block header.
+ */
+ case 0xF: /* within a block header with BFINAL=1. */
+
+ source_sz = source_sz - ((subc + 7) / 8);
+
+ /* Clear subc, histlen, sfbt, rembytecnt, dhtlen */
+ cmdp->cpb.in_subc = 0;
+ cmdp->cpb.in_sfbt = 0;
+ putnn(cmdp->cpb, in_subc, subc % 8);
+ putnn(cmdp->cpb, in_sfbt, sfbt);
+
+ /* Engine did not process any data */
+ if (is_eof && (source_sz == 0))
+ is_final = 1;
+ }
+
+offsets_state:
+
+ /* Adjust the source and target buffer offsets and lengths */
+
+ NXPRT(fprintf(stderr, "offsets_state:\n"));
+
+ /* Delete input data from fifo_in */
+ used_in = used_in - source_sz;
+ cur_in = (cur_in + source_sz) % fifo_in_len;
+ input_file_offset = input_file_offset + source_sz;
+
+ /* Add output data to fifo_out */
+ used_out = used_out + tpbc;
+
+ assert(used_out <= fifo_out_len);
+
+ total_out = total_out + tpbc;
+
+ /* Deflate history is 32KB max. No need to supply more
+ * than 32KB on a resume.
+ */
+ history_len = (total_out > window_max) ? window_max : total_out;
+
+ /* To estimate expected expansion in the next NX job; 500 means 50%.
+ * Deflate best case is around 1 to 1000.
+ */
+ last_comp_ratio = (1000UL * ((uint64_t)source_sz + 1))
+ / ((uint64_t)tpbc + 1);
+ last_comp_ratio = NX_MAX(NX_MIN(1000UL, last_comp_ratio), 1);
+ NXPRT(fprintf(stderr, "comp_ratio %ld source_sz %d spbc %d tpbc %d\n",
+ last_comp_ratio, source_sz, spbc, tpbc));
+
+ resuming = 1;
+
+finish_state:
+
+ NXPRT(fprintf(stderr, "finish_state:\n"));
+
+ if (is_final) {
+ if (used_out)
+ goto write_state; /* More data to write out */
+ else if (used_in < 8) {
+ /* Need at least 8 more bytes containing gzip crc
+ * and isize.
+ */
+ rc = -1;
+ goto err4;
+ } else {
+ /* Compare checksums and exit */
+ int i;
+ unsigned char tail[8];
+ uint32_t cksum, isize;
+
+ for (i = 0; i < 8; i++)
+ tail[i] = fifo_in[(cur_in + i) % fifo_in_len];
+ fprintf(stderr, "computed checksum %08x isize %08x\n",
+ cmdp->cpb.out_crc, (uint32_t) (total_out
+ % (1ULL<<32)));
+ cksum = ((uint32_t) tail[0] | (uint32_t) tail[1]<<8
+ | (uint32_t) tail[2]<<16
+ | (uint32_t) tail[3]<<24);
+ isize = ((uint32_t) tail[4] | (uint32_t) tail[5]<<8
+ | (uint32_t) tail[6]<<16
+ | (uint32_t) tail[7]<<24);
+ fprintf(stderr, "stored checksum %08x isize %08x\n",
+ cksum, isize);
+
+ if (cksum == cmdp->cpb.out_crc && isize == (uint32_t)
+ (total_out % (1ULL<<32))) {
+ rc = 0; goto ok1;
+ } else {
+ rc = -1; goto err4;
+ }
+ }
+ } else
+ goto read_state;
+
+ return -1;
+
+err1:
+ fprintf(stderr, "error: not a gzip file, expect %x, read %x\n",
+ expect, c);
+ return -1;
+
+err2:
+ fprintf(stderr, "error: the FLG byte is wrong or not being handled\n");
+ return -1;
+
+err3:
+ fprintf(stderr, "error: gzip header\n");
+ return -1;
+
+err4:
+ fprintf(stderr, "error: checksum missing or mismatch\n");
+
+err5:
+ok1:
+ fprintf(stderr, "decomp is complete: fclose\n");
+ fclose(outf);
+
+ return rc;
+}
+
+
+int main(int argc, char **argv)
+{
+ int rc;
+ struct sigaction act;
+ void *handle;
+
+ nx_dbg = 0;
+ nx_gzip_log = NULL;
+ act.sa_handler = 0;
+ act.sa_sigaction = nxu_sigsegv_handler;
+ act.sa_flags = SA_SIGINFO;
+ act.sa_restorer = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGSEGV, &act, NULL);
+
+ handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+ if (!handle) {
+ fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+ exit(-1);
+ }
+
+ rc = decompress_file(argc, argv, handle);
+
+ nx_function_end(handle);
+
+ return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
new file mode 100644
index 000000000000..7496a83f9c9d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* P9 gzip sample code for demonstrating the P9 NX hardware interface.
+ * Not intended for productive uses or for performance or compression
+ * ratio measurements. For simplicity of demonstration, this sample
+ * code compresses in to fixed Huffman blocks only (Deflate btype=1)
+ * and has very simple memory management. Dynamic Huffman blocks
+ * (Deflate btype=2) are more involved as detailed in the user guide.
+ * Note also that /dev/crypto/gzip, VAS and skiboot support are
+ * required.
+ *
+ * Copyright 2020 IBM Corp.
+ *
+ * https://github.com/libnxz/power-gzip for zlib api and other utils
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce: completion extension
+ * cpb: coprocessor parameter block (metadata)
+ * crb: coprocessor request block (command)
+ * csb: coprocessor status block (status)
+ * dht: dynamic huffman table
+ * dde: data descriptor element (address, length)
+ * ddl: list of ddes
+ * dh/fh: dynamic and fixed huffman types
+ * fc: coprocessor function code
+ * histlen: history/dictionary length
+ * history: sliding window of up to 32KB of data
+ * lzcount: Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt: source final block type; last block's type during decomp
+ * spbc: source processed byte count
+ * subc: source unprocessed bit count
+ * tebc: target ending bit count; valid bits in the last byte
+ * tpbc: target processed byte count
+ * vas: virtual accelerator switch; the user mode interface
+ */
+
+#define _ISOC11_SOURCE // For aligned_alloc()
+#define _DEFAULT_SOURCE // For endian.h
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "nxu.h"
+#include "nx.h"
+
+int nx_dbg;
+FILE *nx_gzip_log;
+
+#define NX_MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
+#define FNAME_MAX 1024
+#define FEXT ".nx.gz"
+
+/*
+ * LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
+ */
+static int compress_fht_sample(char *src, uint32_t srclen, char *dst,
+ uint32_t dstlen, int with_count,
+ struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ uint32_t fc;
+
+ assert(!!cmdp);
+
+ put32(cmdp->crb, gzip_fc, 0); /* clear */
+ fc = (with_count) ? GZIP_FC_COMPRESS_RESUME_FHT_COUNT :
+ GZIP_FC_COMPRESS_RESUME_FHT;
+ putnn(cmdp->crb, gzip_fc, fc);
+ putnn(cmdp->cpb, in_histlen, 0); /* resuming with no history */
+ memset((void *) &cmdp->crb.csb, 0, sizeof(cmdp->crb.csb));
+
+ /* Section 6.6 programming notes; spbc may be in two different
+ * places depending on FC.
+ */
+ if (!with_count)
+ put32(cmdp->cpb, out_spbc_comp, 0);
+ else
+ put32(cmdp->cpb, out_spbc_comp_with_count, 0);
+
+ /* Figure 6-3 6-4; CSB location */
+ put64(cmdp->crb, csb_address, 0);
+ put64(cmdp->crb, csb_address,
+ (uint64_t) &cmdp->crb.csb & csb_address_mask);
+
+ /* Source direct dde (scatter-gather list) */
+ clear_dde(cmdp->crb.source_dde);
+ putnn(cmdp->crb.source_dde, dde_count, 0);
+ put32(cmdp->crb.source_dde, ddebc, srclen);
+ put64(cmdp->crb.source_dde, ddead, (uint64_t) src);
+
+ /* Target direct dde (scatter-gather list) */
+ clear_dde(cmdp->crb.target_dde);
+ putnn(cmdp->crb.target_dde, dde_count, 0);
+ put32(cmdp->crb.target_dde, ddebc, dstlen);
+ put64(cmdp->crb.target_dde, ddead, (uint64_t) dst);
+
+ /* Submit the crb, the job descriptor, to the accelerator */
+ return nxu_submit_job(cmdp, handle);
+}
+
+/*
+ * Prepares a blank no filename no timestamp gzip header and returns
+ * the number of bytes written to buf.
+ * Gzip specification at https://tools.ietf.org/html/rfc1952
+ */
+int gzip_header_blank(char *buf)
+{
+ int i = 0;
+
+ buf[i++] = 0x1f; /* ID1 */
+ buf[i++] = 0x8b; /* ID2 */
+ buf[i++] = 0x08; /* CM */
+ buf[i++] = 0x00; /* FLG */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x00; /* MTIME */
+ buf[i++] = 0x04; /* XFL 4=fastest */
+ buf[i++] = 0x03; /* OS UNIX */
+
+ return i;
+}
+
+/* Caller must free the allocated buffer return nonzero on error. */
+int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
+{
+ struct stat statbuf;
+ FILE *fp;
+ char *p;
+ size_t num_bytes;
+
+ if (stat(fname, &statbuf)) {
+ perror(fname);
+ return(-1);
+ }
+ fp = fopen(fname, "r");
+ if (fp == NULL) {
+ perror(fname);
+ return(-1);
+ }
+ assert(NULL != (p = (char *) malloc(statbuf.st_size)));
+ num_bytes = fread(p, 1, statbuf.st_size, fp);
+ if (ferror(fp) || (num_bytes != statbuf.st_size)) {
+ perror(fname);
+ return(-1);
+ }
+ *buf = p;
+ *bufsize = num_bytes;
+ return 0;
+}
+
+/* Returns nonzero on error */
+int write_output_file(char *fname, char *buf, size_t bufsize)
+{
+ FILE *fp;
+ size_t num_bytes;
+
+ fp = fopen(fname, "w");
+ if (fp == NULL) {
+ perror(fname);
+ return(-1);
+ }
+ num_bytes = fwrite(buf, 1, bufsize, fp);
+ if (ferror(fp) || (num_bytes != bufsize)) {
+ perror(fname);
+ return(-1);
+ }
+ fclose(fp);
+ return 0;
+}
+
+/*
+ * Z_SYNC_FLUSH as described in zlib.h.
+ * Returns number of appended bytes
+ */
+int append_sync_flush(char *buf, int tebc, int final)
+{
+ uint64_t flush;
+ int shift = (tebc & 0x7);
+
+ if (tebc > 0) {
+ /* Last byte is partially full */
+ buf = buf - 1;
+ *buf = *buf & (unsigned char) ((1<<tebc)-1);
+ } else
+ *buf = 0;
+ flush = ((0x1ULL & final) << shift) | *buf;
+ shift = shift + 3; /* BFINAL and BTYPE written */
+ shift = (shift <= 8) ? 8 : 16;
+ flush |= (0xFFFF0000ULL) << shift; /* Zero length block */
+ shift = shift + 32;
+ while (shift > 0) {
+ *buf++ = (unsigned char) (flush & 0xffULL);
+ flush = flush >> 8;
+ shift = shift - 8;
+ }
+ return(((tebc > 5) || (tebc == 0)) ? 5 : 4);
+}
+
+/*
+ * Final deflate block bit. This call assumes the block
+ * beginning is byte aligned.
+ */
+static void set_bfinal(void *buf, int bfinal)
+{
+ char *b = buf;
+
+ if (bfinal)
+ *b = *b | (unsigned char) 0x01;
+ else
+ *b = *b & (unsigned char) 0xfe;
+}
+
+int compress_file(int argc, char **argv, void *handle)
+{
+ char *inbuf, *outbuf, *srcbuf, *dstbuf;
+ char outname[FNAME_MAX];
+ uint32_t srclen, dstlen;
+ uint32_t flushlen, chunk;
+ size_t inlen, outlen, dsttotlen, srctotlen;
+ uint32_t crc, spbc, tpbc, tebc;
+ int lzcounts = 0;
+ int cc;
+ int num_hdr_bytes;
+ struct nx_gzip_crb_cpb_t *cmdp;
+ uint32_t pagelen = 65536;
+ int fault_tries = NX_MAX_FAULTS;
+
+ cmdp = (void *)(uintptr_t)
+ aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
+ sizeof(struct nx_gzip_crb_cpb_t));
+
+ if (argc != 2) {
+ fprintf(stderr, "usage: %s <fname>\n", argv[0]);
+ exit(-1);
+ }
+ if (read_alloc_input_file(argv[1], &inbuf, &inlen))
+ exit(-1);
+ fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
+
+ /* Generous output buffer for header/trailer */
+ outlen = 2 * inlen + 1024;
+
+ assert(NULL != (outbuf = (char *)malloc(outlen)));
+ nxu_touch_pages(outbuf, outlen, pagelen, 1);
+
+ /* Compress piecemeal in smallish chunks */
+ chunk = 1<<22;
+
+ /* Write the gzip header to the stream */
+ num_hdr_bytes = gzip_header_blank(outbuf);
+ dstbuf = outbuf + num_hdr_bytes;
+ outlen = outlen - num_hdr_bytes;
+ dsttotlen = num_hdr_bytes;
+
+ srcbuf = inbuf;
+ srctotlen = 0;
+
+ /* Init the CRB, the coprocessor request block */
+ memset(&cmdp->crb, 0, sizeof(cmdp->crb));
+
+ /* Initial gzip crc32 */
+ put32(cmdp->cpb, in_crc, 0);
+
+ while (inlen > 0) {
+
+ /* Submit chunk size source data per job */
+ srclen = NX_MIN(chunk, inlen);
+ /* Supply large target in case data expands */
+ dstlen = NX_MIN(2*srclen, outlen);
+
+ /* Page faults are handled by the user code */
+
+ /* Fault-in pages; an improved code wouldn't touch so
+ * many pages but would try to estimate the
+ * compression ratio and adjust both the src and dst
+ * touch amounts.
+ */
+ nxu_touch_pages(cmdp, sizeof(struct nx_gzip_crb_cpb_t), pagelen,
+ 1);
+ nxu_touch_pages(srcbuf, srclen, pagelen, 0);
+ nxu_touch_pages(dstbuf, dstlen, pagelen, 1);
+
+ cc = compress_fht_sample(
+ srcbuf, srclen,
+ dstbuf, dstlen,
+ lzcounts, cmdp, handle);
+
+ if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
+ cc != ERR_NX_TRANSLATION) {
+ fprintf(stderr, "nx error: cc= %d\n", cc);
+ exit(-1);
+ }
+
+ /* Page faults are handled by the user code */
+ if (cc == ERR_NX_TRANSLATION) {
+ NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
+ NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
+ fault_tries,
+ (unsigned long long) cmdp->crb.csb.fsaddr));
+ fault_tries--;
+ if (fault_tries > 0) {
+ continue;
+ } else {
+ fprintf(stderr, "error: cannot progress; ");
+ fprintf(stderr, "too many faults\n");
+ exit(-1);
+ };
+ }
+
+ fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
+
+ inlen = inlen - srclen;
+ srcbuf = srcbuf + srclen;
+ srctotlen = srctotlen + srclen;
+
+ /* Two possible locations for spbc depending on the function
+ * code.
+ */
+ spbc = (!lzcounts) ? get32(cmdp->cpb, out_spbc_comp) :
+ get32(cmdp->cpb, out_spbc_comp_with_count);
+ assert(spbc == srclen);
+
+ /* Target byte count */
+ tpbc = get32(cmdp->crb.csb, tpbc);
+ /* Target ending bit count */
+ tebc = getnn(cmdp->cpb, out_tebc);
+ NXPRT(fprintf(stderr, "compressed chunk %d ", spbc));
+ NXPRT(fprintf(stderr, "to %d bytes, tebc= %d\n", tpbc, tebc));
+
+ if (inlen > 0) { /* More chunks to go */
+ set_bfinal(dstbuf, 0);
+ dstbuf = dstbuf + tpbc;
+ dsttotlen = dsttotlen + tpbc;
+ outlen = outlen - tpbc;
+ /* Round up to the next byte with a flush
+ * block; do not set the BFINAqL bit.
+ */
+ flushlen = append_sync_flush(dstbuf, tebc, 0);
+ dsttotlen = dsttotlen + flushlen;
+ outlen = outlen - flushlen;
+ dstbuf = dstbuf + flushlen;
+ NXPRT(fprintf(stderr, "added sync_flush %d bytes\n",
+ flushlen));
+ } else { /* Done */
+ /* Set the BFINAL bit of the last block per Deflate
+ * specification.
+ */
+ set_bfinal(dstbuf, 1);
+ dstbuf = dstbuf + tpbc;
+ dsttotlen = dsttotlen + tpbc;
+ outlen = outlen - tpbc;
+ }
+
+ /* Resuming crc32 for the next chunk */
+ crc = get32(cmdp->cpb, out_crc);
+ put32(cmdp->cpb, in_crc, crc);
+ crc = be32toh(crc);
+ }
+
+ /* Append crc32 and ISIZE to the end */
+ memcpy(dstbuf, &crc, 4);
+ memcpy(dstbuf+4, &srctotlen, 4);
+ dsttotlen = dsttotlen + 8;
+ outlen = outlen - 8;
+
+ assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
+ strcpy(outname, argv[1]);
+ strcat(outname, FEXT);
+ if (write_output_file(outname, outbuf, dsttotlen)) {
+ fprintf(stderr, "write error: %s\n", outname);
+ exit(-1);
+ }
+
+ fprintf(stderr, "compressed %ld to %ld bytes total, ", srctotlen,
+ dsttotlen);
+ fprintf(stderr, "crc32 checksum = %08x\n", crc);
+
+ if (inbuf != NULL)
+ free(inbuf);
+
+ if (outbuf != NULL)
+ free(outbuf);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int rc;
+ struct sigaction act;
+ void *handle;
+
+ nx_dbg = 0;
+ nx_gzip_log = NULL;
+ act.sa_handler = 0;
+ act.sa_sigaction = nxu_sigsegv_handler;
+ act.sa_flags = SA_SIGINFO;
+ act.sa_restorer = 0;
+ sigemptyset(&act.sa_mask);
+ sigaction(SIGSEGV, &act, NULL);
+
+ handle = nx_function_begin(NX_FUNC_COMP_GZIP, 0);
+ if (!handle) {
+ fprintf(stderr, "Unable to init NX, errno %d\n", errno);
+ exit(-1);
+ }
+
+ rc = compress_file(argc, argv, handle);
+
+ nx_function_end(handle);
+
+ return rc;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
new file mode 100644
index 000000000000..c055885da40a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzip_vas.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/*
+ * Copyright 2020 IBM Corp.
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/fcntl.h>
+#include <sys/mman.h>
+#include <endian.h>
+#include <bits/endian.h>
+#include <sys/ioctl.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include "vas-api.h"
+#include "nx.h"
+#include "copy-paste.h"
+#include "nxu.h"
+#include "nx_dbg.h"
+#include <sys/platform/ppc.h>
+
+#define barrier()
+#define hwsync() ({ asm volatile("sync" ::: "memory"); })
+
+#ifndef NX_NO_CPU_PRI
+#define cpu_pri_default() ({ asm volatile ("or 2, 2, 2"); })
+#define cpu_pri_low() ({ asm volatile ("or 31, 31, 31"); })
+#else
+#define cpu_pri_default()
+#define cpu_pri_low()
+#endif
+
+void *nx_fault_storage_address;
+
+struct nx_handle {
+ int fd;
+ int function;
+ void *paste_addr;
+};
+
+static int open_device_nodes(char *devname, int pri, struct nx_handle *handle)
+{
+ int rc, fd;
+ void *addr;
+ struct vas_tx_win_open_attr txattr;
+
+ fd = open(devname, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, " open device name %s\n", devname);
+ return -errno;
+ }
+
+ memset(&txattr, 0, sizeof(txattr));
+ txattr.version = 1;
+ txattr.vas_id = pri;
+ rc = ioctl(fd, VAS_TX_WIN_OPEN, (unsigned long)&txattr);
+ if (rc < 0) {
+ fprintf(stderr, "ioctl() n %d, error %d\n", rc, errno);
+ rc = -errno;
+ goto out;
+ }
+
+ addr = mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0ULL);
+ if (addr == MAP_FAILED) {
+ fprintf(stderr, "mmap() failed, errno %d\n", errno);
+ rc = -errno;
+ goto out;
+ }
+ handle->fd = fd;
+ handle->paste_addr = (void *)((char *)addr + 0x400);
+
+ rc = 0;
+out:
+ close(fd);
+ return rc;
+}
+
+void *nx_function_begin(int function, int pri)
+{
+ int rc;
+ char *devname = "/dev/crypto/nx-gzip";
+ struct nx_handle *nxhandle;
+
+ if (function != NX_FUNC_COMP_GZIP) {
+ errno = EINVAL;
+ fprintf(stderr, " NX_FUNC_COMP_GZIP not found\n");
+ return NULL;
+ }
+
+
+ nxhandle = malloc(sizeof(*nxhandle));
+ if (!nxhandle) {
+ errno = ENOMEM;
+ fprintf(stderr, " No memory\n");
+ return NULL;
+ }
+
+ nxhandle->function = function;
+ rc = open_device_nodes(devname, pri, nxhandle);
+ if (rc < 0) {
+ errno = -rc;
+ fprintf(stderr, " open_device_nodes failed\n");
+ return NULL;
+ }
+
+ return nxhandle;
+}
+
+int nx_function_end(void *handle)
+{
+ int rc = 0;
+ struct nx_handle *nxhandle = handle;
+
+ rc = munmap(nxhandle->paste_addr - 0x400, 4096);
+ if (rc < 0) {
+ fprintf(stderr, "munmap() failed, errno %d\n", errno);
+ return rc;
+ }
+ close(nxhandle->fd);
+ free(nxhandle);
+
+ return rc;
+}
+
+static int nx_wait_for_csb(struct nx_gzip_crb_cpb_t *cmdp)
+{
+ long poll = 0;
+ uint64_t t;
+
+ /* Save power and let other threads use the h/w. top may show
+ * 100% but only because OS doesn't know we slowed the this
+ * h/w thread while polling. We're letting other threads have
+ * higher throughput on the core.
+ */
+ cpu_pri_low();
+
+#define CSB_MAX_POLL 200000000UL
+#define USLEEP_TH 300000UL
+
+ t = __ppc_get_timebase();
+
+ while (getnn(cmdp->crb.csb, csb_v) == 0) {
+ ++poll;
+ hwsync();
+
+ cpu_pri_low();
+
+ /* usleep(0) takes around 29000 ticks ~60 us.
+ * 300000 is spinning for about 600 us then
+ * start sleeping.
+ */
+ if ((__ppc_get_timebase() - t) > USLEEP_TH) {
+ cpu_pri_default();
+ usleep(1);
+ }
+
+ if (poll > CSB_MAX_POLL)
+ break;
+
+ /* Fault address from signal handler */
+ if (nx_fault_storage_address) {
+ cpu_pri_default();
+ return -EAGAIN;
+ }
+
+ }
+
+ cpu_pri_default();
+
+ /* hw has updated csb and output buffer */
+ hwsync();
+
+ /* Check CSB flags. */
+ if (getnn(cmdp->crb.csb, csb_v) == 0) {
+ fprintf(stderr, "CSB still not valid after %d polls.\n",
+ (int) poll);
+ prt_err("CSB still not valid after %d polls, giving up.\n",
+ (int) poll);
+ return -ETIMEDOUT;
+ }
+
+ return 0;
+}
+
+static int nxu_run_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ int i, ret, retries;
+ struct nx_handle *nxhandle = handle;
+
+ assert(handle != NULL);
+ i = 0;
+ retries = 5000;
+ while (i++ < retries) {
+ hwsync();
+ vas_copy(&cmdp->crb, 0);
+ ret = vas_paste(nxhandle->paste_addr, 0);
+ hwsync();
+
+ NXPRT(fprintf(stderr, "Paste attempt %d/%d returns 0x%x\n",
+ i, retries, ret));
+
+ if ((ret == 2) || (ret == 3)) {
+
+ ret = nx_wait_for_csb(cmdp);
+ if (!ret) {
+ goto out;
+ } else if (ret == -EAGAIN) {
+ long x;
+
+ prt_err("Touching address %p, 0x%lx\n",
+ nx_fault_storage_address,
+ *(long *) nx_fault_storage_address);
+ x = *(long *) nx_fault_storage_address;
+ *(long *) nx_fault_storage_address = x;
+ nx_fault_storage_address = 0;
+ continue;
+ } else {
+ prt_err("wait_for_csb() returns %d\n", ret);
+ break;
+ }
+ } else {
+ if (i < 10) {
+ /* spin for few ticks */
+#define SPIN_TH 500UL
+ uint64_t fail_spin;
+
+ fail_spin = __ppc_get_timebase();
+ while ((__ppc_get_timebase() - fail_spin) <
+ SPIN_TH)
+ ;
+ } else {
+ /* sleep */
+ unsigned int pr = 0;
+
+ if (pr++ % 100 == 0) {
+ prt_err("Paste attempt %d/", i);
+ prt_err("%d, failed pid= %d\n", retries,
+ getpid());
+ }
+ usleep(1);
+ }
+ continue;
+ }
+ }
+
+out:
+ cpu_pri_default();
+
+ return ret;
+}
+
+int nxu_submit_job(struct nx_gzip_crb_cpb_t *cmdp, void *handle)
+{
+ int cc;
+
+ cc = nxu_run_job(cmdp, handle);
+
+ if (!cc)
+ cc = getnn(cmdp->crb.csb, csb_cc); /* CC Table 6-8 */
+
+ return cc;
+}
+
+
+void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx)
+{
+ fprintf(stderr, "%d: Got signal %d si_code %d, si_addr %p\n", getpid(),
+ sig, info->si_code, info->si_addr);
+
+ nx_fault_storage_address = info->si_addr;
+}
+
+/*
+ * Fault in pages prior to NX job submission. wr=1 may be required to
+ * touch writeable pages. System zero pages do not fault-in the page as
+ * intended. Typically set wr=1 for NX target pages and set wr=0 for NX
+ * source pages.
+ */
+int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr)
+{
+ char *begin = buf;
+ char *end = (char *) buf + buf_len - 1;
+ volatile char t;
+
+ assert(buf_len >= 0 && !!buf);
+
+ NXPRT(fprintf(stderr, "touch %p %p len 0x%lx wr=%d\n", buf,
+ (buf + buf_len), buf_len, wr));
+
+ if (buf_len <= 0 || buf == NULL)
+ return -1;
+
+ do {
+ t = *begin;
+ if (wr)
+ *begin = t;
+ begin = begin + page_len;
+ } while (begin < end);
+
+ /* When buf_sz is small or buf tail is in another page */
+ t = *end;
+ if (wr)
+ *end = t;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h b/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h
new file mode 100644
index 000000000000..0db2d6485037
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/copy-paste.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+/* From asm-compat.h */
+#define __stringify_in_c(...) #__VA_ARGS__
+#define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+
+/*
+ * Macros taken from arch/powerpc/include/asm/ppc-opcode.h and other
+ * header files.
+ */
+#define ___PPC_RA(a) (((a) & 0x1f) << 16)
+#define ___PPC_RB(b) (((b) & 0x1f) << 11)
+
+#define PPC_INST_COPY 0x7c20060c
+#define PPC_INST_PASTE 0x7c20070d
+
+#define PPC_COPY(a, b) stringify_in_c(.long PPC_INST_COPY | \
+ ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_PASTE(a, b) stringify_in_c(.long PPC_INST_PASTE | \
+ ___PPC_RA(a) | ___PPC_RB(b))
+#define CR0_SHIFT 28
+#define CR0_MASK 0xF
+/*
+ * Copy/paste instructions:
+ *
+ * copy RA,RB
+ * Copy contents of address (RA) + effective_address(RB)
+ * to internal copy-buffer.
+ *
+ * paste RA,RB
+ * Paste contents of internal copy-buffer to the address
+ * (RA) + effective_address(RB)
+ */
+static inline int vas_copy(void *crb, int offset)
+{
+ asm volatile(PPC_COPY(%0, %1)";"
+ :
+ : "b" (offset), "b" (crb)
+ : "memory");
+
+ return 0;
+}
+
+static inline int vas_paste(void *paste_address, int offset)
+{
+ __u32 cr;
+
+ cr = 0;
+ asm volatile(PPC_PASTE(%1, %2)";"
+ "mfocrf %0, 0x80;"
+ : "=r" (cr)
+ : "b" (offset), "b" (paste_address)
+ : "memory", "cr0");
+
+ return (cr >> CR0_SHIFT) & CR0_MASK;
+}
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/crb.h b/tools/testing/selftests/powerpc/nx-gzip/include/crb.h
new file mode 100644
index 000000000000..ab101085fa7e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/crb.h
@@ -0,0 +1,155 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __CRB_H
+#define __CRB_H
+#include <linux/types.h>
+#include "nx.h"
+
+/* CCW 842 CI/FC masks
+ * NX P8 workbook, section 4.3.1, figure 4-6
+ * "CI/FC Boundary by NX CT type"
+ */
+#define CCW_CI_842 (0x00003ff8)
+#define CCW_FC_842 (0x00000007)
+
+/* Chapter 6.5.8 Coprocessor-Completion Block (CCB) */
+
+#define CCB_VALUE (0x3fffffffffffffff)
+#define CCB_ADDRESS (0xfffffffffffffff8)
+#define CCB_CM (0x0000000000000007)
+#define CCB_CM0 (0x0000000000000004)
+#define CCB_CM12 (0x0000000000000003)
+
+#define CCB_CM0_ALL_COMPLETIONS (0x0)
+#define CCB_CM0_LAST_IN_CHAIN (0x4)
+#define CCB_CM12_STORE (0x0)
+#define CCB_CM12_INTERRUPT (0x1)
+
+#define CCB_SIZE (0x10)
+#define CCB_ALIGN CCB_SIZE
+
+struct coprocessor_completion_block {
+ __be64 value;
+ __be64 address;
+} __aligned(CCB_ALIGN);
+
+
+/* Chapter 6.5.7 Coprocessor-Status Block (CSB) */
+
+#define CSB_V (0x80)
+#define CSB_F (0x04)
+#define CSB_CH (0x03)
+#define CSB_CE_INCOMPLETE (0x80)
+#define CSB_CE_TERMINATION (0x40)
+#define CSB_CE_TPBC (0x20)
+
+#define CSB_CC_SUCCESS (0)
+#define CSB_CC_INVALID_ALIGN (1)
+#define CSB_CC_OPERAND_OVERLAP (2)
+#define CSB_CC_DATA_LENGTH (3)
+#define CSB_CC_TRANSLATION (5)
+#define CSB_CC_PROTECTION (6)
+#define CSB_CC_RD_EXTERNAL (7)
+#define CSB_CC_INVALID_OPERAND (8)
+#define CSB_CC_PRIVILEGE (9)
+#define CSB_CC_INTERNAL (10)
+#define CSB_CC_WR_EXTERNAL (12)
+#define CSB_CC_NOSPC (13)
+#define CSB_CC_EXCESSIVE_DDE (14)
+#define CSB_CC_WR_TRANSLATION (15)
+#define CSB_CC_WR_PROTECTION (16)
+#define CSB_CC_UNKNOWN_CODE (17)
+#define CSB_CC_ABORT (18)
+#define CSB_CC_TRANSPORT (20)
+#define CSB_CC_SEGMENTED_DDL (31)
+#define CSB_CC_PROGRESS_POINT (32)
+#define CSB_CC_DDE_OVERFLOW (33)
+#define CSB_CC_SESSION (34)
+#define CSB_CC_PROVISION (36)
+#define CSB_CC_CHAIN (37)
+#define CSB_CC_SEQUENCE (38)
+#define CSB_CC_HW (39)
+
+#define CSB_SIZE (0x10)
+#define CSB_ALIGN CSB_SIZE
+
+struct coprocessor_status_block {
+ __u8 flags;
+ __u8 cs;
+ __u8 cc;
+ __u8 ce;
+ __be32 count;
+ __be64 address;
+} __aligned(CSB_ALIGN);
+
+
+/* Chapter 6.5.10 Data-Descriptor List (DDL)
+ * each list contains one or more Data-Descriptor Entries (DDE)
+ */
+
+#define DDE_P (0x8000)
+
+#define DDE_SIZE (0x10)
+#define DDE_ALIGN DDE_SIZE
+
+struct data_descriptor_entry {
+ __be16 flags;
+ __u8 count;
+ __u8 index;
+ __be32 length;
+ __be64 address;
+} __aligned(DDE_ALIGN);
+
+
+/* Chapter 6.5.2 Coprocessor-Request Block (CRB) */
+
+#define CRB_SIZE (0x80)
+#define CRB_ALIGN (0x100) /* Errata: requires 256 alignment */
+
+
+/* Coprocessor Status Block field
+ * ADDRESS address of CSB
+ * C CCB is valid
+ * AT 0 = addrs are virtual, 1 = addrs are phys
+ * M enable perf monitor
+ */
+#define CRB_CSB_ADDRESS (0xfffffffffffffff0)
+#define CRB_CSB_C (0x0000000000000008)
+#define CRB_CSB_AT (0x0000000000000002)
+#define CRB_CSB_M (0x0000000000000001)
+
+struct coprocessor_request_block {
+ __be32 ccw;
+ __be32 flags;
+ __be64 csb_addr;
+
+ struct data_descriptor_entry source;
+ struct data_descriptor_entry target;
+
+ struct coprocessor_completion_block ccb;
+
+ __u8 reserved[48];
+
+ struct coprocessor_status_block csb;
+} __aligned(CRB_ALIGN);
+
+#define crb_csb_addr(c) __be64_to_cpu(c->csb_addr)
+#define crb_nx_fault_addr(c) __be64_to_cpu(c->stamp.nx.fault_storage_addr)
+#define crb_nx_flags(c) c->stamp.nx.flags
+#define crb_nx_fault_status(c) c->stamp.nx.fault_status
+#define crb_nx_pswid(c) c->stamp.nx.pswid
+
+
+/* RFC02167 Initiate Coprocessor Instructions document
+ * Chapter 8.2.1.1.1 RS
+ * Chapter 8.2.3 Coprocessor Directive
+ * Chapter 8.2.4 Execution
+ *
+ * The CCW must be converted to BE before passing to icswx()
+ */
+
+#define CCW_PS (0xff000000)
+#define CCW_CT (0x00ff0000)
+#define CCW_CD (0x0000ffff)
+#define CCW_CL (0x0000c000)
+
+#endif
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nx.h b/tools/testing/selftests/powerpc/nx-gzip/include/nx.h
new file mode 100644
index 000000000000..1abe23fc29e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nx.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020 IBM Corp.
+ *
+ */
+#ifndef _NX_H
+#define _NX_H
+
+#include <stdbool.h>
+
+#define NX_FUNC_COMP_842 1
+#define NX_FUNC_COMP_GZIP 2
+
+#ifndef __aligned
+#define __aligned(x) __attribute__((aligned(x)))
+#endif
+
+struct nx842_func_args {
+ bool use_crc;
+ bool decompress; /* true decompress; false compress */
+ bool move_data;
+ int timeout; /* seconds */
+};
+
+struct nxbuf_t {
+ int len;
+ char *buf;
+};
+
+/* @function should be EFT (aka 842), GZIP etc */
+void *nx_function_begin(int function, int pri);
+
+int nx_function(void *handle, struct nxbuf_t *in, struct nxbuf_t *out,
+ void *arg);
+
+int nx_function_end(void *handle);
+
+#endif /* _NX_H */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h b/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h
new file mode 100644
index 000000000000..16464e19c47f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nx_dbg.h
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2020 IBM Corporation
+ *
+ */
+
+#ifndef _NXU_DBG_H_
+#define _NXU_DBG_H_
+
+#include <sys/file.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <time.h>
+#include <pthread.h>
+
+extern FILE * nx_gzip_log;
+extern int nx_gzip_trace;
+extern unsigned int nx_gzip_inflate_impl;
+extern unsigned int nx_gzip_deflate_impl;
+extern unsigned int nx_gzip_inflate_flags;
+extern unsigned int nx_gzip_deflate_flags;
+
+extern int nx_dbg;
+pthread_mutex_t mutex_log;
+
+#define nx_gzip_trace_enabled() (nx_gzip_trace & 0x1)
+#define nx_gzip_hw_trace_enabled() (nx_gzip_trace & 0x2)
+#define nx_gzip_sw_trace_enabled() (nx_gzip_trace & 0x4)
+#define nx_gzip_gather_statistics() (nx_gzip_trace & 0x8)
+#define nx_gzip_per_stream_stat() (nx_gzip_trace & 0x10)
+
+#define prt(fmt, ...) do { \
+ pthread_mutex_lock(&mutex_log); \
+ flock(nx_gzip_log->_fileno, LOCK_EX); \
+ time_t t; struct tm *m; time(&t); m = localtime(&t); \
+ fprintf(nx_gzip_log, "[%04d/%02d/%02d %02d:%02d:%02d] " \
+ "pid %d: " fmt, \
+ (int)m->tm_year + 1900, (int)m->tm_mon+1, (int)m->tm_mday, \
+ (int)m->tm_hour, (int)m->tm_min, (int)m->tm_sec, \
+ (int)getpid(), ## __VA_ARGS__); \
+ fflush(nx_gzip_log); \
+ flock(nx_gzip_log->_fileno, LOCK_UN); \
+ pthread_mutex_unlock(&mutex_log); \
+} while (0)
+
+/* Use in case of an error */
+#define prt_err(fmt, ...) do { if (nx_dbg >= 0) { \
+ prt("%s:%u: Error: "fmt, \
+ __FILE__, __LINE__, ## __VA_ARGS__); \
+}} while (0)
+
+/* Use in case of an warning */
+#define prt_warn(fmt, ...) do { if (nx_dbg >= 1) { \
+ prt("%s:%u: Warning: "fmt, \
+ __FILE__, __LINE__, ## __VA_ARGS__); \
+}} while (0)
+
+/* Informational printouts */
+#define prt_info(fmt, ...) do { if (nx_dbg >= 2) { \
+ prt("Info: "fmt, ## __VA_ARGS__); \
+}} while (0)
+
+/* Trace zlib wrapper code */
+#define prt_trace(fmt, ...) do { if (nx_gzip_trace_enabled()) { \
+ prt("### "fmt, ## __VA_ARGS__); \
+}} while (0)
+
+/* Trace statistics */
+#define prt_stat(fmt, ...) do { if (nx_gzip_gather_statistics()) { \
+ prt("### "fmt, ## __VA_ARGS__); \
+}} while (0)
+
+/* Trace zlib hardware implementation */
+#define hw_trace(fmt, ...) do { \
+ if (nx_gzip_hw_trace_enabled()) \
+ fprintf(nx_gzip_log, "hhh " fmt, ## __VA_ARGS__); \
+ } while (0)
+
+/* Trace zlib software implementation */
+#define sw_trace(fmt, ...) do { \
+ if (nx_gzip_sw_trace_enabled()) \
+ fprintf(nx_gzip_log, "sss " fmt, ## __VA_ARGS__); \
+ } while (0)
+
+
+/**
+ * str_to_num - Convert string into number and copy with endings like
+ * KiB for kilobyte
+ * MiB for megabyte
+ * GiB for gigabyte
+ */
+uint64_t str_to_num(char *str);
+void nx_lib_debug(int onoff);
+
+#endif /* _NXU_DBG_H_ */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h b/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h
new file mode 100644
index 000000000000..20a4e883e0d3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/nxu.h
@@ -0,0 +1,650 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Hardware interface of the NX-GZIP compression accelerator
+ *
+ * Copyright (C) IBM Corporation, 2020
+ *
+ * Author: Bulent Abali <abali@us.ibm.com>
+ *
+ */
+
+#ifndef _NXU_H
+#define _NXU_H
+
+#include <stdint.h>
+#include <endian.h>
+#include "nx.h"
+
+/* deflate */
+#define LLSZ 286
+#define DSZ 30
+
+/* nx */
+#define DHTSZ 18
+#define DHT_MAXSZ 288
+#define MAX_DDE_COUNT 256
+
+/* util */
+#ifdef NXDBG
+#define NXPRT(X) X
+#else
+#define NXPRT(X)
+#endif
+
+#ifdef NXTIMER
+#include <sys/platform/ppc.h>
+#define NX_CLK(X) X
+#define nx_get_time() __ppc_get_timebase()
+#define nx_get_freq() __ppc_get_timebase_freq()
+#else
+#define NX_CLK(X)
+#define nx_get_time() (-1)
+#define nx_get_freq() (-1)
+#endif
+
+#define NX_MAX_FAULTS 500
+
+/*
+ * Definitions of acronyms used here. See
+ * P9 NX Gzip Accelerator User's Manual for details:
+ * https://github.com/libnxz/power-gzip/blob/develop/doc/power_nx_gzip_um.pdf
+ *
+ * adler/crc: 32 bit checksums appended to stream tail
+ * ce: completion extension
+ * cpb: coprocessor parameter block (metadata)
+ * crb: coprocessor request block (command)
+ * csb: coprocessor status block (status)
+ * dht: dynamic huffman table
+ * dde: data descriptor element (address, length)
+ * ddl: list of ddes
+ * dh/fh: dynamic and fixed huffman types
+ * fc: coprocessor function code
+ * histlen: history/dictionary length
+ * history: sliding window of up to 32KB of data
+ * lzcount: Deflate LZ symbol counts
+ * rembytecnt: remaining byte count
+ * sfbt: source final block type; last block's type during decomp
+ * spbc: source processed byte count
+ * subc: source unprocessed bit count
+ * tebc: target ending bit count; valid bits in the last byte
+ * tpbc: target processed byte count
+ * vas: virtual accelerator switch; the user mode interface
+ */
+
+union nx_qw_t {
+ uint32_t word[4];
+ uint64_t dword[2];
+} __aligned(16);
+
+/*
+ * Note: NX registers with fewer than 32 bits are declared by
+ * convention as uint32_t variables in unions. If *_offset and *_mask
+ * are defined for a variable, then use get_ put_ macros to
+ * conveniently access the register fields for endian conversions.
+ */
+
+struct nx_dde_t {
+ /* Data Descriptor Element, Section 6.4 */
+ union {
+ uint32_t dde_count;
+ /* When dde_count == 0 ddead is a pointer to a data buffer;
+ * ddebc is the buffer length bytes.
+ * When dde_count > 0 dde is an indirect dde; ddead is a
+ * pointer to a contiguous list of direct ddes; ddebc is the
+ * total length of all data pointed to by the list of direct
+ * ddes. Note that only one level of indirection is permitted.
+ * See Section 6.4 of the user manual for additional details.
+ */
+ };
+ uint32_t ddebc; /* dde byte count */
+ uint64_t ddead; /* dde address */
+} __aligned(16);
+
+struct nx_csb_t {
+ /* Coprocessor Status Block, Section 6.6 */
+ union {
+ uint32_t csb_v;
+ /* Valid bit. v must be set to 0 by the program
+ * before submitting the coprocessor command.
+ * Software can poll for the v bit
+ */
+
+ uint32_t csb_f;
+ /* 16B CSB size. Written to 0 by DMA when it writes the CPB */
+
+ uint32_t csb_cs;
+ /* cs completion sequence; unused */
+
+ uint32_t csb_cc;
+ /* cc completion code; cc != 0 exception occurred */
+
+ uint32_t csb_ce;
+ /* ce completion extension */
+
+ };
+ uint32_t tpbc;
+ /* target processed byte count TPBC */
+
+ uint64_t fsaddr;
+ /* Section 6.12.1 CSB NonZero error summary. FSA Failing storage
+ * address. Address where error occurred. When available, written
+ * to A field of CSB
+ */
+} __aligned(16);
+
+struct nx_ccb_t {
+ /* Coprocessor Completion Block, Section 6.7 */
+
+ uint32_t reserved[3];
+ union {
+ /* When crb.c==0 (no ccb defined) it is reserved;
+ * When crb.c==1 (ccb defined) it is cm
+ */
+
+ uint32_t ccb_cm;
+ /* Signal interrupt of crb.c==1 and cm==1 */
+
+ uint32_t word;
+ /* generic access to the 32bit word */
+ };
+} __aligned(16);
+
+struct vas_stamped_crb_t {
+ /*
+ * CRB operand of the paste coprocessor instruction is stamped
+ * in quadword 4 with the information shown here as its written
+ * in to the receive FIFO of the coprocessor
+ */
+
+ union {
+ uint32_t vas_buf_num;
+ /* Verification only vas buffer number which correlates to
+ * the low order bits of the atag in the paste command
+ */
+
+ uint32_t send_wc_id;
+ /* Pointer to Send Window Context that provides for NX address
+ * translation information, such as MSR and LPCR bits, job
+ * completion interrupt RA, PSWID, and job utilization counter.
+ */
+
+ };
+ union {
+ uint32_t recv_wc_id;
+ /* Pointer to Receive Window Context. NX uses this to return
+ * credits to a Receive FIFO as entries are dequeued.
+ */
+
+ };
+ uint32_t reserved2;
+ union {
+ uint32_t vas_invalid;
+ /* Invalid bit. If this bit is 1 the CRB is discarded by
+ * NX upon fetching from the receive FIFO. If this bit is 0
+ * the CRB is processed normally. The bit is stamped to 0
+ * by VAS and may be written to 1 by hypervisor while
+ * the CRB is in the receive FIFO (in memory).
+ */
+
+ };
+};
+
+struct nx_stamped_fault_crb_t {
+ /*
+ * A CRB that has a translation fault is stamped by NX in quadword 4
+ * and pasted to the Fault Send Window in VAS.
+ */
+ uint64_t fsa;
+ union {
+ uint32_t nxsf_t;
+ uint32_t nxsf_fs;
+ };
+ uint32_t pswid;
+};
+
+union stamped_crb_t {
+ struct vas_stamped_crb_t vas;
+ struct nx_stamped_fault_crb_t nx;
+};
+
+struct nx_gzip_cpb_t {
+ /*
+ * Coprocessor Parameter Block In/Out are used to pass metadata
+ * to/from accelerator. Tables 6.5 and 6.6 of the user manual.
+ */
+
+ /* CPBInput */
+
+ struct {
+ union {
+ union nx_qw_t qw0;
+ struct {
+ uint32_t in_adler; /* bits 0:31 */
+ uint32_t in_crc; /* bits 32:63 */
+ union {
+ uint32_t in_histlen; /* bits 64:75 */
+ uint32_t in_subc; /* bits 93:95 */
+ };
+ union {
+ /* bits 108:111 */
+ uint32_t in_sfbt;
+ /* bits 112:127 */
+ uint32_t in_rembytecnt;
+ /* bits 116:127 */
+ uint32_t in_dhtlen;
+ };
+ };
+ };
+ union {
+ union nx_qw_t in_dht[DHTSZ]; /* qw[1:18] */
+ char in_dht_char[DHT_MAXSZ]; /* byte access */
+ };
+ union nx_qw_t reserved[5]; /* qw[19:23] */
+ };
+
+ /* CPBOutput */
+
+ volatile struct {
+ union {
+ union nx_qw_t qw24;
+ struct {
+ uint32_t out_adler; /* bits 0:31 qw[24] */
+ uint32_t out_crc; /* bits 32:63 qw[24] */
+ union {
+ /* bits 77:79 qw[24] */
+ uint32_t out_tebc;
+ /* bits 80:95 qw[24] */
+ uint32_t out_subc;
+ };
+ union {
+ /* bits 108:111 qw[24] */
+ uint32_t out_sfbt;
+ /* bits 112:127 qw[24] */
+ uint32_t out_rembytecnt;
+ /* bits 116:127 qw[24] */
+ uint32_t out_dhtlen;
+ };
+ };
+ };
+ union {
+ union nx_qw_t qw25[79]; /* qw[25:103] */
+ /* qw[25] compress no lzcounts or wrap */
+ uint32_t out_spbc_comp_wrap;
+ uint32_t out_spbc_wrap; /* qw[25] wrap */
+ /* qw[25] compress no lzcounts */
+ uint32_t out_spbc_comp;
+ /* 286 LL and 30 D symbol counts */
+ uint32_t out_lzcount[LLSZ+DSZ];
+ struct {
+ union nx_qw_t out_dht[DHTSZ]; /* qw[25:42] */
+ /* qw[43] decompress */
+ uint32_t out_spbc_decomp;
+ };
+ };
+ /* qw[104] compress with lzcounts */
+ uint32_t out_spbc_comp_with_count;
+ };
+} __aligned(128);
+
+struct nx_gzip_crb_t {
+ union { /* byte[0:3] */
+ uint32_t gzip_fc; /* bits[24-31] */
+ };
+ uint32_t reserved1; /* byte[4:7] */
+ union {
+ uint64_t csb_address; /* byte[8:15] */
+ struct {
+ uint32_t reserved2;
+ union {
+ uint32_t crb_c;
+ /* c==0 no ccb defined */
+
+ uint32_t crb_at;
+ /* at==0 address type is ignored;
+ * all addrs effective assumed.
+ */
+
+ };
+ };
+ };
+ struct nx_dde_t source_dde; /* byte[16:31] */
+ struct nx_dde_t target_dde; /* byte[32:47] */
+ volatile struct nx_ccb_t ccb; /* byte[48:63] */
+ volatile union {
+ /* byte[64:239] shift csb by 128 bytes out of the crb; csb was
+ * in crb earlier; JReilly says csb written with partial inject
+ */
+ union nx_qw_t reserved64[11];
+ union stamped_crb_t stamp; /* byte[64:79] */
+ };
+ volatile struct nx_csb_t csb;
+} __aligned(128);
+
+struct nx_gzip_crb_cpb_t {
+ struct nx_gzip_crb_t crb;
+ struct nx_gzip_cpb_t cpb;
+} __aligned(2048);
+
+
+/*
+ * NX hardware convention has the msb bit on the left numbered 0.
+ * The defines below has *_offset defined as the right most bit
+ * position of a field. x of size_mask(x) is the field width in bits.
+ */
+
+#define size_mask(x) ((1U<<(x))-1)
+
+/*
+ * Offsets and Widths within the containing 32 bits of the various NX
+ * gzip hardware registers. Use the getnn/putnn macros to access
+ * these regs
+ */
+
+#define dde_count_mask size_mask(8)
+#define dde_count_offset 23
+
+/* CSB */
+
+#define csb_v_mask size_mask(1)
+#define csb_v_offset 0
+#define csb_f_mask size_mask(1)
+#define csb_f_offset 6
+#define csb_cs_mask size_mask(8)
+#define csb_cs_offset 15
+#define csb_cc_mask size_mask(8)
+#define csb_cc_offset 23
+#define csb_ce_mask size_mask(8)
+#define csb_ce_offset 31
+
+/* CCB */
+
+#define ccb_cm_mask size_mask(3)
+#define ccb_cm_offset 31
+
+/* VAS stamped CRB fields */
+
+#define vas_buf_num_mask size_mask(6)
+#define vas_buf_num_offset 5
+#define send_wc_id_mask size_mask(16)
+#define send_wc_id_offset 31
+#define recv_wc_id_mask size_mask(16)
+#define recv_wc_id_offset 31
+#define vas_invalid_mask size_mask(1)
+#define vas_invalid_offset 31
+
+/* NX stamped fault CRB fields */
+
+#define nxsf_t_mask size_mask(1)
+#define nxsf_t_offset 23
+#define nxsf_fs_mask size_mask(8)
+#define nxsf_fs_offset 31
+
+/* CPB input */
+
+#define in_histlen_mask size_mask(12)
+#define in_histlen_offset 11
+#define in_dhtlen_mask size_mask(12)
+#define in_dhtlen_offset 31
+#define in_subc_mask size_mask(3)
+#define in_subc_offset 31
+#define in_sfbt_mask size_mask(4)
+#define in_sfbt_offset 15
+#define in_rembytecnt_mask size_mask(16)
+#define in_rembytecnt_offset 31
+
+/* CPB output */
+
+#define out_tebc_mask size_mask(3)
+#define out_tebc_offset 15
+#define out_subc_mask size_mask(16)
+#define out_subc_offset 31
+#define out_sfbt_mask size_mask(4)
+#define out_sfbt_offset 15
+#define out_rembytecnt_mask size_mask(16)
+#define out_rembytecnt_offset 31
+#define out_dhtlen_mask size_mask(12)
+#define out_dhtlen_offset 31
+
+/* CRB */
+
+#define gzip_fc_mask size_mask(8)
+#define gzip_fc_offset 31
+#define crb_c_mask size_mask(1)
+#define crb_c_offset 28
+#define crb_at_mask size_mask(1)
+#define crb_at_offset 30
+#define csb_address_mask ~(15UL) /* mask off bottom 4b */
+
+/*
+ * Access macros for the registers. Do not access registers directly
+ * because of the endian conversion. P9 processor may run either as
+ * Little or Big endian. However the NX coprocessor regs are always
+ * big endian.
+ * Use the 32 and 64b macros to access respective
+ * register sizes.
+ * Use nn forms for the register fields shorter than 32 bits.
+ */
+
+#define getnn(ST, REG) ((be32toh(ST.REG) >> (31-REG##_offset)) \
+ & REG##_mask)
+#define getpnn(ST, REG) ((be32toh((ST)->REG) >> (31-REG##_offset)) \
+ & REG##_mask)
+#define get32(ST, REG) (be32toh(ST.REG))
+#define getp32(ST, REG) (be32toh((ST)->REG))
+#define get64(ST, REG) (be64toh(ST.REG))
+#define getp64(ST, REG) (be64toh((ST)->REG))
+
+#define unget32(ST, REG) (get32(ST, REG) & ~((REG##_mask) \
+ << (31-REG##_offset)))
+/* get 32bits less the REG field */
+
+#define ungetp32(ST, REG) (getp32(ST, REG) & ~((REG##_mask) \
+ << (31-REG##_offset)))
+/* get 32bits less the REG field */
+
+#define clear_regs(ST) memset((void *)(&(ST)), 0, sizeof(ST))
+#define clear_dde(ST) do { ST.dde_count = ST.ddebc = 0; ST.ddead = 0; \
+ } while (0)
+#define clearp_dde(ST) do { (ST)->dde_count = (ST)->ddebc = 0; \
+ (ST)->ddead = 0; \
+ } while (0)
+#define clear_struct(ST) memset((void *)(&(ST)), 0, sizeof(ST))
+#define putnn(ST, REG, X) (ST.REG = htobe32(unget32(ST, REG) | (((X) \
+ & REG##_mask) << (31-REG##_offset))))
+#define putpnn(ST, REG, X) ((ST)->REG = htobe32(ungetp32(ST, REG) \
+ | (((X) & REG##_mask) << (31-REG##_offset))))
+
+#define put32(ST, REG, X) (ST.REG = htobe32(X))
+#define putp32(ST, REG, X) ((ST)->REG = htobe32(X))
+#define put64(ST, REG, X) (ST.REG = htobe64(X))
+#define putp64(ST, REG, X) ((ST)->REG = htobe64(X))
+
+/*
+ * Completion extension ce(0) ce(1) ce(2). Bits ce(3-7)
+ * unused. Section 6.6 Figure 6.7.
+ */
+
+#define get_csb_ce(ST) ((uint32_t)getnn(ST, csb_ce))
+#define get_csb_ce_ms3b(ST) (get_csb_ce(ST) >> 5)
+#define put_csb_ce_ms3b(ST, X) putnn(ST, csb_ce, ((uint32_t)(X) << 5))
+
+#define CSB_CE_PARTIAL 0x4
+#define CSB_CE_TERMINATE 0x2
+#define CSB_CE_TPBC_VALID 0x1
+
+#define csb_ce_termination(X) (!!((X) & CSB_CE_TERMINATE))
+/* termination, output buffers may be modified, SPBC/TPBC invalid Fig.6-7 */
+
+#define csb_ce_check_completion(X) (!csb_ce_termination(X))
+/* if not terminated then check full or partial completion */
+
+#define csb_ce_partial_completion(X) (!!((X) & CSB_CE_PARTIAL))
+#define csb_ce_full_completion(X) (!csb_ce_partial_completion(X))
+#define csb_ce_tpbc_valid(X) (!!((X) & CSB_CE_TPBC_VALID))
+/* TPBC indicates successfully stored data count */
+
+#define csb_ce_default_err(X) csb_ce_termination(X)
+/* most error CEs have CE(0)=0 and CE(1)=1 */
+
+#define csb_ce_cc3_partial(X) csb_ce_partial_completion(X)
+/* some CC=3 are partially completed, Table 6-8 */
+
+#define csb_ce_cc64(X) ((X)&(CSB_CE_PARTIAL \
+ | CSB_CE_TERMINATE) == 0)
+/* Compression: when TPBC>SPBC then CC=64 Table 6-8; target didn't
+ * compress smaller than source.
+ */
+
+/* Decompress SFBT combinations Tables 5-3, 6-4, 6-6 */
+
+#define SFBT_BFINAL 0x1
+#define SFBT_LIT 0x4
+#define SFBT_FHT 0x5
+#define SFBT_DHT 0x6
+#define SFBT_HDR 0x7
+
+/*
+ * NX gzip function codes. Table 6.2.
+ * Bits 0:4 are the FC. Bit 5 is used by the DMA controller to
+ * select one of the two Byte Count Limits.
+ */
+
+#define GZIP_FC_LIMIT_MASK 0x01
+#define GZIP_FC_COMPRESS_FHT 0x00
+#define GZIP_FC_COMPRESS_DHT 0x02
+#define GZIP_FC_COMPRESS_FHT_COUNT 0x04
+#define GZIP_FC_COMPRESS_DHT_COUNT 0x06
+#define GZIP_FC_COMPRESS_RESUME_FHT 0x08
+#define GZIP_FC_COMPRESS_RESUME_DHT 0x0a
+#define GZIP_FC_COMPRESS_RESUME_FHT_COUNT 0x0c
+#define GZIP_FC_COMPRESS_RESUME_DHT_COUNT 0x0e
+#define GZIP_FC_DECOMPRESS 0x10
+#define GZIP_FC_DECOMPRESS_SINGLE_BLK_N_SUSPEND 0x12
+#define GZIP_FC_DECOMPRESS_RESUME 0x14
+#define GZIP_FC_DECOMPRESS_RESUME_SINGLE_BLK_N_SUSPEND 0x16
+#define GZIP_FC_WRAP 0x1e
+
+#define fc_is_compress(fc) (((fc) & 0x10) == 0)
+#define fc_has_count(fc) (fc_is_compress(fc) && (((fc) & 0x4) != 0))
+
+/* CSB.CC Error codes */
+
+#define ERR_NX_OK 0
+#define ERR_NX_ALIGNMENT 1
+#define ERR_NX_OPOVERLAP 2
+#define ERR_NX_DATA_LENGTH 3
+#define ERR_NX_TRANSLATION 5
+#define ERR_NX_PROTECTION 6
+#define ERR_NX_EXTERNAL_UE7 7
+#define ERR_NX_INVALID_OP 8
+#define ERR_NX_PRIVILEGE 9
+#define ERR_NX_INTERNAL_UE 10
+#define ERR_NX_EXTERN_UE_WR 12
+#define ERR_NX_TARGET_SPACE 13
+#define ERR_NX_EXCESSIVE_DDE 14
+#define ERR_NX_TRANSL_WR 15
+#define ERR_NX_PROTECT_WR 16
+#define ERR_NX_SUBFUNCTION 17
+#define ERR_NX_FUNC_ABORT 18
+#define ERR_NX_BYTE_MAX 19
+#define ERR_NX_CORRUPT_CRB 20
+#define ERR_NX_INVALID_CRB 21
+#define ERR_NX_INVALID_DDE 30
+#define ERR_NX_SEGMENTED_DDL 31
+#define ERR_NX_DDE_OVERFLOW 33
+#define ERR_NX_TPBC_GT_SPBC 64
+#define ERR_NX_MISSING_CODE 66
+#define ERR_NX_INVALID_DIST 67
+#define ERR_NX_INVALID_DHT 68
+#define ERR_NX_EXTERNAL_UE90 90
+#define ERR_NX_WDOG_TIMER 224
+#define ERR_NX_AT_FAULT 250
+#define ERR_NX_INTR_SERVER 252
+#define ERR_NX_UE253 253
+#define ERR_NX_NO_HW 254
+#define ERR_NX_HUNG_OP 255
+#define ERR_NX_END 256
+
+/* initial values for non-resume operations */
+#define INIT_CRC 0 /* crc32(0L, Z_NULL, 0) */
+#define INIT_ADLER 1 /* adler32(0L, Z_NULL, 0) adler is initialized to 1 */
+
+/* prototypes */
+int nxu_submit_job(struct nx_gzip_crb_cpb_t *c, void *handle);
+
+extern void nxu_sigsegv_handler(int sig, siginfo_t *info, void *ctx);
+extern int nxu_touch_pages(void *buf, long buf_len, long page_len, int wr);
+
+/* caller supplies a print buffer 4*sizeof(crb) */
+
+char *nx_crb_str(struct nx_gzip_crb_t *crb, char *prbuf);
+char *nx_cpb_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_prt_hex(void *cp, int sz, char *prbuf);
+char *nx_lzcount_str(struct nx_gzip_cpb_t *cpb, char *prbuf);
+char *nx_strerror(int e);
+
+#ifdef NX_SIM
+#include <stdio.h>
+int nx_sim_init(void *ctx);
+int nx_sim_end(void *ctx);
+int nxu_run_sim_job(struct nx_gzip_crb_cpb_t *c, void *ctx);
+#endif /* NX_SIM */
+
+/* Deflate stream manipulation */
+
+#define set_final_bit(x) (x |= (unsigned char)1)
+#define clr_final_bit(x) (x &= ~(unsigned char)1)
+
+#define append_empty_fh_blk(p, b) do { *(p) = (2 | (1&(b))); *((p)+1) = 0; \
+ } while (0)
+/* append 10 bits 0000001b 00...... ;
+ * assumes appending starts on a byte boundary; b is the final bit.
+ */
+
+
+#ifdef NX_842
+
+/* 842 Engine */
+
+struct nx_eft_crb_t {
+ union { /* byte[0:3] */
+ uint32_t eft_fc; /* bits[29-31] */
+ };
+ uint32_t reserved1; /* byte[4:7] */
+ union {
+ uint64_t csb_address; /* byte[8:15] */
+ struct {
+ uint32_t reserved2;
+ union {
+ uint32_t crb_c;
+ /* c==0 no ccb defined */
+
+ uint32_t crb_at;
+ /* at==0 address type is ignored;
+ * all addrs effective assumed.
+ */
+
+ };
+ };
+ };
+ struct nx_dde_t source_dde; /* byte[16:31] */
+ struct nx_dde_t target_dde; /* byte[32:47] */
+ struct nx_ccb_t ccb; /* byte[48:63] */
+ union {
+ union nx_qw_t reserved64[3]; /* byte[64:96] */
+ };
+ struct nx_csb_t csb;
+} __aligned(128);
+
+/* 842 CRB */
+
+#define EFT_FC_MASK size_mask(3)
+#define EFT_FC_OFFSET 31
+#define EFT_FC_COMPRESS 0x0
+#define EFT_FC_COMPRESS_WITH_CRC 0x1
+#define EFT_FC_DECOMPRESS 0x2
+#define EFT_FC_DECOMPRESS_WITH_CRC 0x3
+#define EFT_FC_BLK_DATA_MOVE 0x4
+#endif /* NX_842 */
+
+#endif /* _NXU_H */
diff --git a/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h b/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h
new file mode 120000
index 000000000000..77fb4c7236d0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/include/vas-api.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/uapi/asm/vas-api.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
new file mode 100755
index 000000000000..c7b46c5fd7b3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/nx-gzip-test.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+if [[ ! -w /dev/crypto/nx-gzip ]]; then
+ echo "Can't access /dev/crypto/nx-gzip, skipping"
+ echo "skip: $0"
+ exit 4
+fi
+
+set -e
+
+function cleanup
+{
+ rm -f nx-tempfile*
+}
+
+trap cleanup EXIT
+
+function test_sizes
+{
+ local n=$1
+ local fname="nx-tempfile.$n"
+
+ for size in 4K 64K 1M 64M
+ do
+ echo "Testing $size ($n) ..."
+ dd if=/dev/urandom of=$fname bs=$size count=1
+ ./gzfht_test $fname
+ ./gunz_test ${fname}.nx.gz
+ done
+}
+
+echo "Doing basic test of different sizes ..."
+test_sizes 0
+
+echo "Running tests in parallel ..."
+for i in {1..16}
+do
+ test_sizes $i &
+done
+
+wait
+
+echo "OK"
+
+exit 0
diff --git a/tools/testing/selftests/powerpc/pmu/.gitignore b/tools/testing/selftests/powerpc/pmu/.gitignore
index e748f336eed3..f69b1e2641a1 100644
--- a/tools/testing/selftests/powerpc/pmu/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/.gitignore
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
count_instructions
l3_bank_test
per_event_excludes
+count_stcx_fail
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 19046db995fe..904672fb78dd 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -2,7 +2,7 @@
noarg:
$(MAKE) -C ../
-TEST_GEN_PROGS := count_instructions l3_bank_test per_event_excludes
+TEST_GEN_PROGS := count_instructions count_stcx_fail l3_bank_test per_event_excludes
EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
top_srcdir = ../../../../..
@@ -13,8 +13,12 @@ all: $(TEST_GEN_PROGS) ebb
$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
# loop.S can only be built 64-bit
+$(OUTPUT)/count_instructions: CFLAGS += -m64
$(OUTPUT)/count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES)
- $(CC) $(CFLAGS) -m64 -o $@ $^
+
+$(OUTPUT)/count_stcx_fail: CFLAGS += -m64
+$(OUTPUT)/count_stcx_fail: loop.S $(EXTRA_SOURCES)
+
$(OUTPUT)/per_event_excludes: ../utils.c
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
new file mode 100644
index 000000000000..7b4ac4537702
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -0,0 +1,161 @@
+/*
+ * Copyright 2013, Michael Ellerman, IBM Corp.
+ * Licensed under GPLv2.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "event.h"
+#include "utils.h"
+#include "lib.h"
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+static void setup_event(struct event *e, u64 config, int type, char *name)
+{
+ event_init_opts(e, config, type, name);
+
+ e->attr.disabled = 1;
+ e->attr.exclude_kernel = 1;
+ e->attr.exclude_hv = 1;
+ e->attr.exclude_idle = 1;
+}
+
+static int do_count_loop(struct event *events, u64 instructions,
+ u64 overhead, bool report)
+{
+ s64 difference, expected;
+ double percentage;
+ u64 dummy;
+
+ prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+ /* Run for 1M instructions */
+ thirty_two_instruction_loop_with_ll_sc(instructions >> 5, &dummy);
+
+ prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+ event_read(&events[0]);
+ event_read(&events[1]);
+ event_read(&events[2]);
+
+ expected = instructions + overhead + (events[2].result.value * 10);
+ difference = events[0].result.value - expected;
+ percentage = (double)difference / events[0].result.value * 100;
+
+ if (report) {
+ printf("-----\n");
+ event_report(&events[0]);
+ event_report(&events[1]);
+ event_report(&events[2]);
+
+ printf("Looped for %llu instructions, overhead %llu\n", instructions, overhead);
+ printf("Expected %llu\n", expected);
+ printf("Actual %llu\n", events[0].result.value);
+ printf("Delta %lld, %f%%\n", difference, percentage);
+ }
+
+ event_reset(&events[0]);
+ event_reset(&events[1]);
+ event_reset(&events[2]);
+
+ if (difference < 0)
+ difference = -difference;
+
+ /* Tolerate a difference below 0.0001 % */
+ difference *= 10000 * 100;
+ if (difference / events[0].result.value)
+ return -1;
+
+ return 0;
+}
+
+/* Count how many instructions it takes to do a null loop */
+static u64 determine_overhead(struct event *events)
+{
+ u64 current, overhead;
+ int i;
+
+ do_count_loop(events, 0, 0, false);
+ overhead = events[0].result.value;
+
+ for (i = 0; i < 100; i++) {
+ do_count_loop(events, 0, 0, false);
+ current = events[0].result.value;
+ if (current < overhead) {
+ printf("Replacing overhead %llu with %llu\n", overhead, current);
+ overhead = current;
+ }
+ }
+
+ return overhead;
+}
+
+#define PM_MRK_STCX_FAIL 0x03e158
+#define PM_STCX_FAIL 0x01e058
+
+static int test_body(void)
+{
+ struct event events[3];
+ u64 overhead;
+
+ setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions");
+ setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles");
+ setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail");
+
+ if (event_open(&events[0])) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ if (event_open_with_group(&events[1], events[0].fd)) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ if (event_open_with_group(&events[2], events[0].fd)) {
+ perror("perf_event_open");
+ return -1;
+ }
+
+ overhead = determine_overhead(events);
+ printf("Overhead of null loop: %llu instructions\n", overhead);
+
+ /* Run for 1Mi instructions */
+ FAIL_IF(do_count_loop(events, 1000000, overhead, true));
+
+ /* Run for 10Mi instructions */
+ FAIL_IF(do_count_loop(events, 10000000, overhead, true));
+
+ /* Run for 100Mi instructions */
+ FAIL_IF(do_count_loop(events, 100000000, overhead, true));
+
+ /* Run for 1Bi instructions */
+ FAIL_IF(do_count_loop(events, 1000000000, overhead, true));
+
+ /* Run for 16Bi instructions */
+ FAIL_IF(do_count_loop(events, 16000000000, overhead, true));
+
+ /* Run for 64Bi instructions */
+ FAIL_IF(do_count_loop(events, 64000000000, overhead, true));
+
+ event_close(&events[0]);
+ event_close(&events[1]);
+
+ return 0;
+}
+
+static int count_ll_sc(void)
+{
+ return eat_cpu(test_body);
+}
+
+int main(void)
+{
+ return test_harness(count_ll_sc, "count_ll_sc");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
index 42bddbed8b64..2920fb39439b 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
reg_access_test
event_attributes_test
cycles_test
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index 417306353e07..ca35dd8848b0 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -7,6 +7,7 @@ noarg:
# The EBB handler is 64-bit code and everything links against it
CFLAGS += -m64
+TMPOUT = $(OUTPUT)/
# Toolchains may build PIE by default which breaks the assembly
no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/trace.h b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
index 7c0fb5d2bdb1..da2a3be5441f 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/trace.h
+++ b/tools/testing/selftests/powerpc/pmu/ebb/trace.h
@@ -18,7 +18,7 @@ struct trace_entry
{
u8 type;
u8 length;
- u8 data[0];
+ u8 data[];
};
struct trace_buffer
@@ -26,7 +26,7 @@ struct trace_buffer
u64 size;
bool overflow;
void *tail;
- u8 data[0];
+ u8 data[];
};
struct trace_buffer *trace_buffer_allocate(u64 size);
diff --git a/tools/testing/selftests/powerpc/pmu/loop.S b/tools/testing/selftests/powerpc/pmu/loop.S
index 8cc9b5e2c9de..c52ba09b6fed 100644
--- a/tools/testing/selftests/powerpc/pmu/loop.S
+++ b/tools/testing/selftests/powerpc/pmu/loop.S
@@ -41,3 +41,38 @@ FUNC_START(thirty_two_instruction_loop)
subi r3,r3,1
b FUNC_NAME(thirty_two_instruction_loop)
FUNC_END(thirty_two_instruction_loop)
+
+FUNC_START(thirty_two_instruction_loop_with_ll_sc)
+ cmpdi r3,0
+ beqlr
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1 # 5
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+1: ldarx r6,0,r4 # 10
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1 # 15
+ addi r5,r5,1
+ addi r5,r5,1
+ stdcx. r6,0,r4
+ bne- 1b
+ addi r5,r5,1 # 20
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1 # 25
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1
+ addi r5,r5,1 # 30
+ subi r3,r3,1
+ b FUNC_NAME(thirty_two_instruction_loop_with_ll_sc)
+FUNC_END(thirty_two_instruction_loop_with_ll_sc)
diff --git a/tools/testing/selftests/powerpc/primitives/.gitignore b/tools/testing/selftests/powerpc/primitives/.gitignore
index 4cc4e31bed1d..1e5c04e24254 100644
--- a/tools/testing/selftests/powerpc/primitives/.gitignore
+++ b/tools/testing/selftests/powerpc/primitives/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
load_unaligned_zeropad
diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
index dce19f221c46..0e96150b7c7e 100644
--- a/tools/testing/selftests/powerpc/ptrace/.gitignore
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
ptrace-gpr
ptrace-tm-gpr
ptrace-tm-spd-gpr
diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore
index 0b969fba3beb..f795e06f5ae3 100644
--- a/tools/testing/selftests/powerpc/security/.gitignore
+++ b/tools/testing/selftests/powerpc/security/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
rfi_flush
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
index dca5852a1546..405b5364044c 100644
--- a/tools/testing/selftests/powerpc/signal/.gitignore
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -1,3 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
signal
signal_tm
sigfuz
+sigreturn_vdso
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index 113838fbbe7f..d6ae54663aed 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,10 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := signal signal_tm sigfuz
+TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
CFLAGS += -maltivec
$(OUTPUT)/signal_tm: CFLAGS += -mhtm
$(OUTPUT)/sigfuz: CFLAGS += -pthread -m64
+TEST_FILES := settings
+
top_srcdir = ../../../../..
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/signal/settings b/tools/testing/selftests/powerpc/signal/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c b/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c
new file mode 100644
index 000000000000..e3972264615b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sig_sc_double_restart.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test that a syscall does not get restarted twice, handled by trap_norestart()
+ *
+ * Based on Al's description, and a test for the bug fixed in this commit:
+ *
+ * commit 9a81c16b527528ad307843be5571111aa8d35a80
+ * Author: Al Viro <viro@zeniv.linux.org.uk>
+ * Date: Mon Sep 20 21:48:57 2010 +0100
+ *
+ * powerpc: fix double syscall restarts
+ *
+ * Make sigreturn zero regs->trap, make do_signal() do the same on all
+ * paths. As it is, signal interrupting e.g. read() from fd 512 (==
+ * ERESTARTSYS) with another signal getting unblocked when the first
+ * handler finishes will lead to restart one insn earlier than it ought
+ * to. Same for multiple signals with in-kernel handlers interrupting
+ * that sucker at the same time. Same for multiple signals of any kind
+ * interrupting that sucker on 64bit...
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <signal.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+static void SIGUSR1_handler(int sig)
+{
+ kill(getpid(), SIGUSR2);
+ /*
+ * SIGUSR2 is blocked until the handler exits, at which point it will
+ * be raised again and think there is a restart to be done because the
+ * pending restarted syscall has 512 (ERESTARTSYS) in r3. The second
+ * restart will retreat NIP another 4 bytes to fail case branch.
+ */
+}
+
+static void SIGUSR2_handler(int sig)
+{
+}
+
+static ssize_t raw_read(int fd, void *buf, size_t count)
+{
+ register long nr asm("r0") = __NR_read;
+ register long _fd asm("r3") = fd;
+ register void *_buf asm("r4") = buf;
+ register size_t _count asm("r5") = count;
+
+ asm volatile(
+" b 0f \n"
+" b 1f \n"
+" 0: sc 0 \n"
+" bns 2f \n"
+" neg %0,%0 \n"
+" b 2f \n"
+" 1: \n"
+" li %0,%4 \n"
+" 2: \n"
+ : "+r"(_fd), "+r"(nr), "+r"(_buf), "+r"(_count)
+ : "i"(-ENOANO)
+ : "memory", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "ctr", "cr0");
+
+ if (_fd < 0) {
+ errno = -_fd;
+ _fd = -1;
+ }
+
+ return _fd;
+}
+
+#define DATA "test 123"
+#define DLEN (strlen(DATA)+1)
+
+int test_restart(void)
+{
+ int pipefd[2];
+ pid_t pid;
+ char buf[512];
+
+ if (pipe(pipefd) == -1) {
+ perror("pipe");
+ exit(EXIT_FAILURE);
+ }
+
+ pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ exit(EXIT_FAILURE);
+ }
+
+ if (pid == 0) { /* Child reads from pipe */
+ struct sigaction act;
+ int fd;
+
+ memset(&act, 0, sizeof(act));
+ sigaddset(&act.sa_mask, SIGUSR2);
+ act.sa_handler = SIGUSR1_handler;
+ act.sa_flags = SA_RESTART;
+ if (sigaction(SIGUSR1, &act, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = SIGUSR2_handler;
+ act.sa_flags = SA_RESTART;
+ if (sigaction(SIGUSR2, &act, NULL) == -1) {
+ perror("sigaction");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Let's get ERESTARTSYS into r3 */
+ while ((fd = dup(pipefd[0])) != 512) {
+ if (fd == -1) {
+ perror("dup");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ if (raw_read(fd, buf, 512) == -1) {
+ if (errno == ENOANO) {
+ fprintf(stderr, "Double restart moved restart before sc instruction.\n");
+ _exit(EXIT_FAILURE);
+ }
+ perror("read");
+ exit(EXIT_FAILURE);
+ }
+
+ if (strncmp(buf, DATA, DLEN)) {
+ fprintf(stderr, "bad test string %s\n", buf);
+ exit(EXIT_FAILURE);
+ }
+
+ return 0;
+
+ } else {
+ int wstatus;
+
+ usleep(100000); /* Hack to get reader waiting */
+ kill(pid, SIGUSR1);
+ usleep(100000);
+ if (write(pipefd[1], DATA, DLEN) != DLEN) {
+ perror("write");
+ exit(EXIT_FAILURE);
+ }
+ close(pipefd[0]);
+ close(pipefd[1]);
+ if (wait(&wstatus) == -1) {
+ perror("wait");
+ exit(EXIT_FAILURE);
+ }
+ if (!WIFEXITED(wstatus)) {
+ fprintf(stderr, "child exited abnormally\n");
+ exit(EXIT_FAILURE);
+ }
+
+ FAIL_IF(WEXITSTATUS(wstatus) != EXIT_SUCCESS);
+
+ return 0;
+ }
+}
+
+int main(void)
+{
+ test_harness_set_timeout(10);
+ return test_harness(test_restart, "sig sys restart");
+}
diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c b/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c
new file mode 100644
index 000000000000..e282fff0fe25
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sigreturn_vdso.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that we can take signals with and without the VDSO mapped, which trigger
+ * different paths in the signal handling code.
+ *
+ * See handle_rt_signal64() and setup_trampoline() in signal_64.c
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+// Ensure assert() is not compiled out
+#undef NDEBUG
+#include <assert.h>
+
+#include "utils.h"
+
+static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high)
+{
+ unsigned long start, end;
+ static char buf[4096];
+ char name[128];
+ FILE *f;
+ int rc = -1;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f) {
+ perror("fopen");
+ return -1;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n",
+ &start, &end, name);
+ if (rc == 2)
+ continue;
+
+ if (rc != 3) {
+ printf("sscanf errored\n");
+ rc = -1;
+ break;
+ }
+
+ if (strstr(name, needle)) {
+ *low = start;
+ *high = end - 1;
+ rc = 0;
+ break;
+ }
+ }
+
+ fclose(f);
+
+ return rc;
+}
+
+static volatile sig_atomic_t took_signal = 0;
+
+static void sigusr1_handler(int sig)
+{
+ took_signal++;
+}
+
+int test_sigreturn_vdso(void)
+{
+ unsigned long low, high, size;
+ struct sigaction act;
+ char *p;
+
+ act.sa_handler = sigusr1_handler;
+ act.sa_flags = 0;
+ sigemptyset(&act.sa_mask);
+
+ assert(sigaction(SIGUSR1, &act, NULL) == 0);
+
+ // Confirm the VDSO is mapped, and work out where it is
+ assert(search_proc_maps("[vdso]", &low, &high) == 0);
+ size = high - low + 1;
+ printf("VDSO is at 0x%lx-0x%lx (%lu bytes)\n", low, high, size);
+
+ kill(getpid(), SIGUSR1);
+ assert(took_signal == 1);
+ printf("Signal delivered OK with VDSO mapped\n");
+
+ // Remap the VDSO somewhere else
+ p = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ assert(p != MAP_FAILED);
+ assert(mremap((void *)low, size, size, MREMAP_MAYMOVE|MREMAP_FIXED, p) != MAP_FAILED);
+ assert(search_proc_maps("[vdso]", &low, &high) == 0);
+ size = high - low + 1;
+ printf("VDSO moved to 0x%lx-0x%lx (%lu bytes)\n", low, high, size);
+
+ kill(getpid(), SIGUSR1);
+ assert(took_signal == 2);
+ printf("Signal delivered OK with VDSO moved\n");
+
+ assert(munmap((void *)low, size) == 0);
+ printf("Unmapped VDSO\n");
+
+ // Confirm the VDSO is not mapped anymore
+ assert(search_proc_maps("[vdso]", &low, &high) != 0);
+
+ // Make the stack executable
+ assert(search_proc_maps("[stack]", &low, &high) == 0);
+ size = high - low + 1;
+ mprotect((void *)low, size, PROT_READ|PROT_WRITE|PROT_EXEC);
+ printf("Remapped the stack executable\n");
+
+ kill(getpid(), SIGUSR1);
+ assert(took_signal == 3);
+ printf("Signal delivered OK with VDSO unmapped\n");
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_sigreturn_vdso, "sigreturn_vdso");
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/.gitignore b/tools/testing/selftests/powerpc/stringloops/.gitignore
index 31a17e0ba884..b0dfc74aa57e 100644
--- a/tools/testing/selftests/powerpc/stringloops/.gitignore
+++ b/tools/testing/selftests/powerpc/stringloops/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
memcmp_64
memcmp_32
strlen
diff --git a/tools/testing/selftests/powerpc/switch_endian/.gitignore b/tools/testing/selftests/powerpc/switch_endian/.gitignore
index 89e762eab676..30e962cf84d1 100644
--- a/tools/testing/selftests/powerpc/switch_endian/.gitignore
+++ b/tools/testing/selftests/powerpc/switch_endian/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
switch_endian_test
check-reversed.S
diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore
index f0f3fcc9d802..b00cab225476 100644
--- a/tools/testing/selftests/powerpc/syscalls/.gitignore
+++ b/tools/testing/selftests/powerpc/syscalls/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
ipc_unmuxed
diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore
index 98f2708d86cc..d8900a0c47a1 100644
--- a/tools/testing/selftests/powerpc/tm/.gitignore
+++ b/tools/testing/selftests/powerpc/tm/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
tm-resched-dscr
tm-syscall
tm-signal-msr-resv
@@ -13,6 +14,7 @@ tm-signal-context-chk-vmx
tm-signal-context-chk-vsx
tm-signal-context-force-tm
tm-signal-sigreturn-nt
+tm-signal-pagefault
tm-vmx-unavail
tm-unavailable
tm-trap
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index b15a1a325bd0..5881e97c73c1 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -5,7 +5,9 @@ SIGNAL_CONTEXT_CHK_TESTS := tm-signal-context-chk-gpr tm-signal-context-chk-fpu
TEST_GEN_PROGS := tm-resched-dscr tm-syscall tm-signal-msr-resv tm-signal-stack \
tm-vmxcopy tm-fork tm-tar tm-tmspr tm-vmx-unavail tm-unavailable tm-trap \
$(SIGNAL_CONTEXT_CHK_TESTS) tm-sigreturn tm-signal-sigreturn-nt \
- tm-signal-context-force-tm tm-poison
+ tm-signal-context-force-tm tm-poison tm-signal-pagefault
+
+TEST_FILES := settings
top_srcdir = ../../../../..
include ../../lib.mk
@@ -22,6 +24,8 @@ $(OUTPUT)/tm-resched-dscr: ../pmu/lib.c
$(OUTPUT)/tm-unavailable: CFLAGS += -O0 -pthread -m64 -Wno-error=uninitialized -mvsx
$(OUTPUT)/tm-trap: CFLAGS += -O0 -pthread -m64
$(OUTPUT)/tm-signal-context-force-tm: CFLAGS += -pthread -m64
+$(OUTPUT)/tm-signal-pagefault: CFLAGS += -pthread -m64
+$(OUTPUT)/tm-poison: CFLAGS += -m64
SIGNAL_CONTEXT_CHK_TESTS := $(patsubst %,$(OUTPUT)/%,$(SIGNAL_CONTEXT_CHK_TESTS))
$(SIGNAL_CONTEXT_CHK_TESTS): tm-signal.S
diff --git a/tools/testing/selftests/powerpc/tm/settings b/tools/testing/selftests/powerpc/tm/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c
index 31717625f318..421cb082f6be 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c
@@ -42,9 +42,10 @@
#endif
/* Setting contexts because the test will crash and we want to recover */
-ucontext_t init_context, main_context;
+ucontext_t init_context;
-static int count, first_time;
+/* count is changed in the signal handler, so it must be volatile */
+static volatile int count;
void usr_signal_handler(int signo, siginfo_t *si, void *uc)
{
@@ -98,11 +99,6 @@ void usr_signal_handler(int signo, siginfo_t *si, void *uc)
void seg_signal_handler(int signo, siginfo_t *si, void *uc)
{
- if (count == COUNT_MAX) {
- /* Return to tm_signal_force_msr() and exit */
- setcontext(&main_context);
- }
-
count++;
/* Reexecute the test */
@@ -126,37 +122,41 @@ void tm_trap_test(void)
*/
getcontext(&init_context);
- /* Allocated an alternative signal stack area */
- ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
- ss.ss_size = SIGSTKSZ;
- ss.ss_flags = 0;
+ while (count < COUNT_MAX) {
+ /* Allocated an alternative signal stack area */
+ ss.ss_sp = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ ss.ss_size = SIGSTKSZ;
+ ss.ss_flags = 0;
- if (ss.ss_sp == (void *)-1) {
- perror("mmap error\n");
- exit(-1);
- }
+ if (ss.ss_sp == (void *)-1) {
+ perror("mmap error\n");
+ exit(-1);
+ }
- /* Force the allocation through a page fault */
- if (madvise(ss.ss_sp, SIGSTKSZ, MADV_DONTNEED)) {
- perror("madvise\n");
- exit(-1);
- }
+ /* Force the allocation through a page fault */
+ if (madvise(ss.ss_sp, SIGSTKSZ, MADV_DONTNEED)) {
+ perror("madvise\n");
+ exit(-1);
+ }
- /* Setting an alternative stack to generate a page fault when
- * the signal is raised.
- */
- if (sigaltstack(&ss, NULL)) {
- perror("sigaltstack\n");
- exit(-1);
+ /*
+ * Setting an alternative stack to generate a page fault when
+ * the signal is raised.
+ */
+ if (sigaltstack(&ss, NULL)) {
+ perror("sigaltstack\n");
+ exit(-1);
+ }
+
+ /* The signal handler will enable MSR_TS */
+ sigaction(SIGUSR1, &usr_sa, NULL);
+ /* If it does not crash, it might segfault, avoid it to retest */
+ sigaction(SIGSEGV, &seg_sa, NULL);
+
+ raise(SIGUSR1);
+ count++;
}
-
- /* The signal handler will enable MSR_TS */
- sigaction(SIGUSR1, &usr_sa, NULL);
- /* If it does not crash, it will segfault, avoid it to retest */
- sigaction(SIGSEGV, &seg_sa, NULL);
-
- raise(SIGUSR1);
}
int tm_signal_context_force_tm(void)
@@ -169,11 +169,7 @@ int tm_signal_context_force_tm(void)
*/
SKIP_IF(!is_ppc64le());
- /* Will get back here after COUNT_MAX interactions */
- getcontext(&main_context);
-
- if (!first_time++)
- tm_trap_test();
+ tm_trap_test();
return EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
new file mode 100644
index 000000000000..5908bc6abe60
--- /dev/null
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2020, Gustavo Luiz Duarte, IBM Corp.
+ *
+ * This test starts a transaction and triggers a signal, forcing a pagefault to
+ * happen when the kernel signal handling code touches the user signal stack.
+ *
+ * In order to avoid pre-faulting the signal stack memory and to force the
+ * pagefault to happen precisely in the kernel signal handling code, the
+ * pagefault handling is done in userspace using the userfaultfd facility.
+ *
+ * Further pagefaults are triggered by crafting the signal handler's ucontext
+ * to point to additional memory regions managed by the userfaultfd, so using
+ * the same mechanism used to avoid pre-faulting the signal stack memory.
+ *
+ * On failure (bug is present) kernel crashes or never returns control back to
+ * userspace. If bug is not present, tests completes almost immediately.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/userfaultfd.h>
+#include <poll.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <signal.h>
+#include <errno.h>
+
+#include "tm.h"
+
+
+#define UF_MEM_SIZE 655360 /* 10 x 64k pages */
+
+/* Memory handled by userfaultfd */
+static char *uf_mem;
+static size_t uf_mem_offset = 0;
+
+/*
+ * Data that will be copied into the faulting pages (instead of zero-filled
+ * pages). This is used to make the test more reliable and avoid segfaulting
+ * when we return from the signal handler. Since we are making the signal
+ * handler's ucontext point to newly allocated memory, when that memory is
+ * paged-in it will contain the expected content.
+ */
+static char backing_mem[UF_MEM_SIZE];
+
+static size_t pagesize;
+
+/*
+ * Return a chunk of at least 'size' bytes of memory that will be handled by
+ * userfaultfd. If 'backing_data' is not NULL, its content will be save to
+ * 'backing_mem' and then copied into the faulting pages when the page fault
+ * is handled.
+ */
+void *get_uf_mem(size_t size, void *backing_data)
+{
+ void *ret;
+
+ if (uf_mem_offset + size > UF_MEM_SIZE) {
+ fprintf(stderr, "Requesting more uf_mem than expected!\n");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = &uf_mem[uf_mem_offset];
+
+ /* Save the data that will be copied into the faulting page */
+ if (backing_data != NULL)
+ memcpy(&backing_mem[uf_mem_offset], backing_data, size);
+
+ /* Reserve the requested amount of uf_mem */
+ uf_mem_offset += size;
+ /* Keep uf_mem_offset aligned to the page size (round up) */
+ uf_mem_offset = (uf_mem_offset + pagesize - 1) & ~(pagesize - 1);
+
+ return ret;
+}
+
+void *fault_handler_thread(void *arg)
+{
+ struct uffd_msg msg; /* Data read from userfaultfd */
+ long uffd; /* userfaultfd file descriptor */
+ struct uffdio_copy uffdio_copy;
+ struct pollfd pollfd;
+ ssize_t nread, offset;
+
+ uffd = (long) arg;
+
+ for (;;) {
+ pollfd.fd = uffd;
+ pollfd.events = POLLIN;
+ if (poll(&pollfd, 1, -1) == -1) {
+ perror("poll() failed");
+ exit(EXIT_FAILURE);
+ }
+
+ nread = read(uffd, &msg, sizeof(msg));
+ if (nread == 0) {
+ fprintf(stderr, "read(): EOF on userfaultfd\n");
+ exit(EXIT_FAILURE);
+ }
+
+ if (nread == -1) {
+ perror("read() failed");
+ exit(EXIT_FAILURE);
+ }
+
+ /* We expect only one kind of event */
+ if (msg.event != UFFD_EVENT_PAGEFAULT) {
+ fprintf(stderr, "Unexpected event on userfaultfd\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * We need to handle page faults in units of pages(!).
+ * So, round faulting address down to page boundary.
+ */
+ uffdio_copy.dst = msg.arg.pagefault.address & ~(pagesize-1);
+
+ offset = (char *) uffdio_copy.dst - uf_mem;
+ uffdio_copy.src = (unsigned long) &backing_mem[offset];
+
+ uffdio_copy.len = pagesize;
+ uffdio_copy.mode = 0;
+ uffdio_copy.copy = 0;
+ if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy) == -1) {
+ perror("ioctl-UFFDIO_COPY failed");
+ exit(EXIT_FAILURE);
+ }
+ }
+}
+
+void setup_uf_mem(void)
+{
+ long uffd; /* userfaultfd file descriptor */
+ pthread_t thr;
+ struct uffdio_api uffdio_api;
+ struct uffdio_register uffdio_register;
+ int ret;
+
+ pagesize = sysconf(_SC_PAGE_SIZE);
+
+ /* Create and enable userfaultfd object */
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd == -1) {
+ perror("userfaultfd() failed");
+ exit(EXIT_FAILURE);
+ }
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
+ perror("ioctl-UFFDIO_API failed");
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Create a private anonymous mapping. The memory will be demand-zero
+ * paged, that is, not yet allocated. When we actually touch the memory
+ * the related page will be allocated via the userfaultfd mechanism.
+ */
+ uf_mem = mmap(NULL, UF_MEM_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (uf_mem == MAP_FAILED) {
+ perror("mmap() failed");
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Register the memory range of the mapping we've just mapped to be
+ * handled by the userfaultfd object. In 'mode' we request to track
+ * missing pages (i.e. pages that have not yet been faulted-in).
+ */
+ uffdio_register.range.start = (unsigned long) uf_mem;
+ uffdio_register.range.len = UF_MEM_SIZE;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
+ perror("ioctl-UFFDIO_REGISTER");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Create a thread that will process the userfaultfd events */
+ ret = pthread_create(&thr, NULL, fault_handler_thread, (void *) uffd);
+ if (ret != 0) {
+ fprintf(stderr, "pthread_create(): Error. Returned %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+}
+
+/*
+ * Assumption: the signal was delivered while userspace was in transactional or
+ * suspended state, i.e. uc->uc_link != NULL.
+ */
+void signal_handler(int signo, siginfo_t *si, void *uc)
+{
+ ucontext_t *ucp = uc;
+
+ /* Skip 'trap' after returning, otherwise we get a SIGTRAP again */
+ ucp->uc_link->uc_mcontext.regs->nip += 4;
+
+ ucp->uc_mcontext.v_regs =
+ get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_mcontext.v_regs);
+
+ ucp->uc_link->uc_mcontext.v_regs =
+ get_uf_mem(sizeof(elf_vrreg_t), ucp->uc_link->uc_mcontext.v_regs);
+
+ ucp->uc_link = get_uf_mem(sizeof(ucontext_t), ucp->uc_link);
+}
+
+bool have_userfaultfd(void)
+{
+ long rc;
+
+ errno = 0;
+ rc = syscall(__NR_userfaultfd, -1);
+
+ return rc == 0 || errno != ENOSYS;
+}
+
+int tm_signal_pagefault(void)
+{
+ struct sigaction sa;
+ stack_t ss;
+
+ SKIP_IF(!have_htm());
+ SKIP_IF(!have_userfaultfd());
+
+ setup_uf_mem();
+
+ /*
+ * Set an alternative stack that will generate a page fault when the
+ * signal is raised. The page fault will be treated via userfaultfd,
+ * i.e. via fault_handler_thread.
+ */
+ ss.ss_sp = get_uf_mem(SIGSTKSZ, NULL);
+ ss.ss_size = SIGSTKSZ;
+ ss.ss_flags = 0;
+ if (sigaltstack(&ss, NULL) == -1) {
+ perror("sigaltstack() failed");
+ exit(EXIT_FAILURE);
+ }
+
+ sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
+ sa.sa_sigaction = signal_handler;
+ if (sigaction(SIGTRAP, &sa, NULL) == -1) {
+ perror("sigaction() failed");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Trigger a SIGTRAP in transactional state */
+ asm __volatile__(
+ "tbegin.;"
+ "beq 1f;"
+ "trap;"
+ "1: ;"
+ : : : "memory");
+
+ /* Trigger a SIGTRAP in suspended state */
+ asm __volatile__(
+ "tbegin.;"
+ "beq 1f;"
+ "tsuspend.;"
+ "trap;"
+ "tresume.;"
+ "1: ;"
+ : : : "memory");
+
+ return EXIT_SUCCESS;
+}
+
+int main(int argc, char **argv)
+{
+ /*
+ * Depending on kernel config, the TM Bad Thing might not result in a
+ * crash, instead the kernel never returns control back to userspace, so
+ * set a tight timeout. If the test passes it completes almost
+ * immediately.
+ */
+ test_harness_set_timeout(2);
+ return test_harness(tm_signal_pagefault, "tm_signal_pagefault");
+}
diff --git a/tools/testing/selftests/powerpc/vphn/.gitignore b/tools/testing/selftests/powerpc/vphn/.gitignore
index 7c04395010cb..b744aedfd1f2 100644
--- a/tools/testing/selftests/powerpc/vphn/.gitignore
+++ b/tools/testing/selftests/powerpc/vphn/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
test-vphn
diff --git a/tools/testing/selftests/prctl/.gitignore b/tools/testing/selftests/prctl/.gitignore
index 0b5c27447bf6..91af2b631bc9 100644
--- a/tools/testing/selftests/prctl/.gitignore
+++ b/tools/testing/selftests/prctl/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
disable-tsc-ctxt-sw-stress-test
disable-tsc-on-off-stress-test
disable-tsc-test
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index 66fab4c58ed4..bed4b5318a86 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -1,7 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
/fd-001-lookup
/fd-002-posix-eq
/fd-003-kthread
+/proc-fsconfig-hidepid
/proc-loadavg-001
+/proc-multiple-procfs
/proc-pid-vm
/proc-self-map-files-001
/proc-self-map-files-002
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index a8ed0f684829..8be8a03d2973 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -19,5 +19,7 @@ TEST_GEN_PROGS += self
TEST_GEN_PROGS += setns-dcache
TEST_GEN_PROGS += setns-sysvipc
TEST_GEN_PROGS += thread-self
+TEST_GEN_PROGS += proc-multiple-procfs
+TEST_GEN_PROGS += proc-fsconfig-hidepid
include ../lib.mk
diff --git a/tools/testing/selftests/proc/proc-fsconfig-hidepid.c b/tools/testing/selftests/proc/proc-fsconfig-hidepid.c
new file mode 100644
index 000000000000..b9af8f537185
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-fsconfig-hidepid.c
@@ -0,0 +1,50 @@
+/*
+ * Copyright © 2020 Alexey Gladkov <gladkov.alexey@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <linux/mount.h>
+#include <linux/unistd.h>
+
+static inline int fsopen(const char *fsname, unsigned int flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int fsconfig(int fd, unsigned int cmd, const char *key, const void *val, int aux)
+{
+ return syscall(__NR_fsconfig, fd, cmd, key, val, aux);
+}
+
+int main(void)
+{
+ int fsfd, ret;
+ int hidepid = 2;
+
+ assert((fsfd = fsopen("proc", 0)) != -1);
+
+ ret = fsconfig(fsfd, FSCONFIG_SET_BINARY, "hidepid", &hidepid, 0);
+ assert(ret == -1);
+ assert(errno == EINVAL);
+
+ assert(!fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "2", 0));
+ assert(!fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "invisible", 0));
+
+ assert(!close(fsfd));
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-multiple-procfs.c b/tools/testing/selftests/proc/proc-multiple-procfs.c
new file mode 100644
index 000000000000..ab912ad95dab
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-multiple-procfs.c
@@ -0,0 +1,48 @@
+/*
+ * Copyright © 2020 Alexey Gladkov <gladkov.alexey@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int main(void)
+{
+ struct stat proc_st1, proc_st2;
+ char procbuff[] = "/tmp/proc.XXXXXX/meminfo";
+ char procdir1[] = "/tmp/proc.XXXXXX";
+ char procdir2[] = "/tmp/proc.XXXXXX";
+
+ assert(mkdtemp(procdir1) != NULL);
+ assert(mkdtemp(procdir2) != NULL);
+
+ assert(!mount("proc", procdir1, "proc", 0, "hidepid=1"));
+ assert(!mount("proc", procdir2, "proc", 0, "hidepid=2"));
+
+ snprintf(procbuff, sizeof(procbuff), "%s/meminfo", procdir1);
+ assert(!stat(procbuff, &proc_st1));
+
+ snprintf(procbuff, sizeof(procbuff), "%s/meminfo", procdir2);
+ assert(!stat(procbuff, &proc_st2));
+
+ umount(procdir1);
+ umount(procdir2);
+
+ assert(proc_st1.st_dev != proc_st2.st_dev);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/pstore/.gitignore b/tools/testing/selftests/pstore/.gitignore
index 5a4a26e5464b..9938fb406389 100644
--- a/tools/testing/selftests/pstore/.gitignore
+++ b/tools/testing/selftests/pstore/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
logs
*uuid
diff --git a/tools/testing/selftests/pstore/pstore_tests b/tools/testing/selftests/pstore/pstore_tests
index 1cef54458aff..2aa9a3852a84 100755
--- a/tools/testing/selftests/pstore/pstore_tests
+++ b/tools/testing/selftests/pstore/pstore_tests
@@ -10,7 +10,7 @@
. ./common_tests
prlog -n "Checking pstore console is registered ... "
-dmesg | grep -q "console \[pstore"
+dmesg | grep -Eq "console \[(pstore|${backend})"
show_result $?
prlog -n "Checking /dev/pmsg0 exists ... "
diff --git a/tools/testing/selftests/ptp/.gitignore b/tools/testing/selftests/ptp/.gitignore
index f562e49d6917..534ca26eee48 100644
--- a/tools/testing/selftests/ptp/.gitignore
+++ b/tools/testing/selftests/ptp/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
testptp
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index c0dd10257df5..da7a9dda9490 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -269,14 +269,16 @@ int main(int argc, char *argv[])
" %d programmable periodic signals\n"
" %d pulse per second\n"
" %d programmable pins\n"
- " %d cross timestamping\n",
+ " %d cross timestamping\n"
+ " %d adjust_phase\n",
caps.max_adj,
caps.n_alarm,
caps.n_ext_ts,
caps.n_per_out,
caps.pps,
caps.n_pins,
- caps.cross_timestamping);
+ caps.cross_timestamping,
+ caps.adjust_phase);
}
}
diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore
index cfcc49a7def7..7bebf9534a86 100644
--- a/tools/testing/selftests/ptrace/.gitignore
+++ b/tools/testing/selftests/ptrace/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
get_syscall_info
peeksiginfo
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
index c0b7f89f0930..2f1f532c39db 100644
--- a/tools/testing/selftests/ptrace/Makefile
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -iquote../../../../include/uapi -Wall
+CFLAGS += -std=c99 -pthread -iquote../../../../include/uapi -Wall
-TEST_GEN_PROGS := get_syscall_info peeksiginfo
+TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess
include ../lib.mk
diff --git a/tools/testing/selftests/ptrace/vmaccess.c b/tools/testing/selftests/ptrace/vmaccess.c
new file mode 100644
index 000000000000..4db327b44586
--- /dev/null
+++ b/tools/testing/selftests/ptrace/vmaccess.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2020 Bernd Edlinger <bernd.edlinger@hotmail.de>
+ * All rights reserved.
+ *
+ * Check whether /proc/$pid/mem can be accessed without causing deadlocks
+ * when de_thread is blocked with ->cred_guard_mutex held.
+ */
+
+#include "../kselftest_harness.h"
+#include <stdio.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <signal.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+
+static void *thread(void *arg)
+{
+ ptrace(PTRACE_TRACEME, 0, 0L, 0L);
+ return NULL;
+}
+
+TEST(vmaccess)
+{
+ int f, pid = fork();
+ char mm[64];
+
+ if (!pid) {
+ pthread_t pt;
+
+ pthread_create(&pt, NULL, thread, NULL);
+ pthread_join(pt, NULL);
+ execlp("true", "true", NULL);
+ }
+
+ sleep(1);
+ sprintf(mm, "/proc/%d/mem", pid);
+ f = open(mm, O_RDONLY);
+ ASSERT_GE(f, 0);
+ close(f);
+ f = kill(pid, SIGCONT);
+ ASSERT_EQ(f, 0);
+}
+
+TEST(attach)
+{
+ int s, k, pid = fork();
+
+ if (!pid) {
+ pthread_t pt;
+
+ pthread_create(&pt, NULL, thread, NULL);
+ pthread_join(pt, NULL);
+ execlp("sleep", "sleep", "2", NULL);
+ }
+
+ sleep(1);
+ k = ptrace(PTRACE_ATTACH, pid, 0L, 0L);
+ ASSERT_EQ(errno, EAGAIN);
+ ASSERT_EQ(k, -1);
+ k = waitpid(-1, &s, WNOHANG);
+ ASSERT_NE(k, -1);
+ ASSERT_NE(k, 0);
+ ASSERT_NE(k, pid);
+ ASSERT_EQ(WIFEXITED(s), 1);
+ ASSERT_EQ(WEXITSTATUS(s), 0);
+ sleep(1);
+ k = ptrace(PTRACE_ATTACH, pid, 0L, 0L);
+ ASSERT_EQ(k, 0);
+ k = waitpid(-1, &s, 0);
+ ASSERT_EQ(k, pid);
+ ASSERT_EQ(WIFSTOPPED(s), 1);
+ ASSERT_EQ(WSTOPSIG(s), SIGSTOP);
+ k = ptrace(PTRACE_DETACH, pid, 0L, 0L);
+ ASSERT_EQ(k, 0);
+ k = waitpid(-1, &s, 0);
+ ASSERT_EQ(k, pid);
+ ASSERT_EQ(WIFEXITED(s), 1);
+ ASSERT_EQ(WEXITSTATUS(s), 0);
+ k = waitpid(-1, NULL, 0);
+ ASSERT_EQ(k, -1);
+ ASSERT_EQ(errno, ECHILD);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/rcutorture/.gitignore b/tools/testing/selftests/rcutorture/.gitignore
index ccc240275d1c..f6cbce77460b 100644
--- a/tools/testing/selftests/rcutorture/.gitignore
+++ b/tools/testing/selftests/rcutorture/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
initrd
b[0-9]*
res
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index c3a49fb4d6f6..12810229fddc 100644
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -12,7 +12,7 @@
# Returns 1 if the specified boot-parameter string tells rcutorture to
# test CPU-hotplug operations.
bootparam_hotplug_cpu () {
- echo "$1" | grep -q "rcutorture\.onoff_"
+ echo "$1" | grep -q "torture\.onoff_"
}
# checkarg --argname argtype $# arg mustmatch cannotmatch
diff --git a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
new file mode 100755
index 000000000000..e5cc6b2f195e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# If this was a KCSAN run, collapse the reports in the various console.log
+# files onto pairs of functions.
+#
+# Usage: kcsan-collapse.sh resultsdir
+#
+# Copyright (C) 2020 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+if test -z "$TORTURE_KCONFIG_KCSAN_ARG"
+then
+ exit 0
+fi
+cat $1/*/console.log |
+ grep "BUG: KCSAN: " |
+ sed -e 's/^\[[^]]*] //' |
+ sort |
+ uniq -c |
+ sort -k1nr > $1/kcsan.sum
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 1871d00bccd7..6f50722f251f 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -20,7 +20,9 @@
rundir="${1}"
if test -z "$rundir" -o ! -d "$rundir"
then
+ echo Directory "$rundir" not found.
echo Usage: $0 directory
+ exit 1
fi
editor=${EDITOR-vi}
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 9d9a41625dd9..1706cd4466b4 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -41,7 +41,21 @@ else
title="$title ($ngpsps/s)"
fi
echo $title $stopstate $fwdprog
- nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
+ nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | \
+ awk -v sum=0 '
+ {
+ for (i = 0; i <= NF; i++) {
+ sum += $i;
+ if ($i ~ /Batch:/) {
+ sum = 0;
+ i = i + 2;
+ }
+ }
+ }
+
+ END {
+ print sum
+ }'`
if test -z "$nclosecalls"
then
exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index e5edd5198725..736f04749b90 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -13,6 +13,9 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
+T=/tmp/kvm-recheck.sh.$$
+trap 'rm -f $T' 0 2
+
PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
. functions.sh
for rd in "$@"
@@ -67,5 +70,26 @@ do
fi
fi
done
+ if test -f "$rd/kcsan.sum"
+ then
+ if test -s "$rd/kcsan.sum"
+ then
+ echo KCSAN summary in $rd/kcsan.sum
+ else
+ echo Clean KCSAN run in $rd
+ fi
+ fi
done
-EDITOR=echo kvm-find-errors.sh "${@: -1}" > /dev/null 2>&1
+EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1
+ret=$?
+builderrors="`tr ' ' '\012' < $T | grep -c '/Make.out.diags'`"
+if test "$builderrors" -gt 0
+then
+ echo $builderrors runs with build errors.
+fi
+runerrors="`tr ' ' '\012' < $T | grep -c '/console.log.diags'`"
+if test "$runerrors" -gt 0
+then
+ echo $runerrors runs with runtime errors.
+fi
+exit $ret
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index e0352304b98b..6ff611c630d1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -44,30 +44,32 @@ then
fi
echo ' ---' `date`: Starting build
echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
-touch $resdir/ConfigFragment.input $resdir/ConfigFragment
-if test -r "$config_dir/CFcommon"
-then
- echo " --- $config_dir/CFcommon" >> $resdir/ConfigFragment.input
- cat < $config_dir/CFcommon >> $resdir/ConfigFragment.input
- config_override.sh $config_dir/CFcommon $config_template > $T/Kc1
- grep '#CHECK#' $config_dir/CFcommon >> $resdir/ConfigFragment
-else
- cp $config_template $T/Kc1
-fi
-echo " --- $config_template" >> $resdir/ConfigFragment.input
-cat $config_template >> $resdir/ConfigFragment.input
-grep '#CHECK#' $config_template >> $resdir/ConfigFragment
-if test -n "$TORTURE_KCONFIG_ARG"
-then
- echo $TORTURE_KCONFIG_ARG | tr -s " " "\012" > $T/cmdline
- echo " --- --kconfig argument" >> $resdir/ConfigFragment.input
- cat $T/cmdline >> $resdir/ConfigFragment.input
- config_override.sh $T/Kc1 $T/cmdline > $T/Kc2
- # Note that "#CHECK#" is not permitted on commandline.
-else
- cp $T/Kc1 $T/Kc2
-fi
-cat $T/Kc2 >> $resdir/ConfigFragment
+touch $resdir/ConfigFragment.input
+
+# Combine additional Kconfig options into an existing set such that
+# newer options win. The first argument is the Kconfig source ID, the
+# second the to-be-updated file within $T, and the third and final the
+# list of additional Kconfig options. Note that a $2.tmp file is
+# created when doing the update.
+config_override_param () {
+ if test -n "$3"
+ then
+ echo $3 | sed -e 's/^ *//' -e 's/ *$//' | tr -s " " "\012" > $T/Kconfig_args
+ echo " --- $1" >> $resdir/ConfigFragment.input
+ cat $T/Kconfig_args >> $resdir/ConfigFragment.input
+ config_override.sh $T/$2 $T/Kconfig_args > $T/$2.tmp
+ mv $T/$2.tmp $T/$2
+ # Note that "#CHECK#" is not permitted on commandline.
+ fi
+}
+
+echo > $T/KcList
+config_override_param "$config_dir/CFcommon" KcList "`cat $config_dir/CFcommon 2> /dev/null`"
+config_override_param "$config_template" KcList "`cat $config_template 2> /dev/null`"
+config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG"
+config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG"
+config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG"
+cp $T/KcList $resdir/ConfigFragment
base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
@@ -80,7 +82,7 @@ then
ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
# Arch-independent indicator
touch $resdir/builtkernel
-elif kvm-build.sh $T/Kc2 $resdir
+elif kvm-build.sh $T/KcList $resdir
then
# Had to build a kernel for this test.
QEMU="`identify_qemu vmlinux`"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 78d18ab8e954..c279cf9cb010 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -31,6 +31,8 @@ TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG=""
+TORTURE_KCONFIG_KASAN_ARG=""
+TORTURE_KCONFIG_KCSAN_ARG=""
TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
TORTURE_SHUTDOWN_GRACE=180
@@ -39,7 +41,7 @@ TORTURE_TRUST_MAKE=""
resdir=""
configs=""
cpus=0
-ds=`date +%Y.%m.%d-%H:%M:%S`
+ds=`date +%Y.%m.%d-%H.%M.%S`
jitter="-1"
usage () {
@@ -133,6 +135,12 @@ do
TORTURE_KCONFIG_ARG="$2"
shift
;;
+ --kasan)
+ TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+ ;;
+ --kcsan)
+ TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_KCSAN_INTERRUPT_WATCHER=y"; export TORTURE_KCONFIG_KCSAN_ARG
+ ;;
--kmake-arg)
checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
TORTURE_KMAKE_ARG="$2"
@@ -310,6 +318,8 @@ TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
+TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG
+TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG
TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
@@ -464,6 +474,7 @@ echo
echo
echo " --- `date` Test summary:"
echo Results directory: $resdir/$ds
+kcsan-collapse.sh $resdir/$ds
kvm-recheck.sh $resdir/$ds
___EOF___
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index c3c1fb5a9e1f..f2b20db9e296 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -14,3 +14,6 @@ TINY02
TASKS01
TASKS02
TASKS03
+RUDE01
+TRACE01
+TRACE02
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
index e19a444a0684..0e92d85313aa 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon
@@ -3,3 +3,5 @@ CONFIG_PRINTK_TIME=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
CONFIG_KVM_GUEST=y
+CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n
+CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
new file mode 100644
index 000000000000..bafe94cbd739
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -0,0 +1,10 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot
new file mode 100644
index 000000000000..9363708c9075
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-rude
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
new file mode 100644
index 000000000000..12e7661b86f5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -0,0 +1,11 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_TASKS_TRACE_RCU_READ_MB=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
new file mode 100644
index 000000000000..9675ad632dcc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-tracing
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
new file mode 100644
index 000000000000..b69ed6673c41
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -0,0 +1,11 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_TASKS_TRACE_RCU_READ_MB=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
new file mode 100644
index 000000000000..9675ad632dcc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
@@ -0,0 +1 @@
+rcutorture.torture_type=tasks-tracing
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
new file mode 100644
index 000000000000..7311f84a5876
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -0,0 +1,18 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=56
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_FAST_NO_HZ=n
+CONFIG_RCU_TRACE=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_DEBUG_OBJECTS=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=n
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
index 712a3d41a325..24e27957efcc 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
srcu.c
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
index 1d016e66980a..57d296341304 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
srcu.h
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
index f47cb2045f13..d65462d64816 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
+++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
*.out
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
new file mode 100644
index 000000000000..d585cc1948cc
--- /dev/null
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -0,0 +1,17 @@
+CC = $(CROSS_COMPILE)gcc
+CFLAGS = -g -Wall
+SRCS=$(wildcard *.c)
+OBJS=$(SRCS:.c=.o)
+
+all: resctrl_tests
+
+$(OBJS): $(SRCS)
+ $(CC) $(CFLAGS) -c $(SRCS)
+
+resctrl_tests: $(OBJS)
+ $(CC) $(CFLAGS) -o $@ $^
+
+.PHONY: clean
+
+clean:
+ $(RM) $(OBJS) resctrl_tests
diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README
new file mode 100644
index 000000000000..6e5a0ffa18e8
--- /dev/null
+++ b/tools/testing/selftests/resctrl/README
@@ -0,0 +1,53 @@
+resctrl_tests - resctrl file system test suit
+
+Authors:
+ Fenghua Yu <fenghua.yu@intel.com>
+ Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+
+resctrl_tests tests various resctrl functionalities and interfaces including
+both software and hardware.
+
+Currently it supports Memory Bandwidth Monitoring test and Memory Bandwidth
+Allocation test on Intel RDT hardware. More tests will be added in the future.
+And the test suit can be extended to cover AMD QoS and ARM MPAM hardware
+as well.
+
+BUILD
+-----
+
+Run "make" to build executable file "resctrl_tests".
+
+RUN
+---
+
+To use resctrl_tests, root or sudoer privileges are required. This is because
+the test needs to mount resctrl file system and change contents in the file
+system.
+
+Executing the test without any parameter will run all supported tests:
+
+ sudo ./resctrl_tests
+
+OVERVIEW OF EXECUTION
+---------------------
+
+A test case has four stages:
+
+ - setup: mount resctrl file system, create group, setup schemata, move test
+ process pids to tasks, start benchmark.
+ - execute: let benchmark run
+ - verify: get resctrl data and verify the data with another source, e.g.
+ perf event.
+ - teardown: umount resctrl and clear temporary files.
+
+ARGUMENTS
+---------
+
+Parameter '-h' shows usage information.
+
+usage: resctrl_tests [-h] [-b "benchmark_cmd [options]"] [-t test list] [-n no_of_bits]
+ -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM default benchmark is builtin fill_buf
+ -t test list: run tests specified in the test list, e.g. -t mbm, mba, cqm, cat
+ -n no_of_bits: run cache tests using specified no of bits in cache bit mask
+ -p cpu_no: specify CPU number to run the test. 1 is default
+ -h: help
diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
new file mode 100644
index 000000000000..38dbf4962e33
--- /dev/null
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdint.h>
+#include "resctrl.h"
+
+struct read_format {
+ __u64 nr; /* The number of events */
+ struct {
+ __u64 value; /* The value of the event */
+ } values[2];
+};
+
+static struct perf_event_attr pea_llc_miss;
+static struct read_format rf_cqm;
+static int fd_lm;
+char llc_occup_path[1024];
+
+static void initialize_perf_event_attr(void)
+{
+ pea_llc_miss.type = PERF_TYPE_HARDWARE;
+ pea_llc_miss.size = sizeof(struct perf_event_attr);
+ pea_llc_miss.read_format = PERF_FORMAT_GROUP;
+ pea_llc_miss.exclude_kernel = 1;
+ pea_llc_miss.exclude_hv = 1;
+ pea_llc_miss.exclude_idle = 1;
+ pea_llc_miss.exclude_callchain_kernel = 1;
+ pea_llc_miss.inherit = 1;
+ pea_llc_miss.exclude_guest = 1;
+ pea_llc_miss.disabled = 1;
+}
+
+static void ioctl_perf_event_ioc_reset_enable(void)
+{
+ ioctl(fd_lm, PERF_EVENT_IOC_RESET, 0);
+ ioctl(fd_lm, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+static int perf_event_open_llc_miss(pid_t pid, int cpu_no)
+{
+ fd_lm = perf_event_open(&pea_llc_miss, pid, cpu_no, -1,
+ PERF_FLAG_FD_CLOEXEC);
+ if (fd_lm == -1) {
+ perror("Error opening leader");
+ ctrlc_handler(0, NULL, NULL);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int initialize_llc_perf(void)
+{
+ memset(&pea_llc_miss, 0, sizeof(struct perf_event_attr));
+ memset(&rf_cqm, 0, sizeof(struct read_format));
+
+ /* Initialize perf_event_attr structures for HW_CACHE_MISSES */
+ initialize_perf_event_attr();
+
+ pea_llc_miss.config = PERF_COUNT_HW_CACHE_MISSES;
+
+ rf_cqm.nr = 1;
+
+ return 0;
+}
+
+static int reset_enable_llc_perf(pid_t pid, int cpu_no)
+{
+ int ret = 0;
+
+ ret = perf_event_open_llc_miss(pid, cpu_no);
+ if (ret < 0)
+ return ret;
+
+ /* Start counters to log values */
+ ioctl_perf_event_ioc_reset_enable();
+
+ return 0;
+}
+
+/*
+ * get_llc_perf: llc cache miss through perf events
+ * @cpu_no: CPU number that the benchmark PID is binded to
+ *
+ * Perf events like HW_CACHE_MISSES could be used to validate number of
+ * cache lines allocated.
+ *
+ * Return: =0 on success. <0 on failure.
+ */
+static int get_llc_perf(unsigned long *llc_perf_miss)
+{
+ __u64 total_misses;
+
+ /* Stop counters after one span to get miss rate */
+
+ ioctl(fd_lm, PERF_EVENT_IOC_DISABLE, 0);
+
+ if (read(fd_lm, &rf_cqm, sizeof(struct read_format)) == -1) {
+ perror("Could not get llc misses through perf");
+
+ return -1;
+ }
+
+ total_misses = rf_cqm.values[0].value;
+
+ close(fd_lm);
+
+ *llc_perf_miss = total_misses;
+
+ return 0;
+}
+
+/*
+ * Get LLC Occupancy as reported by RESCTRL FS
+ * For CQM,
+ * 1. If con_mon grp and mon grp given, then read from mon grp in
+ * con_mon grp
+ * 2. If only con_mon grp given, then read from con_mon grp
+ * 3. If both not given, then read from root con_mon grp
+ * For CAT,
+ * 1. If con_mon grp given, then read from it
+ * 2. If con_mon grp not given, then read from root con_mon grp
+ *
+ * Return: =0 on success. <0 on failure.
+ */
+static int get_llc_occu_resctrl(unsigned long *llc_occupancy)
+{
+ FILE *fp;
+
+ fp = fopen(llc_occup_path, "r");
+ if (!fp) {
+ perror("Failed to open results file");
+
+ return errno;
+ }
+ if (fscanf(fp, "%lu", llc_occupancy) <= 0) {
+ perror("Could not get llc occupancy");
+ fclose(fp);
+
+ return -1;
+ }
+ fclose(fp);
+
+ return 0;
+}
+
+/*
+ * print_results_cache: the cache results are stored in a file
+ * @filename: file that stores the results
+ * @bm_pid: child pid that runs benchmark
+ * @llc_value: perf miss value /
+ * llc occupancy value reported by resctrl FS
+ *
+ * Return: 0 on success. non-zero on failure.
+ */
+static int print_results_cache(char *filename, int bm_pid,
+ unsigned long llc_value)
+{
+ FILE *fp;
+
+ if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
+ printf("Pid: %d \t LLC_value: %lu\n", bm_pid,
+ llc_value);
+ } else {
+ fp = fopen(filename, "a");
+ if (!fp) {
+ perror("Cannot open results file");
+
+ return errno;
+ }
+ fprintf(fp, "Pid: %d \t llc_value: %lu\n", bm_pid, llc_value);
+ fclose(fp);
+ }
+
+ return 0;
+}
+
+int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
+{
+ unsigned long llc_perf_miss = 0, llc_occu_resc = 0, llc_value = 0;
+ int ret;
+
+ /*
+ * Measure cache miss from perf.
+ */
+ if (!strcmp(param->resctrl_val, "cat")) {
+ ret = get_llc_perf(&llc_perf_miss);
+ if (ret < 0)
+ return ret;
+ llc_value = llc_perf_miss;
+ }
+
+ /*
+ * Measure llc occupancy from resctrl.
+ */
+ if (!strcmp(param->resctrl_val, "cqm")) {
+ ret = get_llc_occu_resctrl(&llc_occu_resc);
+ if (ret < 0)
+ return ret;
+ llc_value = llc_occu_resc;
+ }
+ ret = print_results_cache(param->filename, bm_pid, llc_value);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+/*
+ * cache_val: execute benchmark and measure LLC occupancy resctrl
+ * and perf cache miss for the benchmark
+ * @param: parameters passed to cache_val()
+ *
+ * Return: 0 on success. non-zero on failure.
+ */
+int cat_val(struct resctrl_val_param *param)
+{
+ int malloc_and_init_memory = 1, memflush = 1, operation = 0, ret = 0;
+ char *resctrl_val = param->resctrl_val;
+ pid_t bm_pid;
+
+ if (strcmp(param->filename, "") == 0)
+ sprintf(param->filename, "stdio");
+
+ bm_pid = getpid();
+
+ /* Taskset benchmark to specified cpu */
+ ret = taskset_benchmark(bm_pid, param->cpu_no);
+ if (ret)
+ return ret;
+
+ /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/
+ ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
+ resctrl_val);
+ if (ret)
+ return ret;
+
+ if ((strcmp(resctrl_val, "cat") == 0)) {
+ ret = initialize_llc_perf();
+ if (ret)
+ return ret;
+ }
+
+ /* Test runs until the callback setup() tells the test to stop. */
+ while (1) {
+ if (strcmp(resctrl_val, "cat") == 0) {
+ ret = param->setup(1, param);
+ if (ret) {
+ ret = 0;
+ break;
+ }
+ ret = reset_enable_llc_perf(bm_pid, param->cpu_no);
+ if (ret)
+ break;
+
+ if (run_fill_buf(param->span, malloc_and_init_memory,
+ memflush, operation, resctrl_val)) {
+ fprintf(stderr, "Error-running fill buffer\n");
+ ret = -1;
+ break;
+ }
+
+ sleep(1);
+ ret = measure_cache_vals(param, bm_pid);
+ if (ret)
+ break;
+ } else {
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
new file mode 100644
index 000000000000..5da43767b973
--- /dev/null
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cache Allocation Technology (CAT) test
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+#include <unistd.h>
+
+#define RESULT_FILE_NAME1 "result_cat1"
+#define RESULT_FILE_NAME2 "result_cat2"
+#define NUM_OF_RUNS 5
+#define MAX_DIFF_PERCENT 4
+#define MAX_DIFF 1000000
+
+int count_of_bits;
+char cbm_mask[256];
+unsigned long long_mask;
+unsigned long cache_size;
+
+/*
+ * Change schemata. Write schemata to specified
+ * con_mon grp, mon_grp in resctrl FS.
+ * Run 5 times in order to get average values.
+ */
+static int cat_setup(int num, ...)
+{
+ struct resctrl_val_param *p;
+ char schemata[64];
+ va_list param;
+ int ret = 0;
+
+ va_start(param, num);
+ p = va_arg(param, struct resctrl_val_param *);
+ va_end(param);
+
+ /* Run NUM_OF_RUNS times */
+ if (p->num_of_runs >= NUM_OF_RUNS)
+ return -1;
+
+ if (p->num_of_runs == 0) {
+ sprintf(schemata, "%lx", p->mask);
+ ret = write_schemata(p->ctrlgrp, schemata, p->cpu_no,
+ p->resctrl_val);
+ }
+ p->num_of_runs++;
+
+ return ret;
+}
+
+static void show_cache_info(unsigned long sum_llc_perf_miss, int no_of_bits,
+ unsigned long span)
+{
+ unsigned long allocated_cache_lines = span / 64;
+ unsigned long avg_llc_perf_miss = 0;
+ float diff_percent;
+
+ avg_llc_perf_miss = sum_llc_perf_miss / (NUM_OF_RUNS - 1);
+ diff_percent = ((float)allocated_cache_lines - avg_llc_perf_miss) /
+ allocated_cache_lines * 100;
+
+ printf("%sok CAT: cache miss rate within %d%%\n",
+ !is_amd && abs((int)diff_percent) > MAX_DIFF_PERCENT ?
+ "not " : "", MAX_DIFF_PERCENT);
+ tests_run++;
+ printf("# Percent diff=%d\n", abs((int)diff_percent));
+ printf("# Number of bits: %d\n", no_of_bits);
+ printf("# Avg_llc_perf_miss: %lu\n", avg_llc_perf_miss);
+ printf("# Allocated cache lines: %lu\n", allocated_cache_lines);
+}
+
+static int check_results(struct resctrl_val_param *param)
+{
+ char *token_array[8], temp[512];
+ unsigned long sum_llc_perf_miss = 0;
+ int runs = 0, no_of_bits = 0;
+ FILE *fp;
+
+ printf("# Checking for pass/fail\n");
+ fp = fopen(param->filename, "r");
+ if (!fp) {
+ perror("# Cannot open file");
+
+ return errno;
+ }
+
+ while (fgets(temp, sizeof(temp), fp)) {
+ char *token = strtok(temp, ":\t");
+ int fields = 0;
+
+ while (token) {
+ token_array[fields++] = token;
+ token = strtok(NULL, ":\t");
+ }
+ /*
+ * Discard the first value which is inaccurate due to monitoring
+ * setup transition phase.
+ */
+ if (runs > 0)
+ sum_llc_perf_miss += strtoul(token_array[3], NULL, 0);
+ runs++;
+ }
+
+ fclose(fp);
+ no_of_bits = count_bits(param->mask);
+
+ show_cache_info(sum_llc_perf_miss, no_of_bits, param->span);
+
+ return 0;
+}
+
+void cat_test_cleanup(void)
+{
+ remove(RESULT_FILE_NAME1);
+ remove(RESULT_FILE_NAME2);
+}
+
+int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
+{
+ unsigned long l_mask, l_mask_1;
+ int ret, pipefd[2], sibling_cpu_no;
+ char pipe_message;
+ pid_t bm_pid;
+
+ cache_size = 0;
+
+ ret = remount_resctrlfs(true);
+ if (ret)
+ return ret;
+
+ if (!validate_resctrl_feature_request("cat"))
+ return -1;
+
+ /* Get default cbm mask for L3/L2 cache */
+ ret = get_cbm_mask(cache_type);
+ if (ret)
+ return ret;
+
+ long_mask = strtoul(cbm_mask, NULL, 16);
+
+ /* Get L3/L2 cache size */
+ ret = get_cache_size(cpu_no, cache_type, &cache_size);
+ if (ret)
+ return ret;
+ printf("cache size :%lu\n", cache_size);
+
+ /* Get max number of bits from default-cabm mask */
+ count_of_bits = count_bits(long_mask);
+
+ if (n < 1 || n > count_of_bits - 1) {
+ printf("Invalid input value for no_of_bits n!\n");
+ printf("Please Enter value in range 1 to %d\n",
+ count_of_bits - 1);
+ return -1;
+ }
+
+ /* Get core id from same socket for running another thread */
+ sibling_cpu_no = get_core_sibling(cpu_no);
+ if (sibling_cpu_no < 0)
+ return -1;
+
+ struct resctrl_val_param param = {
+ .resctrl_val = "cat",
+ .cpu_no = cpu_no,
+ .mum_resctrlfs = 0,
+ .setup = cat_setup,
+ };
+
+ l_mask = long_mask >> n;
+ l_mask_1 = ~l_mask & long_mask;
+
+ /* Set param values for parent thread which will be allocated bitmask
+ * with (max_bits - n) bits
+ */
+ param.span = cache_size * (count_of_bits - n) / count_of_bits;
+ strcpy(param.ctrlgrp, "c2");
+ strcpy(param.mongrp, "m2");
+ strcpy(param.filename, RESULT_FILE_NAME2);
+ param.mask = l_mask;
+ param.num_of_runs = 0;
+
+ if (pipe(pipefd)) {
+ perror("# Unable to create pipe");
+ return errno;
+ }
+
+ bm_pid = fork();
+
+ /* Set param values for child thread which will be allocated bitmask
+ * with n bits
+ */
+ if (bm_pid == 0) {
+ param.mask = l_mask_1;
+ strcpy(param.ctrlgrp, "c1");
+ strcpy(param.mongrp, "m1");
+ param.span = cache_size * n / count_of_bits;
+ strcpy(param.filename, RESULT_FILE_NAME1);
+ param.num_of_runs = 0;
+ param.cpu_no = sibling_cpu_no;
+ }
+
+ remove(param.filename);
+
+ ret = cat_val(&param);
+ if (ret)
+ return ret;
+
+ ret = check_results(&param);
+ if (ret)
+ return ret;
+
+ if (bm_pid == 0) {
+ /* Tell parent that child is ready */
+ close(pipefd[0]);
+ pipe_message = 1;
+ if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
+ sizeof(pipe_message)) {
+ close(pipefd[1]);
+ perror("# failed signaling parent process");
+ return errno;
+ }
+
+ close(pipefd[1]);
+ while (1)
+ ;
+ } else {
+ /* Parent waits for child to be ready. */
+ close(pipefd[1]);
+ pipe_message = 0;
+ while (pipe_message != 1) {
+ if (read(pipefd[0], &pipe_message,
+ sizeof(pipe_message)) < sizeof(pipe_message)) {
+ perror("# failed reading from child process");
+ break;
+ }
+ }
+ close(pipefd[0]);
+ kill(bm_pid, SIGKILL);
+ }
+
+ cat_test_cleanup();
+ if (bm_pid)
+ umount_resctrlfs();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/resctrl/cqm_test.c b/tools/testing/selftests/resctrl/cqm_test.c
new file mode 100644
index 000000000000..c8756152bd61
--- /dev/null
+++ b/tools/testing/selftests/resctrl/cqm_test.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cache Monitoring Technology (CQM) test
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+#include <unistd.h>
+
+#define RESULT_FILE_NAME "result_cqm"
+#define NUM_OF_RUNS 5
+#define MAX_DIFF 2000000
+#define MAX_DIFF_PERCENT 15
+
+int count_of_bits;
+char cbm_mask[256];
+unsigned long long_mask;
+unsigned long cache_size;
+
+static int cqm_setup(int num, ...)
+{
+ struct resctrl_val_param *p;
+ va_list param;
+
+ va_start(param, num);
+ p = va_arg(param, struct resctrl_val_param *);
+ va_end(param);
+
+ /* Run NUM_OF_RUNS times */
+ if (p->num_of_runs >= NUM_OF_RUNS)
+ return -1;
+
+ p->num_of_runs++;
+
+ return 0;
+}
+
+static void show_cache_info(unsigned long sum_llc_occu_resc, int no_of_bits,
+ unsigned long span)
+{
+ unsigned long avg_llc_occu_resc = 0;
+ float diff_percent;
+ long avg_diff = 0;
+ bool res;
+
+ avg_llc_occu_resc = sum_llc_occu_resc / (NUM_OF_RUNS - 1);
+ avg_diff = (long)abs(span - avg_llc_occu_resc);
+
+ diff_percent = (((float)span - avg_llc_occu_resc) / span) * 100;
+
+ if ((abs((int)diff_percent) <= MAX_DIFF_PERCENT) ||
+ (abs(avg_diff) <= MAX_DIFF))
+ res = true;
+ else
+ res = false;
+
+ printf("%sok CQM: diff within %d, %d\%%\n", res ? "" : "not",
+ MAX_DIFF, (int)MAX_DIFF_PERCENT);
+
+ printf("# diff: %ld\n", avg_diff);
+ printf("# percent diff=%d\n", abs((int)diff_percent));
+ printf("# Results are displayed in (Bytes)\n");
+ printf("# Number of bits: %d\n", no_of_bits);
+ printf("# Avg_llc_occu_resc: %lu\n", avg_llc_occu_resc);
+ printf("# llc_occu_exp (span): %lu\n", span);
+
+ tests_run++;
+}
+
+static int check_results(struct resctrl_val_param *param, int no_of_bits)
+{
+ char *token_array[8], temp[512];
+ unsigned long sum_llc_occu_resc = 0;
+ int runs = 0;
+ FILE *fp;
+
+ printf("# checking for pass/fail\n");
+ fp = fopen(param->filename, "r");
+ if (!fp) {
+ perror("# Error in opening file\n");
+
+ return errno;
+ }
+
+ while (fgets(temp, 1024, fp)) {
+ char *token = strtok(temp, ":\t");
+ int fields = 0;
+
+ while (token) {
+ token_array[fields++] = token;
+ token = strtok(NULL, ":\t");
+ }
+
+ /* Field 3 is llc occ resc value */
+ if (runs > 0)
+ sum_llc_occu_resc += strtoul(token_array[3], NULL, 0);
+ runs++;
+ }
+ fclose(fp);
+ show_cache_info(sum_llc_occu_resc, no_of_bits, param->span);
+
+ return 0;
+}
+
+void cqm_test_cleanup(void)
+{
+ remove(RESULT_FILE_NAME);
+}
+
+int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
+{
+ int ret, mum_resctrlfs;
+
+ cache_size = 0;
+ mum_resctrlfs = 1;
+
+ ret = remount_resctrlfs(mum_resctrlfs);
+ if (ret)
+ return ret;
+
+ if (!validate_resctrl_feature_request("cqm"))
+ return -1;
+
+ ret = get_cbm_mask("L3");
+ if (ret)
+ return ret;
+
+ long_mask = strtoul(cbm_mask, NULL, 16);
+
+ ret = get_cache_size(cpu_no, "L3", &cache_size);
+ if (ret)
+ return ret;
+ printf("cache size :%lu\n", cache_size);
+
+ count_of_bits = count_bits(long_mask);
+
+ if (n < 1 || n > count_of_bits) {
+ printf("Invalid input value for numbr_of_bits n!\n");
+ printf("Please Enter value in range 1 to %d\n", count_of_bits);
+ return -1;
+ }
+
+ struct resctrl_val_param param = {
+ .resctrl_val = "cqm",
+ .ctrlgrp = "c1",
+ .mongrp = "m1",
+ .cpu_no = cpu_no,
+ .mum_resctrlfs = 0,
+ .filename = RESULT_FILE_NAME,
+ .mask = ~(long_mask << n) & long_mask,
+ .span = cache_size * n / count_of_bits,
+ .num_of_runs = 0,
+ .setup = cqm_setup,
+ };
+
+ if (strcmp(benchmark_cmd[0], "fill_buf") == 0)
+ sprintf(benchmark_cmd[1], "%lu", param.span);
+
+ remove(RESULT_FILE_NAME);
+
+ ret = resctrl_val(benchmark_cmd, &param);
+ if (ret)
+ return ret;
+
+ ret = check_results(&param, n);
+ if (ret)
+ return ret;
+
+ cqm_test_cleanup();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
new file mode 100644
index 000000000000..79c611c99a3d
--- /dev/null
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * fill_buf benchmark
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <inttypes.h>
+#include <malloc.h>
+#include <string.h>
+
+#include "resctrl.h"
+
+#define CL_SIZE (64)
+#define PAGE_SIZE (4 * 1024)
+#define MB (1024 * 1024)
+
+static unsigned char *startptr;
+
+static void sb(void)
+{
+#if defined(__i386) || defined(__x86_64)
+ asm volatile("sfence\n\t"
+ : : : "memory");
+#endif
+}
+
+static void ctrl_handler(int signo)
+{
+ free(startptr);
+ printf("\nEnding\n");
+ sb();
+ exit(EXIT_SUCCESS);
+}
+
+static void cl_flush(void *p)
+{
+#if defined(__i386) || defined(__x86_64)
+ asm volatile("clflush (%0)\n\t"
+ : : "r"(p) : "memory");
+#endif
+}
+
+static void mem_flush(void *p, size_t s)
+{
+ char *cp = (char *)p;
+ size_t i = 0;
+
+ s = s / CL_SIZE; /* mem size in cache llines */
+
+ for (i = 0; i < s; i++)
+ cl_flush(&cp[i * CL_SIZE]);
+
+ sb();
+}
+
+static void *malloc_and_init_memory(size_t s)
+{
+ uint64_t *p64;
+ size_t s64;
+
+ void *p = memalign(PAGE_SIZE, s);
+
+ p64 = (uint64_t *)p;
+ s64 = s / sizeof(uint64_t);
+
+ while (s64 > 0) {
+ *p64 = (uint64_t)rand();
+ p64 += (CL_SIZE / sizeof(uint64_t));
+ s64 -= (CL_SIZE / sizeof(uint64_t));
+ }
+
+ return p;
+}
+
+static int fill_one_span_read(unsigned char *start_ptr, unsigned char *end_ptr)
+{
+ unsigned char sum, *p;
+
+ sum = 0;
+ p = start_ptr;
+ while (p < end_ptr) {
+ sum += *p;
+ p += (CL_SIZE / 2);
+ }
+
+ return sum;
+}
+
+static
+void fill_one_span_write(unsigned char *start_ptr, unsigned char *end_ptr)
+{
+ unsigned char *p;
+
+ p = start_ptr;
+ while (p < end_ptr) {
+ *p = '1';
+ p += (CL_SIZE / 2);
+ }
+}
+
+static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr,
+ char *resctrl_val)
+{
+ int ret = 0;
+ FILE *fp;
+
+ while (1) {
+ ret = fill_one_span_read(start_ptr, end_ptr);
+ if (!strcmp(resctrl_val, "cat"))
+ break;
+ }
+
+ /* Consume read result so that reading memory is not optimized out. */
+ fp = fopen("/dev/null", "w");
+ if (!fp)
+ perror("Unable to write to /dev/null");
+ fprintf(fp, "Sum: %d ", ret);
+ fclose(fp);
+
+ return 0;
+}
+
+static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr,
+ char *resctrl_val)
+{
+ while (1) {
+ fill_one_span_write(start_ptr, end_ptr);
+ if (!strcmp(resctrl_val, "cat"))
+ break;
+ }
+
+ return 0;
+}
+
+static int
+fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush,
+ int op, char *resctrl_val)
+{
+ unsigned char *start_ptr, *end_ptr;
+ unsigned long long i;
+ int ret;
+
+ if (malloc_and_init)
+ start_ptr = malloc_and_init_memory(buf_size);
+ else
+ start_ptr = malloc(buf_size);
+
+ if (!start_ptr)
+ return -1;
+
+ startptr = start_ptr;
+ end_ptr = start_ptr + buf_size;
+
+ /*
+ * It's better to touch the memory once to avoid any compiler
+ * optimizations
+ */
+ if (!malloc_and_init) {
+ for (i = 0; i < buf_size; i++)
+ *start_ptr++ = (unsigned char)rand();
+ }
+
+ start_ptr = startptr;
+
+ /* Flush the memory before using to avoid "cache hot pages" effect */
+ if (memflush)
+ mem_flush(start_ptr, buf_size);
+
+ if (op == 0)
+ ret = fill_cache_read(start_ptr, end_ptr, resctrl_val);
+ else
+ ret = fill_cache_write(start_ptr, end_ptr, resctrl_val);
+
+ if (ret) {
+ printf("\n Error in fill cache read/write...\n");
+ return -1;
+ }
+
+ free(startptr);
+
+ return 0;
+}
+
+int run_fill_buf(unsigned long span, int malloc_and_init_memory,
+ int memflush, int op, char *resctrl_val)
+{
+ unsigned long long cache_size = span;
+ int ret;
+
+ /* set up ctrl-c handler */
+ if (signal(SIGINT, ctrl_handler) == SIG_ERR)
+ printf("Failed to catch SIGINT!\n");
+ if (signal(SIGHUP, ctrl_handler) == SIG_ERR)
+ printf("Failed to catch SIGHUP!\n");
+
+ ret = fill_cache(cache_size, malloc_and_init_memory, memflush, op,
+ resctrl_val);
+ if (ret) {
+ printf("\n Error in fill cache\n");
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
new file mode 100644
index 000000000000..7bf8eaa6204b
--- /dev/null
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Memory Bandwidth Allocation (MBA) test
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+
+#define RESULT_FILE_NAME "result_mba"
+#define NUM_OF_RUNS 5
+#define MAX_DIFF 300
+#define ALLOCATION_MAX 100
+#define ALLOCATION_MIN 10
+#define ALLOCATION_STEP 10
+
+/*
+ * Change schemata percentage from 100 to 10%. Write schemata to specified
+ * con_mon grp, mon_grp in resctrl FS.
+ * For each allocation, run 5 times in order to get average values.
+ */
+static int mba_setup(int num, ...)
+{
+ static int runs_per_allocation, allocation = 100;
+ struct resctrl_val_param *p;
+ char allocation_str[64];
+ va_list param;
+
+ va_start(param, num);
+ p = va_arg(param, struct resctrl_val_param *);
+ va_end(param);
+
+ if (runs_per_allocation >= NUM_OF_RUNS)
+ runs_per_allocation = 0;
+
+ /* Only set up schemata once every NUM_OF_RUNS of allocations */
+ if (runs_per_allocation++ != 0)
+ return 0;
+
+ if (allocation < ALLOCATION_MIN || allocation > ALLOCATION_MAX)
+ return -1;
+
+ sprintf(allocation_str, "%d", allocation);
+
+ write_schemata(p->ctrlgrp, allocation_str, p->cpu_no, p->resctrl_val);
+ allocation -= ALLOCATION_STEP;
+
+ return 0;
+}
+
+static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
+{
+ int allocation, runs;
+ bool failed = false;
+
+ printf("# Results are displayed in (MB)\n");
+ /* Memory bandwidth from 100% down to 10% */
+ for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP;
+ allocation++) {
+ unsigned long avg_bw_imc, avg_bw_resc;
+ unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
+ unsigned long avg_diff;
+
+ /*
+ * The first run is discarded due to inaccurate value from
+ * phase transition.
+ */
+ for (runs = NUM_OF_RUNS * allocation + 1;
+ runs < NUM_OF_RUNS * allocation + NUM_OF_RUNS ; runs++) {
+ sum_bw_imc += bw_imc[runs];
+ sum_bw_resc += bw_resc[runs];
+ }
+
+ avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1);
+ avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1);
+ avg_diff = labs((long)(avg_bw_resc - avg_bw_imc));
+
+ printf("%sok MBA schemata percentage %u smaller than %d %%\n",
+ avg_diff > MAX_DIFF ? "not " : "",
+ ALLOCATION_MAX - ALLOCATION_STEP * allocation,
+ MAX_DIFF);
+ tests_run++;
+ printf("# avg_diff: %lu\n", avg_diff);
+ printf("# avg_bw_imc: %lu\n", avg_bw_imc);
+ printf("# avg_bw_resc: %lu\n", avg_bw_resc);
+ if (avg_diff > MAX_DIFF)
+ failed = true;
+ }
+
+ printf("%sok schemata change using MBA%s\n", failed ? "not " : "",
+ failed ? " # at least one test failed" : "");
+ tests_run++;
+}
+
+static int check_results(void)
+{
+ char *token_array[8], output[] = RESULT_FILE_NAME, temp[512];
+ unsigned long bw_imc[1024], bw_resc[1024];
+ int runs;
+ FILE *fp;
+
+ fp = fopen(output, "r");
+ if (!fp) {
+ perror(output);
+
+ return errno;
+ }
+
+ runs = 0;
+ while (fgets(temp, sizeof(temp), fp)) {
+ char *token = strtok(temp, ":\t");
+ int fields = 0;
+
+ while (token) {
+ token_array[fields++] = token;
+ token = strtok(NULL, ":\t");
+ }
+
+ /* Field 3 is perf imc value */
+ bw_imc[runs] = strtoul(token_array[3], NULL, 0);
+ /* Field 5 is resctrl value */
+ bw_resc[runs] = strtoul(token_array[5], NULL, 0);
+ runs++;
+ }
+
+ fclose(fp);
+
+ show_mba_info(bw_imc, bw_resc);
+
+ return 0;
+}
+
+void mba_test_cleanup(void)
+{
+ remove(RESULT_FILE_NAME);
+}
+
+int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
+{
+ struct resctrl_val_param param = {
+ .resctrl_val = "mba",
+ .ctrlgrp = "c1",
+ .mongrp = "m1",
+ .cpu_no = cpu_no,
+ .mum_resctrlfs = 1,
+ .filename = RESULT_FILE_NAME,
+ .bw_report = bw_report,
+ .setup = mba_setup
+ };
+ int ret;
+
+ remove(RESULT_FILE_NAME);
+
+ if (!validate_resctrl_feature_request("mba"))
+ return -1;
+
+ ret = resctrl_val(benchmark_cmd, &param);
+ if (ret)
+ return ret;
+
+ ret = check_results();
+ if (ret)
+ return ret;
+
+ mba_test_cleanup();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
new file mode 100644
index 000000000000..4700f7453f81
--- /dev/null
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Memory Bandwidth Monitoring (MBM) test
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+
+#define RESULT_FILE_NAME "result_mbm"
+#define MAX_DIFF 300
+#define NUM_OF_RUNS 5
+
+static void
+show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span)
+{
+ unsigned long avg_bw_imc = 0, avg_bw_resc = 0;
+ unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
+ long avg_diff = 0;
+ int runs;
+
+ /*
+ * Discard the first value which is inaccurate due to monitoring setup
+ * transition phase.
+ */
+ for (runs = 1; runs < NUM_OF_RUNS ; runs++) {
+ sum_bw_imc += bw_imc[runs];
+ sum_bw_resc += bw_resc[runs];
+ }
+
+ avg_bw_imc = sum_bw_imc / 4;
+ avg_bw_resc = sum_bw_resc / 4;
+ avg_diff = avg_bw_resc - avg_bw_imc;
+
+ printf("%sok MBM: diff within %d%%\n",
+ labs(avg_diff) > MAX_DIFF ? "not " : "", MAX_DIFF);
+ tests_run++;
+ printf("# avg_diff: %lu\n", labs(avg_diff));
+ printf("# Span (MB): %d\n", span);
+ printf("# avg_bw_imc: %lu\n", avg_bw_imc);
+ printf("# avg_bw_resc: %lu\n", avg_bw_resc);
+}
+
+static int check_results(int span)
+{
+ unsigned long bw_imc[NUM_OF_RUNS], bw_resc[NUM_OF_RUNS];
+ char temp[1024], *token_array[8];
+ char output[] = RESULT_FILE_NAME;
+ int runs;
+ FILE *fp;
+
+ printf("# Checking for pass/fail\n");
+
+ fp = fopen(output, "r");
+ if (!fp) {
+ perror(output);
+
+ return errno;
+ }
+
+ runs = 0;
+ while (fgets(temp, sizeof(temp), fp)) {
+ char *token = strtok(temp, ":\t");
+ int i = 0;
+
+ while (token) {
+ token_array[i++] = token;
+ token = strtok(NULL, ":\t");
+ }
+
+ bw_resc[runs] = strtoul(token_array[5], NULL, 0);
+ bw_imc[runs] = strtoul(token_array[3], NULL, 0);
+ runs++;
+ }
+
+ show_bw_info(bw_imc, bw_resc, span);
+
+ fclose(fp);
+
+ return 0;
+}
+
+static int mbm_setup(int num, ...)
+{
+ struct resctrl_val_param *p;
+ static int num_of_runs;
+ va_list param;
+ int ret = 0;
+
+ /* Run NUM_OF_RUNS times */
+ if (num_of_runs++ >= NUM_OF_RUNS)
+ return -1;
+
+ va_start(param, num);
+ p = va_arg(param, struct resctrl_val_param *);
+ va_end(param);
+
+ /* Set up shemata with 100% allocation on the first run. */
+ if (num_of_runs == 0)
+ ret = write_schemata(p->ctrlgrp, "100", p->cpu_no,
+ p->resctrl_val);
+
+ return ret;
+}
+
+void mbm_test_cleanup(void)
+{
+ remove(RESULT_FILE_NAME);
+}
+
+int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
+{
+ struct resctrl_val_param param = {
+ .resctrl_val = "mbm",
+ .ctrlgrp = "c1",
+ .mongrp = "m1",
+ .span = span,
+ .cpu_no = cpu_no,
+ .mum_resctrlfs = 1,
+ .filename = RESULT_FILE_NAME,
+ .bw_report = bw_report,
+ .setup = mbm_setup
+ };
+ int ret;
+
+ remove(RESULT_FILE_NAME);
+
+ if (!validate_resctrl_feature_request("mbm"))
+ return -1;
+
+ ret = resctrl_val(benchmark_cmd, &param);
+ if (ret)
+ return ret;
+
+ ret = check_results(span);
+ if (ret)
+ return ret;
+
+ mbm_test_cleanup();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
new file mode 100644
index 000000000000..39bf59c6b9c5
--- /dev/null
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+#ifndef RESCTRL_H
+#define RESCTRL_H
+#include <stdio.h>
+#include <stdarg.h>
+#include <math.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <signal.h>
+#include <dirent.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/select.h>
+#include <sys/time.h>
+#include <sys/eventfd.h>
+#include <asm/unistd.h>
+#include <linux/perf_event.h>
+
+#define MB (1024 * 1024)
+#define RESCTRL_PATH "/sys/fs/resctrl"
+#define PHYS_ID_PATH "/sys/devices/system/cpu/cpu"
+#define CBM_MASK_PATH "/sys/fs/resctrl/info"
+
+#define PARENT_EXIT(err_msg) \
+ do { \
+ perror(err_msg); \
+ kill(ppid, SIGKILL); \
+ exit(EXIT_FAILURE); \
+ } while (0)
+
+/*
+ * resctrl_val_param: resctrl test parameters
+ * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
+ * @ctrlgrp: Name of the control monitor group (con_mon grp)
+ * @mongrp: Name of the monitor group (mon grp)
+ * @cpu_no: CPU number to which the benchmark would be binded
+ * @span: Memory bytes accessed in each benchmark iteration
+ * @mum_resctrlfs: Should the resctrl FS be remounted?
+ * @filename: Name of file to which the o/p should be written
+ * @bw_report: Bandwidth report type (reads vs writes)
+ * @setup: Call back function to setup test environment
+ */
+struct resctrl_val_param {
+ char *resctrl_val;
+ char ctrlgrp[64];
+ char mongrp[64];
+ int cpu_no;
+ unsigned long span;
+ int mum_resctrlfs;
+ char filename[64];
+ char *bw_report;
+ unsigned long mask;
+ int num_of_runs;
+ int (*setup)(int num, ...);
+};
+
+pid_t bm_pid, ppid;
+int tests_run;
+
+char llc_occup_path[1024];
+bool is_amd;
+
+bool check_resctrlfs_support(void);
+int filter_dmesg(void);
+int remount_resctrlfs(bool mum_resctrlfs);
+int get_resource_id(int cpu_no, int *resource_id);
+int umount_resctrlfs(void);
+int validate_bw_report_request(char *bw_report);
+bool validate_resctrl_feature_request(char *resctrl_val);
+char *fgrep(FILE *inf, const char *str);
+int taskset_benchmark(pid_t bm_pid, int cpu_no);
+void run_benchmark(int signum, siginfo_t *info, void *ucontext);
+int write_schemata(char *ctrlgrp, char *schemata, int cpu_no,
+ char *resctrl_val);
+int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
+ char *resctrl_val);
+int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
+ int group_fd, unsigned long flags);
+int run_fill_buf(unsigned long span, int malloc_and_init_memory, int memflush,
+ int op, char *resctrl_va);
+int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param);
+int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd);
+void tests_cleanup(void);
+void mbm_test_cleanup(void);
+int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd);
+void mba_test_cleanup(void);
+int get_cbm_mask(char *cache_type);
+int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size);
+void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
+int cat_val(struct resctrl_val_param *param);
+void cat_test_cleanup(void);
+int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type);
+int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd);
+unsigned int count_bits(unsigned long n);
+void cqm_test_cleanup(void);
+int get_core_sibling(int cpu_no);
+int measure_cache_vals(struct resctrl_val_param *param, int bm_pid);
+
+#endif /* RESCTRL_H */
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
new file mode 100644
index 000000000000..425cc85ac883
--- /dev/null
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Resctrl tests
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+
+#define BENCHMARK_ARGS 64
+#define BENCHMARK_ARG_SIZE 64
+
+bool is_amd;
+
+void detect_amd(void)
+{
+ FILE *inf = fopen("/proc/cpuinfo", "r");
+ char *res;
+
+ if (!inf)
+ return;
+
+ res = fgrep(inf, "vendor_id");
+
+ if (res) {
+ char *s = strchr(res, ':');
+
+ is_amd = s && !strcmp(s, ": AuthenticAMD\n");
+ free(res);
+ }
+ fclose(inf);
+}
+
+static void cmd_help(void)
+{
+ printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n");
+ printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM");
+ printf("\t default benchmark is builtin fill_buf\n");
+ printf("\t-t test list: run tests specified in the test list, ");
+ printf("e.g. -t mbm, mba, cqm, cat\n");
+ printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n");
+ printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n");
+ printf("\t-h: help\n");
+}
+
+void tests_cleanup(void)
+{
+ mbm_test_cleanup();
+ mba_test_cleanup();
+ cqm_test_cleanup();
+ cat_test_cleanup();
+}
+
+int main(int argc, char **argv)
+{
+ bool has_ben = false, mbm_test = true, mba_test = true, cqm_test = true;
+ int res, c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 5;
+ char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64];
+ char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE];
+ int ben_ind, ben_count;
+ bool cat_test = true;
+
+ for (i = 0; i < argc; i++) {
+ if (strcmp(argv[i], "-b") == 0) {
+ ben_ind = i + 1;
+ ben_count = argc - ben_ind;
+ argc_new = ben_ind - 1;
+ has_ben = true;
+ break;
+ }
+ }
+
+ while ((c = getopt(argc_new, argv, "ht:b:")) != -1) {
+ char *token;
+
+ switch (c) {
+ case 't':
+ token = strtok(optarg, ",");
+
+ mbm_test = false;
+ mba_test = false;
+ cqm_test = false;
+ cat_test = false;
+ while (token) {
+ if (!strcmp(token, "mbm")) {
+ mbm_test = true;
+ } else if (!strcmp(token, "mba")) {
+ mba_test = true;
+ } else if (!strcmp(token, "cqm")) {
+ cqm_test = true;
+ } else if (!strcmp(token, "cat")) {
+ cat_test = true;
+ } else {
+ printf("invalid argument\n");
+
+ return -1;
+ }
+ token = strtok(NULL, ":\t");
+ }
+ break;
+ case 'p':
+ cpu_no = atoi(optarg);
+ break;
+ case 'n':
+ no_of_bits = atoi(optarg);
+ break;
+ case 'h':
+ cmd_help();
+
+ return 0;
+ default:
+ printf("invalid argument\n");
+
+ return -1;
+ }
+ }
+
+ printf("TAP version 13\n");
+
+ /*
+ * Typically we need root privileges, because:
+ * 1. We write to resctrl FS
+ * 2. We execute perf commands
+ */
+ if (geteuid() != 0)
+ printf("# WARNING: not running as root, tests may fail.\n");
+
+ /* Detect AMD vendor */
+ detect_amd();
+
+ if (has_ben) {
+ /* Extract benchmark command from command line. */
+ for (i = ben_ind; i < argc; i++) {
+ benchmark_cmd[i - ben_ind] = benchmark_cmd_area[i];
+ sprintf(benchmark_cmd[i - ben_ind], "%s", argv[i]);
+ }
+ benchmark_cmd[ben_count] = NULL;
+ } else {
+ /* If no benchmark is given by "-b" argument, use fill_buf. */
+ for (i = 0; i < 6; i++)
+ benchmark_cmd[i] = benchmark_cmd_area[i];
+
+ strcpy(benchmark_cmd[0], "fill_buf");
+ sprintf(benchmark_cmd[1], "%d", span);
+ strcpy(benchmark_cmd[2], "1");
+ strcpy(benchmark_cmd[3], "1");
+ strcpy(benchmark_cmd[4], "0");
+ strcpy(benchmark_cmd[5], "");
+ benchmark_cmd[6] = NULL;
+ }
+
+ sprintf(bw_report, "reads");
+ sprintf(bm_type, "fill_buf");
+
+ check_resctrlfs_support();
+ filter_dmesg();
+
+ if (!is_amd && mbm_test) {
+ printf("# Starting MBM BW change ...\n");
+ if (!has_ben)
+ sprintf(benchmark_cmd[5], "%s", "mba");
+ res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
+ printf("%sok MBM: bw change\n", res ? "not " : "");
+ mbm_test_cleanup();
+ tests_run++;
+ }
+
+ if (!is_amd && mba_test) {
+ printf("# Starting MBA Schemata change ...\n");
+ if (!has_ben)
+ sprintf(benchmark_cmd[1], "%d", span);
+ res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
+ printf("%sok MBA: schemata change\n", res ? "not " : "");
+ mba_test_cleanup();
+ tests_run++;
+ }
+
+ if (cqm_test) {
+ printf("# Starting CQM test ...\n");
+ if (!has_ben)
+ sprintf(benchmark_cmd[5], "%s", "cqm");
+ res = cqm_resctrl_val(cpu_no, no_of_bits, benchmark_cmd);
+ printf("%sok CQM: test\n", res ? "not " : "");
+ cqm_test_cleanup();
+ tests_run++;
+ }
+
+ if (cat_test) {
+ printf("# Starting CAT test ...\n");
+ res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
+ printf("%sok CAT: test\n", res ? "not " : "");
+ tests_run++;
+ cat_test_cleanup();
+ }
+
+ printf("1..%d\n", tests_run);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
new file mode 100644
index 000000000000..520fea3606d1
--- /dev/null
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -0,0 +1,744 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Memory bandwidth monitoring and allocation library
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+
+#define UNCORE_IMC "uncore_imc"
+#define READ_FILE_NAME "events/cas_count_read"
+#define WRITE_FILE_NAME "events/cas_count_write"
+#define DYN_PMU_PATH "/sys/bus/event_source/devices"
+#define SCALE 0.00006103515625
+#define MAX_IMCS 20
+#define MAX_TOKENS 5
+#define READ 0
+#define WRITE 1
+#define CON_MON_MBM_LOCAL_BYTES_PATH \
+ "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
+
+#define CON_MBM_LOCAL_BYTES_PATH \
+ "%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
+
+#define MON_MBM_LOCAL_BYTES_PATH \
+ "%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
+
+#define MBM_LOCAL_BYTES_PATH \
+ "%s/mon_data/mon_L3_%02d/mbm_local_bytes"
+
+#define CON_MON_LCC_OCCUP_PATH \
+ "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"
+
+#define CON_LCC_OCCUP_PATH \
+ "%s/%s/mon_data/mon_L3_%02d/llc_occupancy"
+
+#define MON_LCC_OCCUP_PATH \
+ "%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"
+
+#define LCC_OCCUP_PATH \
+ "%s/mon_data/mon_L3_%02d/llc_occupancy"
+
+struct membw_read_format {
+ __u64 value; /* The value of the event */
+ __u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
+ __u64 time_running; /* if PERF_FORMAT_TOTAL_TIME_RUNNING */
+ __u64 id; /* if PERF_FORMAT_ID */
+};
+
+struct imc_counter_config {
+ __u32 type;
+ __u64 event;
+ __u64 umask;
+ struct perf_event_attr pe;
+ struct membw_read_format return_value;
+ int fd;
+};
+
+static char mbm_total_path[1024];
+static int imcs;
+static struct imc_counter_config imc_counters_config[MAX_IMCS][2];
+
+void membw_initialize_perf_event_attr(int i, int j)
+{
+ memset(&imc_counters_config[i][j].pe, 0,
+ sizeof(struct perf_event_attr));
+ imc_counters_config[i][j].pe.type = imc_counters_config[i][j].type;
+ imc_counters_config[i][j].pe.size = sizeof(struct perf_event_attr);
+ imc_counters_config[i][j].pe.disabled = 1;
+ imc_counters_config[i][j].pe.inherit = 1;
+ imc_counters_config[i][j].pe.exclude_guest = 0;
+ imc_counters_config[i][j].pe.config =
+ imc_counters_config[i][j].umask << 8 |
+ imc_counters_config[i][j].event;
+ imc_counters_config[i][j].pe.sample_type = PERF_SAMPLE_IDENTIFIER;
+ imc_counters_config[i][j].pe.read_format =
+ PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
+}
+
+void membw_ioctl_perf_event_ioc_reset_enable(int i, int j)
+{
+ ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_RESET, 0);
+ ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_ENABLE, 0);
+}
+
+void membw_ioctl_perf_event_ioc_disable(int i, int j)
+{
+ ioctl(imc_counters_config[i][j].fd, PERF_EVENT_IOC_DISABLE, 0);
+}
+
+/*
+ * get_event_and_umask: Parse config into event and umask
+ * @cas_count_cfg: Config
+ * @count: iMC number
+ * @op: Operation (read/write)
+ */
+void get_event_and_umask(char *cas_count_cfg, int count, bool op)
+{
+ char *token[MAX_TOKENS];
+ int i = 0;
+
+ strcat(cas_count_cfg, ",");
+ token[0] = strtok(cas_count_cfg, "=,");
+
+ for (i = 1; i < MAX_TOKENS; i++)
+ token[i] = strtok(NULL, "=,");
+
+ for (i = 0; i < MAX_TOKENS; i++) {
+ if (!token[i])
+ break;
+ if (strcmp(token[i], "event") == 0) {
+ if (op == READ)
+ imc_counters_config[count][READ].event =
+ strtol(token[i + 1], NULL, 16);
+ else
+ imc_counters_config[count][WRITE].event =
+ strtol(token[i + 1], NULL, 16);
+ }
+ if (strcmp(token[i], "umask") == 0) {
+ if (op == READ)
+ imc_counters_config[count][READ].umask =
+ strtol(token[i + 1], NULL, 16);
+ else
+ imc_counters_config[count][WRITE].umask =
+ strtol(token[i + 1], NULL, 16);
+ }
+ }
+}
+
+static int open_perf_event(int i, int cpu_no, int j)
+{
+ imc_counters_config[i][j].fd =
+ perf_event_open(&imc_counters_config[i][j].pe, -1, cpu_no, -1,
+ PERF_FLAG_FD_CLOEXEC);
+
+ if (imc_counters_config[i][j].fd == -1) {
+ fprintf(stderr, "Error opening leader %llx\n",
+ imc_counters_config[i][j].pe.config);
+
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Get type and config (read and write) of an iMC counter */
+static int read_from_imc_dir(char *imc_dir, int count)
+{
+ char cas_count_cfg[1024], imc_counter_cfg[1024], imc_counter_type[1024];
+ FILE *fp;
+
+ /* Get type of iMC counter */
+ sprintf(imc_counter_type, "%s%s", imc_dir, "type");
+ fp = fopen(imc_counter_type, "r");
+ if (!fp) {
+ perror("Failed to open imc counter type file");
+
+ return -1;
+ }
+ if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) {
+ perror("Could not get imc type");
+ fclose(fp);
+
+ return -1;
+ }
+ fclose(fp);
+
+ imc_counters_config[count][WRITE].type =
+ imc_counters_config[count][READ].type;
+
+ /* Get read config */
+ sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
+ fp = fopen(imc_counter_cfg, "r");
+ if (!fp) {
+ perror("Failed to open imc config file");
+
+ return -1;
+ }
+ if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
+ perror("Could not get imc cas count read");
+ fclose(fp);
+
+ return -1;
+ }
+ fclose(fp);
+
+ get_event_and_umask(cas_count_cfg, count, READ);
+
+ /* Get write config */
+ sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME);
+ fp = fopen(imc_counter_cfg, "r");
+ if (!fp) {
+ perror("Failed to open imc config file");
+
+ return -1;
+ }
+ if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
+ perror("Could not get imc cas count write");
+ fclose(fp);
+
+ return -1;
+ }
+ fclose(fp);
+
+ get_event_and_umask(cas_count_cfg, count, WRITE);
+
+ return 0;
+}
+
+/*
+ * A system can have 'n' number of iMC (Integrated Memory Controller)
+ * counters, get that 'n'. For each iMC counter get it's type and config.
+ * Also, each counter has two configs, one for read and the other for write.
+ * A config again has two parts, event and umask.
+ * Enumerate all these details into an array of structures.
+ *
+ * Return: >= 0 on success. < 0 on failure.
+ */
+static int num_of_imcs(void)
+{
+ unsigned int count = 0;
+ char imc_dir[512];
+ struct dirent *ep;
+ int ret;
+ DIR *dp;
+
+ dp = opendir(DYN_PMU_PATH);
+ if (dp) {
+ while ((ep = readdir(dp))) {
+ if (strstr(ep->d_name, UNCORE_IMC)) {
+ sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
+ ep->d_name);
+ ret = read_from_imc_dir(imc_dir, count);
+ if (ret) {
+ closedir(dp);
+
+ return ret;
+ }
+ count++;
+ }
+ }
+ closedir(dp);
+ if (count == 0) {
+ perror("Unable find iMC counters!\n");
+
+ return -1;
+ }
+ } else {
+ perror("Unable to open PMU directory!\n");
+
+ return -1;
+ }
+
+ return count;
+}
+
+static int initialize_mem_bw_imc(void)
+{
+ int imc, j;
+
+ imcs = num_of_imcs();
+ if (imcs <= 0)
+ return imcs;
+
+ /* Initialize perf_event_attr structures for all iMC's */
+ for (imc = 0; imc < imcs; imc++) {
+ for (j = 0; j < 2; j++)
+ membw_initialize_perf_event_attr(imc, j);
+ }
+
+ return 0;
+}
+
+/*
+ * get_mem_bw_imc: Memory band width as reported by iMC counters
+ * @cpu_no: CPU number that the benchmark PID is binded to
+ * @bw_report: Bandwidth report type (reads, writes)
+ *
+ * Memory B/W utilized by a process on a socket can be calculated using
+ * iMC counters. Perf events are used to read these counters.
+ *
+ * Return: >= 0 on success. < 0 on failure.
+ */
+static float get_mem_bw_imc(int cpu_no, char *bw_report)
+{
+ float reads, writes, of_mul_read, of_mul_write;
+ int imc, j, ret;
+
+ /* Start all iMC counters to log values (both read and write) */
+ reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
+ for (imc = 0; imc < imcs; imc++) {
+ for (j = 0; j < 2; j++) {
+ ret = open_perf_event(imc, cpu_no, j);
+ if (ret)
+ return -1;
+ }
+ for (j = 0; j < 2; j++)
+ membw_ioctl_perf_event_ioc_reset_enable(imc, j);
+ }
+
+ sleep(1);
+
+ /* Stop counters after a second to get results (both read and write) */
+ for (imc = 0; imc < imcs; imc++) {
+ for (j = 0; j < 2; j++)
+ membw_ioctl_perf_event_ioc_disable(imc, j);
+ }
+
+ /*
+ * Get results which are stored in struct type imc_counter_config
+ * Take over flow into consideration before calculating total b/w
+ */
+ for (imc = 0; imc < imcs; imc++) {
+ struct imc_counter_config *r =
+ &imc_counters_config[imc][READ];
+ struct imc_counter_config *w =
+ &imc_counters_config[imc][WRITE];
+
+ if (read(r->fd, &r->return_value,
+ sizeof(struct membw_read_format)) == -1) {
+ perror("Couldn't get read b/w through iMC");
+
+ return -1;
+ }
+
+ if (read(w->fd, &w->return_value,
+ sizeof(struct membw_read_format)) == -1) {
+ perror("Couldn't get write bw through iMC");
+
+ return -1;
+ }
+
+ __u64 r_time_enabled = r->return_value.time_enabled;
+ __u64 r_time_running = r->return_value.time_running;
+
+ if (r_time_enabled != r_time_running)
+ of_mul_read = (float)r_time_enabled /
+ (float)r_time_running;
+
+ __u64 w_time_enabled = w->return_value.time_enabled;
+ __u64 w_time_running = w->return_value.time_running;
+
+ if (w_time_enabled != w_time_running)
+ of_mul_write = (float)w_time_enabled /
+ (float)w_time_running;
+ reads += r->return_value.value * of_mul_read * SCALE;
+ writes += w->return_value.value * of_mul_write * SCALE;
+ }
+
+ for (imc = 0; imc < imcs; imc++) {
+ close(imc_counters_config[imc][READ].fd);
+ close(imc_counters_config[imc][WRITE].fd);
+ }
+
+ if (strcmp(bw_report, "reads") == 0)
+ return reads;
+
+ if (strcmp(bw_report, "writes") == 0)
+ return writes;
+
+ return (reads + writes);
+}
+
+void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
+{
+ if (ctrlgrp && mongrp)
+ sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH,
+ RESCTRL_PATH, ctrlgrp, mongrp, resource_id);
+ else if (!ctrlgrp && mongrp)
+ sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
+ mongrp, resource_id);
+ else if (ctrlgrp && !mongrp)
+ sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
+ ctrlgrp, resource_id);
+ else if (!ctrlgrp && !mongrp)
+ sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
+ resource_id);
+}
+
+/*
+ * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path"
+ * @ctrlgrp: Name of the control monitor group (con_mon grp)
+ * @mongrp: Name of the monitor group (mon grp)
+ * @cpu_no: CPU number that the benchmark PID is binded to
+ * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
+ */
+static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
+ int cpu_no, char *resctrl_val)
+{
+ int resource_id;
+
+ if (get_resource_id(cpu_no, &resource_id) < 0) {
+ perror("Could not get resource_id");
+ return;
+ }
+
+ if (strcmp(resctrl_val, "mbm") == 0)
+ set_mbm_path(ctrlgrp, mongrp, resource_id);
+
+ if ((strcmp(resctrl_val, "mba") == 0)) {
+ if (ctrlgrp)
+ sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
+ RESCTRL_PATH, ctrlgrp, resource_id);
+ else
+ sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH,
+ RESCTRL_PATH, resource_id);
+ }
+}
+
+/*
+ * Get MBM Local bytes as reported by resctrl FS
+ * For MBM,
+ * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp
+ * 2. If only con_mon grp is given, then read from con_mon grp
+ * 3. If both are not given, then read from root con_mon grp
+ * For MBA,
+ * 1. If con_mon grp is given, then read from it
+ * 2. If con_mon grp is not given, then read from root con_mon grp
+ */
+static unsigned long get_mem_bw_resctrl(void)
+{
+ unsigned long mbm_total = 0;
+ FILE *fp;
+
+ fp = fopen(mbm_total_path, "r");
+ if (!fp) {
+ perror("Failed to open total bw file");
+
+ return -1;
+ }
+ if (fscanf(fp, "%lu", &mbm_total) <= 0) {
+ perror("Could not get mbm local bytes");
+ fclose(fp);
+
+ return -1;
+ }
+ fclose(fp);
+
+ return mbm_total;
+}
+
+pid_t bm_pid, ppid;
+
+void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
+{
+ kill(bm_pid, SIGKILL);
+ umount_resctrlfs();
+ tests_cleanup();
+ printf("Ending\n\n");
+
+ exit(EXIT_SUCCESS);
+}
+
+/*
+ * print_results_bw: the memory bandwidth results are stored in a file
+ * @filename: file that stores the results
+ * @bm_pid: child pid that runs benchmark
+ * @bw_imc: perf imc counter value
+ * @bw_resc: memory bandwidth value
+ *
+ * Return: 0 on success. non-zero on failure.
+ */
+static int print_results_bw(char *filename, int bm_pid, float bw_imc,
+ unsigned long bw_resc)
+{
+ unsigned long diff = fabs(bw_imc - bw_resc);
+ FILE *fp;
+
+ if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
+ printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc);
+ printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);
+ } else {
+ fp = fopen(filename, "a");
+ if (!fp) {
+ perror("Cannot open results file");
+
+ return errno;
+ }
+ if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
+ bm_pid, bw_imc, bw_resc, diff) <= 0) {
+ fclose(fp);
+ perror("Could not log results.");
+
+ return errno;
+ }
+ fclose(fp);
+ }
+
+ return 0;
+}
+
+static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num)
+{
+ if (strlen(ctrlgrp) && strlen(mongrp))
+ sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH,
+ ctrlgrp, mongrp, sock_num);
+ else if (!strlen(ctrlgrp) && strlen(mongrp))
+ sprintf(llc_occup_path, MON_LCC_OCCUP_PATH, RESCTRL_PATH,
+ mongrp, sock_num);
+ else if (strlen(ctrlgrp) && !strlen(mongrp))
+ sprintf(llc_occup_path, CON_LCC_OCCUP_PATH, RESCTRL_PATH,
+ ctrlgrp, sock_num);
+ else if (!strlen(ctrlgrp) && !strlen(mongrp))
+ sprintf(llc_occup_path, LCC_OCCUP_PATH, RESCTRL_PATH, sock_num);
+}
+
+/*
+ * initialize_llc_occu_resctrl: Appropriately populate "llc_occup_path"
+ * @ctrlgrp: Name of the control monitor group (con_mon grp)
+ * @mongrp: Name of the monitor group (mon grp)
+ * @cpu_no: CPU number that the benchmark PID is binded to
+ * @resctrl_val: Resctrl feature (Eg: cat, cqm.. etc)
+ */
+static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
+ int cpu_no, char *resctrl_val)
+{
+ int resource_id;
+
+ if (get_resource_id(cpu_no, &resource_id) < 0) {
+ perror("# Unable to resource_id");
+ return;
+ }
+
+ if (strcmp(resctrl_val, "cqm") == 0)
+ set_cqm_path(ctrlgrp, mongrp, resource_id);
+}
+
+static int
+measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
+{
+ unsigned long bw_imc, bw_resc, bw_resc_end;
+ int ret;
+
+ /*
+ * Measure memory bandwidth from resctrl and from
+ * another source which is perf imc value or could
+ * be something else if perf imc event is not available.
+ * Compare the two values to validate resctrl value.
+ * It takes 1sec to measure the data.
+ */
+ bw_imc = get_mem_bw_imc(param->cpu_no, param->bw_report);
+ if (bw_imc <= 0)
+ return bw_imc;
+
+ bw_resc_end = get_mem_bw_resctrl();
+ if (bw_resc_end <= 0)
+ return bw_resc_end;
+
+ bw_resc = (bw_resc_end - *bw_resc_start) / MB;
+ ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
+ if (ret)
+ return ret;
+
+ *bw_resc_start = bw_resc_end;
+
+ return 0;
+}
+
+/*
+ * resctrl_val: execute benchmark and measure memory bandwidth on
+ * the benchmark
+ * @benchmark_cmd: benchmark command and its arguments
+ * @param: parameters passed to resctrl_val()
+ *
+ * Return: 0 on success. non-zero on failure.
+ */
+int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
+{
+ char *resctrl_val = param->resctrl_val;
+ unsigned long bw_resc_start = 0;
+ struct sigaction sigact;
+ int ret = 0, pipefd[2];
+ char pipe_message = 0;
+ union sigval value;
+
+ if (strcmp(param->filename, "") == 0)
+ sprintf(param->filename, "stdio");
+
+ if ((strcmp(resctrl_val, "mba")) == 0 ||
+ (strcmp(resctrl_val, "mbm")) == 0) {
+ ret = validate_bw_report_request(param->bw_report);
+ if (ret)
+ return ret;
+ }
+
+ ret = remount_resctrlfs(param->mum_resctrlfs);
+ if (ret)
+ return ret;
+
+ /*
+ * If benchmark wasn't successfully started by child, then child should
+ * kill parent, so save parent's pid
+ */
+ ppid = getpid();
+
+ if (pipe(pipefd)) {
+ perror("# Unable to create pipe");
+
+ return -1;
+ }
+
+ /*
+ * Fork to start benchmark, save child's pid so that it can be killed
+ * when needed
+ */
+ bm_pid = fork();
+ if (bm_pid == -1) {
+ perror("# Unable to fork");
+
+ return -1;
+ }
+
+ if (bm_pid == 0) {
+ /*
+ * Mask all signals except SIGUSR1, parent uses SIGUSR1 to
+ * start benchmark
+ */
+ sigfillset(&sigact.sa_mask);
+ sigdelset(&sigact.sa_mask, SIGUSR1);
+
+ sigact.sa_sigaction = run_benchmark;
+ sigact.sa_flags = SA_SIGINFO;
+
+ /* Register for "SIGUSR1" signal from parent */
+ if (sigaction(SIGUSR1, &sigact, NULL))
+ PARENT_EXIT("Can't register child for signal");
+
+ /* Tell parent that child is ready */
+ close(pipefd[0]);
+ pipe_message = 1;
+ if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
+ sizeof(pipe_message)) {
+ perror("# failed signaling parent process");
+ close(pipefd[1]);
+ return -1;
+ }
+ close(pipefd[1]);
+
+ /* Suspend child until delivery of "SIGUSR1" from parent */
+ sigsuspend(&sigact.sa_mask);
+
+ PARENT_EXIT("Child is done");
+ }
+
+ printf("# benchmark PID: %d\n", bm_pid);
+
+ /*
+ * Register CTRL-C handler for parent, as it has to kill benchmark
+ * before exiting
+ */
+ sigact.sa_sigaction = ctrlc_handler;
+ sigemptyset(&sigact.sa_mask);
+ sigact.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGINT, &sigact, NULL) ||
+ sigaction(SIGHUP, &sigact, NULL)) {
+ perror("# sigaction");
+ ret = errno;
+ goto out;
+ }
+
+ value.sival_ptr = benchmark_cmd;
+
+ /* Taskset benchmark to specified cpu */
+ ret = taskset_benchmark(bm_pid, param->cpu_no);
+ if (ret)
+ goto out;
+
+ /* Write benchmark to specified control&monitoring grp in resctrl FS */
+ ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
+ resctrl_val);
+ if (ret)
+ goto out;
+
+ if ((strcmp(resctrl_val, "mbm") == 0) ||
+ (strcmp(resctrl_val, "mba") == 0)) {
+ ret = initialize_mem_bw_imc();
+ if (ret)
+ goto out;
+
+ initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
+ param->cpu_no, resctrl_val);
+ } else if (strcmp(resctrl_val, "cqm") == 0)
+ initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp,
+ param->cpu_no, resctrl_val);
+
+ /* Parent waits for child to be ready. */
+ close(pipefd[1]);
+ while (pipe_message != 1) {
+ if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) <
+ sizeof(pipe_message)) {
+ perror("# failed reading message from child process");
+ close(pipefd[0]);
+ goto out;
+ }
+ }
+ close(pipefd[0]);
+
+ /* Signal child to start benchmark */
+ if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
+ perror("# sigqueue SIGUSR1 to child");
+ ret = errno;
+ goto out;
+ }
+
+ /* Give benchmark enough time to fully run */
+ sleep(1);
+
+ /* Test runs until the callback setup() tells the test to stop. */
+ while (1) {
+ if ((strcmp(resctrl_val, "mbm") == 0) ||
+ (strcmp(resctrl_val, "mba") == 0)) {
+ ret = param->setup(1, param);
+ if (ret) {
+ ret = 0;
+ break;
+ }
+
+ ret = measure_vals(param, &bw_resc_start);
+ if (ret)
+ break;
+ } else if (strcmp(resctrl_val, "cqm") == 0) {
+ ret = param->setup(1, param);
+ if (ret) {
+ ret = 0;
+ break;
+ }
+ sleep(1);
+ ret = measure_cache_vals(param, bm_pid);
+ if (ret)
+ break;
+ } else {
+ break;
+ }
+ }
+
+out:
+ kill(bm_pid, SIGKILL);
+ umount_resctrlfs();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
new file mode 100644
index 000000000000..19c0ec4045a4
--- /dev/null
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Basic resctrl file system operations
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+
+int tests_run;
+
+static int find_resctrl_mount(char *buffer)
+{
+ FILE *mounts;
+ char line[256], *fs, *mntpoint;
+
+ mounts = fopen("/proc/mounts", "r");
+ if (!mounts) {
+ perror("/proc/mounts");
+ return -ENXIO;
+ }
+ while (!feof(mounts)) {
+ if (!fgets(line, 256, mounts))
+ break;
+ fs = strtok(line, " \t");
+ if (!fs)
+ continue;
+ mntpoint = strtok(NULL, " \t");
+ if (!mntpoint)
+ continue;
+ fs = strtok(NULL, " \t");
+ if (!fs)
+ continue;
+ if (strcmp(fs, "resctrl"))
+ continue;
+
+ fclose(mounts);
+ if (buffer)
+ strncpy(buffer, mntpoint, 256);
+
+ return 0;
+ }
+
+ fclose(mounts);
+
+ return -ENOENT;
+}
+
+char cbm_mask[256];
+
+/*
+ * remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl
+ * @mum_resctrlfs: Should the resctrl FS be remounted?
+ *
+ * If not mounted, mount it.
+ * If mounted and mum_resctrlfs then remount resctrl FS.
+ * If mounted and !mum_resctrlfs then noop
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int remount_resctrlfs(bool mum_resctrlfs)
+{
+ char mountpoint[256];
+ int ret;
+
+ ret = find_resctrl_mount(mountpoint);
+ if (ret)
+ strcpy(mountpoint, RESCTRL_PATH);
+
+ if (!ret && mum_resctrlfs && umount(mountpoint)) {
+ printf("not ok unmounting \"%s\"\n", mountpoint);
+ perror("# umount");
+ tests_run++;
+ }
+
+ if (!ret && !mum_resctrlfs)
+ return 0;
+
+ ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL);
+ printf("%sok mounting resctrl to \"%s\"\n", ret ? "not " : "",
+ RESCTRL_PATH);
+ if (ret)
+ perror("# mount");
+
+ tests_run++;
+
+ return ret;
+}
+
+int umount_resctrlfs(void)
+{
+ if (umount(RESCTRL_PATH)) {
+ perror("# Unable to umount resctrl");
+
+ return errno;
+ }
+
+ return 0;
+}
+
+/*
+ * get_resource_id - Get socket number/l3 id for a specified CPU
+ * @cpu_no: CPU number
+ * @resource_id: Socket number or l3_id
+ *
+ * Return: >= 0 on success, < 0 on failure.
+ */
+int get_resource_id(int cpu_no, int *resource_id)
+{
+ char phys_pkg_path[1024];
+ FILE *fp;
+
+ if (is_amd)
+ sprintf(phys_pkg_path, "%s%d/cache/index3/id",
+ PHYS_ID_PATH, cpu_no);
+ else
+ sprintf(phys_pkg_path, "%s%d/topology/physical_package_id",
+ PHYS_ID_PATH, cpu_no);
+
+ fp = fopen(phys_pkg_path, "r");
+ if (!fp) {
+ perror("Failed to open physical_package_id");
+
+ return -1;
+ }
+ if (fscanf(fp, "%d", resource_id) <= 0) {
+ perror("Could not get socket number or l3 id");
+ fclose(fp);
+
+ return -1;
+ }
+ fclose(fp);
+
+ return 0;
+}
+
+/*
+ * get_cache_size - Get cache size for a specified CPU
+ * @cpu_no: CPU number
+ * @cache_type: Cache level L2/L3
+ * @cache_size: pointer to cache_size
+ *
+ * Return: = 0 on success, < 0 on failure.
+ */
+int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size)
+{
+ char cache_path[1024], cache_str[64];
+ int length, i, cache_num;
+ FILE *fp;
+
+ if (!strcmp(cache_type, "L3")) {
+ cache_num = 3;
+ } else if (!strcmp(cache_type, "L2")) {
+ cache_num = 2;
+ } else {
+ perror("Invalid cache level");
+ return -1;
+ }
+
+ sprintf(cache_path, "/sys/bus/cpu/devices/cpu%d/cache/index%d/size",
+ cpu_no, cache_num);
+ fp = fopen(cache_path, "r");
+ if (!fp) {
+ perror("Failed to open cache size");
+
+ return -1;
+ }
+ if (fscanf(fp, "%s", cache_str) <= 0) {
+ perror("Could not get cache_size");
+ fclose(fp);
+
+ return -1;
+ }
+ fclose(fp);
+
+ length = (int)strlen(cache_str);
+
+ *cache_size = 0;
+
+ for (i = 0; i < length; i++) {
+ if ((cache_str[i] >= '0') && (cache_str[i] <= '9'))
+
+ *cache_size = *cache_size * 10 + (cache_str[i] - '0');
+
+ else if (cache_str[i] == 'K')
+
+ *cache_size = *cache_size * 1024;
+
+ else if (cache_str[i] == 'M')
+
+ *cache_size = *cache_size * 1024 * 1024;
+
+ else
+ break;
+ }
+
+ return 0;
+}
+
+#define CORE_SIBLINGS_PATH "/sys/bus/cpu/devices/cpu"
+
+/*
+ * get_cbm_mask - Get cbm mask for given cache
+ * @cache_type: Cache level L2/L3
+ *
+ * Mask is stored in cbm_mask which is global variable.
+ *
+ * Return: = 0 on success, < 0 on failure.
+ */
+int get_cbm_mask(char *cache_type)
+{
+ char cbm_mask_path[1024];
+ FILE *fp;
+
+ sprintf(cbm_mask_path, "%s/%s/cbm_mask", CBM_MASK_PATH, cache_type);
+
+ fp = fopen(cbm_mask_path, "r");
+ if (!fp) {
+ perror("Failed to open cache level");
+
+ return -1;
+ }
+ if (fscanf(fp, "%s", cbm_mask) <= 0) {
+ perror("Could not get max cbm_mask");
+ fclose(fp);
+
+ return -1;
+ }
+ fclose(fp);
+
+ return 0;
+}
+
+/*
+ * get_core_sibling - Get sibling core id from the same socket for given CPU
+ * @cpu_no: CPU number
+ *
+ * Return: > 0 on success, < 0 on failure.
+ */
+int get_core_sibling(int cpu_no)
+{
+ char core_siblings_path[1024], cpu_list_str[64];
+ int sibling_cpu_no = -1;
+ FILE *fp;
+
+ sprintf(core_siblings_path, "%s%d/topology/core_siblings_list",
+ CORE_SIBLINGS_PATH, cpu_no);
+
+ fp = fopen(core_siblings_path, "r");
+ if (!fp) {
+ perror("Failed to open core siblings path");
+
+ return -1;
+ }
+ if (fscanf(fp, "%s", cpu_list_str) <= 0) {
+ perror("Could not get core_siblings list");
+ fclose(fp);
+
+ return -1;
+ }
+ fclose(fp);
+
+ char *token = strtok(cpu_list_str, "-,");
+
+ while (token) {
+ sibling_cpu_no = atoi(token);
+ /* Skipping core 0 as we don't want to run test on core 0 */
+ if (sibling_cpu_no != 0)
+ break;
+ token = strtok(NULL, "-,");
+ }
+
+ return sibling_cpu_no;
+}
+
+/*
+ * taskset_benchmark - Taskset PID (i.e. benchmark) to a specified cpu
+ * @bm_pid: PID that should be binded
+ * @cpu_no: CPU number at which the PID would be binded
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int taskset_benchmark(pid_t bm_pid, int cpu_no)
+{
+ cpu_set_t my_set;
+
+ CPU_ZERO(&my_set);
+ CPU_SET(cpu_no, &my_set);
+
+ if (sched_setaffinity(bm_pid, sizeof(cpu_set_t), &my_set)) {
+ perror("Unable to taskset benchmark");
+
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
+ * in specified signal. Direct benchmark stdio to /dev/null.
+ * @signum: signal number
+ * @info: signal info
+ * @ucontext: user context in signal handling
+ *
+ * Return: void
+ */
+void run_benchmark(int signum, siginfo_t *info, void *ucontext)
+{
+ int operation, ret, malloc_and_init_memory, memflush;
+ unsigned long span, buffer_span;
+ char **benchmark_cmd;
+ char resctrl_val[64];
+ FILE *fp;
+
+ benchmark_cmd = info->si_ptr;
+
+ /*
+ * Direct stdio of child to /dev/null, so that only parent writes to
+ * stdio (console)
+ */
+ fp = freopen("/dev/null", "w", stdout);
+ if (!fp)
+ PARENT_EXIT("Unable to direct benchmark status to /dev/null");
+
+ if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
+ /* Execute default fill_buf benchmark */
+ span = strtoul(benchmark_cmd[1], NULL, 10);
+ malloc_and_init_memory = atoi(benchmark_cmd[2]);
+ memflush = atoi(benchmark_cmd[3]);
+ operation = atoi(benchmark_cmd[4]);
+ sprintf(resctrl_val, "%s", benchmark_cmd[5]);
+
+ if (strcmp(resctrl_val, "cqm") != 0)
+ buffer_span = span * MB;
+ else
+ buffer_span = span;
+
+ if (run_fill_buf(buffer_span, malloc_and_init_memory, memflush,
+ operation, resctrl_val))
+ fprintf(stderr, "Error in running fill buffer\n");
+ } else {
+ /* Execute specified benchmark */
+ ret = execvp(benchmark_cmd[0], benchmark_cmd);
+ if (ret)
+ perror("wrong\n");
+ }
+
+ fclose(stdout);
+ PARENT_EXIT("Unable to run specified benchmark");
+}
+
+/*
+ * create_grp - Create a group only if one doesn't exist
+ * @grp_name: Name of the group
+ * @grp: Full path and name of the group
+ * @parent_grp: Full path and name of the parent group
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
+{
+ int found_grp = 0;
+ struct dirent *ep;
+ DIR *dp;
+
+ /*
+ * At this point, we are guaranteed to have resctrl FS mounted and if
+ * length of grp_name == 0, it means, user wants to use root con_mon
+ * grp, so do nothing
+ */
+ if (strlen(grp_name) == 0)
+ return 0;
+
+ /* Check if requested grp exists or not */
+ dp = opendir(parent_grp);
+ if (dp) {
+ while ((ep = readdir(dp)) != NULL) {
+ if (strcmp(ep->d_name, grp_name) == 0)
+ found_grp = 1;
+ }
+ closedir(dp);
+ } else {
+ perror("Unable to open resctrl for group");
+
+ return -1;
+ }
+
+ /* Requested grp doesn't exist, hence create it */
+ if (found_grp == 0) {
+ if (mkdir(grp, 0) == -1) {
+ perror("Unable to create group");
+
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int write_pid_to_tasks(char *tasks, pid_t pid)
+{
+ FILE *fp;
+
+ fp = fopen(tasks, "w");
+ if (!fp) {
+ perror("Failed to open tasks file");
+
+ return -1;
+ }
+ if (fprintf(fp, "%d\n", pid) < 0) {
+ perror("Failed to wr pid to tasks file");
+ fclose(fp);
+
+ return -1;
+ }
+ fclose(fp);
+
+ return 0;
+}
+
+/*
+ * write_bm_pid_to_resctrl - Write a PID (i.e. benchmark) to resctrl FS
+ * @bm_pid: PID that should be written
+ * @ctrlgrp: Name of the control monitor group (con_mon grp)
+ * @mongrp: Name of the monitor group (mon grp)
+ * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
+ *
+ * If a con_mon grp is requested, create it and write pid to it, otherwise
+ * write pid to root con_mon grp.
+ * If a mon grp is requested, create it and write pid to it, otherwise
+ * pid is not written, this means that pid is in con_mon grp and hence
+ * should consult con_mon grp's mon_data directory for results.
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
+ char *resctrl_val)
+{
+ char controlgroup[128], monitorgroup[512], monitorgroup_p[256];
+ char tasks[1024];
+ int ret = 0;
+
+ if (strlen(ctrlgrp))
+ sprintf(controlgroup, "%s/%s", RESCTRL_PATH, ctrlgrp);
+ else
+ sprintf(controlgroup, "%s", RESCTRL_PATH);
+
+ /* Create control and monitoring group and write pid into it */
+ ret = create_grp(ctrlgrp, controlgroup, RESCTRL_PATH);
+ if (ret)
+ goto out;
+ sprintf(tasks, "%s/tasks", controlgroup);
+ ret = write_pid_to_tasks(tasks, bm_pid);
+ if (ret)
+ goto out;
+
+ /* Create mon grp and write pid into it for "mbm" and "cqm" test */
+ if ((strcmp(resctrl_val, "cqm") == 0) ||
+ (strcmp(resctrl_val, "mbm") == 0)) {
+ if (strlen(mongrp)) {
+ sprintf(monitorgroup_p, "%s/mon_groups", controlgroup);
+ sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp);
+ ret = create_grp(mongrp, monitorgroup, monitorgroup_p);
+ if (ret)
+ goto out;
+
+ sprintf(tasks, "%s/mon_groups/%s/tasks",
+ controlgroup, mongrp);
+ ret = write_pid_to_tasks(tasks, bm_pid);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ printf("%sok writing benchmark parameters to resctrl FS\n",
+ ret ? "not " : "");
+ if (ret)
+ perror("# writing to resctrlfs");
+
+ tests_run++;
+
+ return ret;
+}
+
+/*
+ * write_schemata - Update schemata of a con_mon grp
+ * @ctrlgrp: Name of the con_mon grp
+ * @schemata: Schemata that should be updated to
+ * @cpu_no: CPU number that the benchmark PID is binded to
+ * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
+ *
+ * Update schemata of a con_mon grp *only* if requested resctrl feature is
+ * allocation type
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
+{
+ char controlgroup[1024], schema[1024], reason[64];
+ int resource_id, ret = 0;
+ FILE *fp;
+
+ if ((strcmp(resctrl_val, "mba") != 0) &&
+ (strcmp(resctrl_val, "cat") != 0) &&
+ (strcmp(resctrl_val, "cqm") != 0))
+ return -ENOENT;
+
+ if (!schemata) {
+ printf("# Skipping empty schemata update\n");
+
+ return -1;
+ }
+
+ if (get_resource_id(cpu_no, &resource_id) < 0) {
+ sprintf(reason, "Failed to get resource id");
+ ret = -1;
+
+ goto out;
+ }
+
+ if (strlen(ctrlgrp) != 0)
+ sprintf(controlgroup, "%s/%s/schemata", RESCTRL_PATH, ctrlgrp);
+ else
+ sprintf(controlgroup, "%s/schemata", RESCTRL_PATH);
+
+ if (!strcmp(resctrl_val, "cat") || !strcmp(resctrl_val, "cqm"))
+ sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata);
+ if (strcmp(resctrl_val, "mba") == 0)
+ sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata);
+
+ fp = fopen(controlgroup, "w");
+ if (!fp) {
+ sprintf(reason, "Failed to open control group");
+ ret = -1;
+
+ goto out;
+ }
+
+ if (fprintf(fp, "%s\n", schema) < 0) {
+ sprintf(reason, "Failed to write schemata in control group");
+ fclose(fp);
+ ret = -1;
+
+ goto out;
+ }
+ fclose(fp);
+
+out:
+ printf("%sok Write schema \"%s\" to resctrl FS%s%s\n",
+ ret ? "not " : "", schema, ret ? " # " : "",
+ ret ? reason : "");
+ tests_run++;
+
+ return ret;
+}
+
+bool check_resctrlfs_support(void)
+{
+ FILE *inf = fopen("/proc/filesystems", "r");
+ DIR *dp;
+ char *res;
+ bool ret = false;
+
+ if (!inf)
+ return false;
+
+ res = fgrep(inf, "nodev\tresctrl\n");
+
+ if (res) {
+ ret = true;
+ free(res);
+ }
+
+ fclose(inf);
+
+ printf("%sok kernel supports resctrl filesystem\n", ret ? "" : "not ");
+ tests_run++;
+
+ dp = opendir(RESCTRL_PATH);
+ printf("%sok resctrl mountpoint \"%s\" exists\n",
+ dp ? "" : "not ", RESCTRL_PATH);
+ if (dp)
+ closedir(dp);
+ tests_run++;
+
+ printf("# resctrl filesystem %s mounted\n",
+ find_resctrl_mount(NULL) ? "not" : "is");
+
+ return ret;
+}
+
+char *fgrep(FILE *inf, const char *str)
+{
+ char line[256];
+ int slen = strlen(str);
+
+ while (!feof(inf)) {
+ if (!fgets(line, 256, inf))
+ break;
+ if (strncmp(line, str, slen))
+ continue;
+
+ return strdup(line);
+ }
+
+ return NULL;
+}
+
+/*
+ * validate_resctrl_feature_request - Check if requested feature is valid.
+ * @resctrl_val: Requested feature
+ *
+ * Return: 0 on success, non-zero on failure
+ */
+bool validate_resctrl_feature_request(char *resctrl_val)
+{
+ FILE *inf = fopen("/proc/cpuinfo", "r");
+ bool found = false;
+ char *res;
+
+ if (!inf)
+ return false;
+
+ res = fgrep(inf, "flags");
+
+ if (res) {
+ char *s = strchr(res, ':');
+
+ found = s && !strstr(s, resctrl_val);
+ free(res);
+ }
+ fclose(inf);
+
+ return found;
+}
+
+int filter_dmesg(void)
+{
+ char line[1024];
+ FILE *fp;
+ int pipefds[2];
+ pid_t pid;
+ int ret;
+
+ ret = pipe(pipefds);
+ if (ret) {
+ perror("pipe");
+ return ret;
+ }
+ pid = fork();
+ if (pid == 0) {
+ close(pipefds[0]);
+ dup2(pipefds[1], STDOUT_FILENO);
+ execlp("dmesg", "dmesg", NULL);
+ perror("executing dmesg");
+ exit(1);
+ }
+ close(pipefds[1]);
+ fp = fdopen(pipefds[0], "r");
+ if (!fp) {
+ perror("fdopen(pipe)");
+ kill(pid, SIGTERM);
+
+ return -1;
+ }
+
+ while (fgets(line, 1024, fp)) {
+ if (strstr(line, "intel_rdt:"))
+ printf("# dmesg: %s", line);
+ if (strstr(line, "resctrl:"))
+ printf("# dmesg: %s", line);
+ }
+ fclose(fp);
+ waitpid(pid, NULL, 0);
+
+ return 0;
+}
+
+int validate_bw_report_request(char *bw_report)
+{
+ if (strcmp(bw_report, "reads") == 0)
+ return 0;
+ if (strcmp(bw_report, "writes") == 0)
+ return 0;
+ if (strcmp(bw_report, "nt-writes") == 0) {
+ strcpy(bw_report, "writes");
+ return 0;
+ }
+ if (strcmp(bw_report, "total") == 0)
+ return 0;
+
+ fprintf(stderr, "Requested iMC B/W report type unavailable\n");
+
+ return -1;
+}
+
+int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
+ int group_fd, unsigned long flags)
+{
+ int ret;
+
+ ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
+ group_fd, flags);
+ return ret;
+}
+
+unsigned int count_bits(unsigned long n)
+{
+ unsigned int count = 0;
+
+ while (n) {
+ count += n & 1;
+ n >>= 1;
+ }
+
+ return count;
+}
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
index cc610da7e369..5910888ebfe1 100644
--- a/tools/testing/selftests/rseq/.gitignore
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
basic_percpu_ops_test
basic_test
basic_rseq_op_test
diff --git a/tools/testing/selftests/rtc/.gitignore b/tools/testing/selftests/rtc/.gitignore
index d0ad44f6294a..fb2d533aa575 100644
--- a/tools/testing/selftests/rtc/.gitignore
+++ b/tools/testing/selftests/rtc/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
rtctest
setdate
diff --git a/tools/testing/selftests/safesetid/.gitignore b/tools/testing/selftests/safesetid/.gitignore
index 9c1a629bca01..25d3db172907 100644
--- a/tools/testing/selftests/safesetid/.gitignore
+++ b/tools/testing/selftests/safesetid/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
safesetid-test
diff --git a/tools/testing/selftests/seccomp/.gitignore b/tools/testing/selftests/seccomp/.gitignore
index 5af29d3a1b0a..dec678577f9c 100644
--- a/tools/testing/selftests/seccomp/.gitignore
+++ b/tools/testing/selftests/seccomp/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
seccomp_bpf
seccomp_benchmark
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index 1760b3e39730..0ebfe8b0e147 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,17 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-all:
-
-include ../lib.mk
-
-.PHONY: all clean
-
-BINARIES := seccomp_bpf seccomp_benchmark
CFLAGS += -Wl,-no-as-needed -Wall
+LDFLAGS += -lpthread
-seccomp_bpf: seccomp_bpf.c ../kselftest_harness.h
- $(CC) $(CFLAGS) $(LDFLAGS) $< -lpthread -o $@
-
-TEST_PROGS += $(BINARIES)
-EXTRA_CLEAN := $(BINARIES)
-
-all: $(BINARIES)
+TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark
+include ../lib.mk
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index ee1b727ede04..c0aa46ce14f6 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -212,6 +212,10 @@ struct seccomp_notif_sizes {
#define SECCOMP_USER_NOTIF_FLAG_CONTINUE 0x00000001
#endif
+#ifndef SECCOMP_FILTER_FLAG_TSYNC_ESRCH
+#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
@@ -909,7 +913,7 @@ TEST(ERRNO_order)
EXPECT_EQ(12, errno);
}
-FIXTURE_DATA(TRAP) {
+FIXTURE(TRAP) {
struct sock_fprog prog;
};
@@ -1020,7 +1024,7 @@ TEST_F(TRAP, handler)
EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
}
-FIXTURE_DATA(precedence) {
+FIXTURE(precedence) {
struct sock_fprog allow;
struct sock_fprog log;
struct sock_fprog trace;
@@ -1509,7 +1513,7 @@ void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
EXPECT_EQ(0, ret);
}
-FIXTURE_DATA(TRACE_poke) {
+FIXTURE(TRACE_poke) {
struct sock_fprog prog;
pid_t tracer;
long poked;
@@ -1817,7 +1821,7 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
change_syscall(_metadata, tracee, -1, -ESRCH);
}
-FIXTURE_DATA(TRACE_syscall) {
+FIXTURE(TRACE_syscall) {
struct sock_fprog prog;
pid_t tracer, mytid, mypid, parent;
};
@@ -2187,7 +2191,8 @@ TEST(detect_seccomp_filter_flags)
unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
SECCOMP_FILTER_FLAG_LOG,
SECCOMP_FILTER_FLAG_SPEC_ALLOW,
- SECCOMP_FILTER_FLAG_NEW_LISTENER };
+ SECCOMP_FILTER_FLAG_NEW_LISTENER,
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH };
unsigned int exclusive[] = {
SECCOMP_FILTER_FLAG_TSYNC,
SECCOMP_FILTER_FLAG_NEW_LISTENER };
@@ -2321,7 +2326,7 @@ struct tsync_sibling {
} \
} while (0)
-FIXTURE_DATA(TSYNC) {
+FIXTURE(TSYNC) {
struct sock_fprog root_prog, apply_prog;
struct tsync_sibling sibling[TSYNC_SIBLINGS];
sem_t started;
@@ -2645,6 +2650,55 @@ TEST_F(TSYNC, two_siblings_with_one_divergence)
EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
}
+TEST_F(TSYNC, two_siblings_with_one_divergence_no_tid_in_err)
+{
+ long ret, flags;
+ void *status;
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+ ASSERT_NE(ENOSYS, errno) {
+ TH_LOG("Kernel does not support seccomp syscall!");
+ }
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+ }
+ self->sibling[0].diverge = 1;
+ tsync_start_sibling(&self->sibling[0]);
+ tsync_start_sibling(&self->sibling[1]);
+
+ while (self->sibling_count < TSYNC_SIBLINGS) {
+ sem_wait(&self->started);
+ self->sibling_count++;
+ }
+
+ flags = SECCOMP_FILTER_FLAG_TSYNC | \
+ SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, flags, &self->apply_prog);
+ ASSERT_EQ(ESRCH, errno) {
+ TH_LOG("Did not return ESRCH for diverged sibling.");
+ }
+ ASSERT_EQ(-1, ret) {
+ TH_LOG("Did not fail on diverged sibling.");
+ }
+
+ /* Wake the threads */
+ pthread_mutex_lock(&self->mutex);
+ ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+ TH_LOG("cond broadcast non-zero");
+ }
+ pthread_mutex_unlock(&self->mutex);
+
+ /* Ensure they are both unkilled. */
+ PTHREAD_JOIN(self->sibling[0].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+ PTHREAD_JOIN(self->sibling[1].tid, &status);
+ EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
TEST_F(TSYNC, two_siblings_not_under_filter)
{
long ret, sib;
@@ -2749,12 +2803,13 @@ TEST(syscall_restart)
offsetof(struct seccomp_data, nr)),
#ifdef __NR_sigreturn
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 7, 0),
#endif
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 4, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 6, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 5, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 4, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_nanosleep, 5, 0),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_clock_nanosleep, 4, 0),
BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
/* Allow __NR_write for easy logging. */
@@ -2841,7 +2896,8 @@ TEST(syscall_restart)
ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
ASSERT_EQ(0x100, msg);
- EXPECT_EQ(__NR_nanosleep, get_syscall(_metadata, child_pid));
+ ret = get_syscall(_metadata, child_pid);
+ EXPECT_TRUE(ret == __NR_nanosleep || ret == __NR_clock_nanosleep);
/* Might as well check siginfo for sanity while we're here. */
ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
@@ -3196,6 +3252,24 @@ TEST(user_notification_basic)
EXPECT_EQ(0, WEXITSTATUS(status));
}
+TEST(user_notification_with_tsync)
+{
+ int ret;
+ unsigned int flags;
+
+ /* these were exclusive */
+ flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_TSYNC;
+ ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* but now they're not */
+ flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
+ ret = user_trap_syscall(__NR_getppid, flags);
+ close(ret);
+ ASSERT_LE(0, ret);
+}
+
TEST(user_notification_kill_in_middle)
{
pid_t pid;
diff --git a/tools/testing/selftests/sigaltstack/.gitignore b/tools/testing/selftests/sigaltstack/.gitignore
index 35897b0a3f44..50a19a8888ce 100644
--- a/tools/testing/selftests/sigaltstack/.gitignore
+++ b/tools/testing/selftests/sigaltstack/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
sas
diff --git a/tools/testing/selftests/size/.gitignore b/tools/testing/selftests/size/.gitignore
index 189b7818de34..923e18eed1a0 100644
--- a/tools/testing/selftests/size/.gitignore
+++ b/tools/testing/selftests/size/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
get_size
diff --git a/tools/testing/selftests/sparc64/drivers/.gitignore b/tools/testing/selftests/sparc64/drivers/.gitignore
index 90e835ed74e6..0331f77373b5 100644
--- a/tools/testing/selftests/sparc64/drivers/.gitignore
+++ b/tools/testing/selftests/sparc64/drivers/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
adi-test
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
index 1e23fefd68e8..d5a2da428752 100644
--- a/tools/testing/selftests/splice/.gitignore
+++ b/tools/testing/selftests/splice/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
default_file_splice_read
diff --git a/tools/testing/selftests/sync/.gitignore b/tools/testing/selftests/sync/.gitignore
index f5091e7792f2..f1152357712f 100644
--- a/tools/testing/selftests/sync/.gitignore
+++ b/tools/testing/selftests/sync/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
sync_test
diff --git a/tools/testing/selftests/sysctl/config b/tools/testing/selftests/sysctl/config
index 6ca14800d755..fc263efd1fad 100644
--- a/tools/testing/selftests/sysctl/config
+++ b/tools/testing/selftests/sysctl/config
@@ -1 +1 @@
-CONFIG_TEST_SYSCTL=y
+CONFIG_TEST_SYSCTL=m
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 6a970b127c9b..19515dcb7d04 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -39,16 +39,7 @@ ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002"
ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001"
ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003"
ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001"
-
-test_modprobe()
-{
- if [ ! -d $DIR ]; then
- echo "$0: $DIR not present" >&2
- echo "You must have the following enabled in your kernel:" >&2
- cat $TEST_DIR/config >&2
- exit $ksft_skip
- fi
-}
+ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int"
function allow_user_defaults()
{
@@ -122,13 +113,15 @@ test_reqs()
function load_req_mod()
{
- if [ ! -d $DIR ]; then
+ if [ ! -d $SYSCTL ]; then
if ! modprobe -q -n $TEST_DRIVER; then
echo "$0: module $TEST_DRIVER not found [SKIP]"
+ echo "You must set CONFIG_TEST_SYSCTL=m in your kernel" >&2
exit $ksft_skip
fi
modprobe $TEST_DRIVER
if [ $? -ne 0 ]; then
+ echo "$0: modprobe $TEST_DRIVER failed."
exit
fi
fi
@@ -752,6 +745,46 @@ sysctl_test_0006()
run_bitmaptest
}
+sysctl_test_0007()
+{
+ TARGET="${SYSCTL}/boot_int"
+ if [ ! -f $TARGET ]; then
+ echo "Skipping test for $TARGET as it is not present ..."
+ return $ksft_skip
+ fi
+
+ if [ -d $DIR ]; then
+ echo "Boot param test only possible sysctl_test is built-in, not module:"
+ cat $TEST_DIR/config >&2
+ return $ksft_skip
+ fi
+
+ echo -n "Testing if $TARGET is set to 1 ..."
+ ORIG=$(cat "${TARGET}")
+
+ if [ x$ORIG = "x1" ]; then
+ echo "ok"
+ return 0
+ fi
+ echo "FAIL"
+ echo "Checking if /proc/cmdline contains setting of the expected parameter ..."
+ if [ ! -f /proc/cmdline ]; then
+ echo "/proc/cmdline does not exist, test inconclusive"
+ return 0
+ fi
+
+ FOUND=$(grep -c "sysctl[./]debug[./]test_sysctl[./]boot_int=1" /proc/cmdline)
+ if [ $FOUND = "1" ]; then
+ echo "Kernel param found but $TARGET is not 1, TEST FAILED"
+ rc=1
+ test_rc
+ fi
+
+ echo "Skipping test, expected kernel parameter missing."
+ echo "To perform this test, make sure kernel is booted with parameter: sysctl.debug.test_sysctl.boot_int=1"
+ return $ksft_skip
+}
+
list_tests()
{
echo "Test ID list:"
@@ -766,6 +799,7 @@ list_tests()
echo "0004 x $(get_test_count 0004) - tests proc_douintvec()"
echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()"
+ echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param"
}
usage()
@@ -929,7 +963,6 @@ test_reqs
allow_user_defaults
check_production_sysctl_writes_strict
load_req_mod
-test_modprobe
trap "test_finish" EXIT
diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore
index c26d72e0166f..d52f65de23b4 100644
--- a/tools/testing/selftests/tc-testing/.gitignore
+++ b/tools/testing/selftests/tc-testing/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
__pycache__/
*.pyc
plugins/
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index c03af4600281..c33a7aac27ff 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -31,6 +31,7 @@ CONFIG_NET_EMATCH_U32=m
CONFIG_NET_EMATCH_META=m
CONFIG_NET_EMATCH_TEXT=m
CONFIG_NET_EMATCH_IPSET=m
+CONFIG_NET_EMATCH_CANID=m
CONFIG_NET_EMATCH_IPT=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=m
@@ -58,3 +59,8 @@ CONFIG_NET_IFE_SKBPRIO=m
CONFIG_NET_IFE_SKBTCINDEX=m
CONFIG_NET_SCH_FIFO=y
CONFIG_NET_SCH_ETS=m
+
+#
+## Network testing
+#
+CONFIG_CAN=m
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
index f8ea6f5fa8e9..72cdc3c800a5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
@@ -1472,6 +1472,31 @@
]
},
{
+ "id": "94bb",
+ "name": "Add pedit action with LAYERED_OP ip6 traffic_class",
+ "category": [
+ "actions",
+ "pedit",
+ "layered_op"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action pedit",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action pedit ex munge ip6 traffic_class set 0x40 continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action pedit",
+ "matchPattern": "ipv6\\+0: val 04000000 mask f00fffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action pedit"
+ ]
+ },
+ {
"id": "6f5e",
"name": "Add pedit action with LAYERED_OP ip6 flow_lbl",
"category": [
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
index 98a20faf3198..e788c114a484 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
@@ -372,5 +372,907 @@
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
]
+ },
+ {
+ "id": "bae4",
+ "name": "Add basic filter with u32 ematch u8/zero offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x11 0x0f at 0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(01000000/0f000000 at 0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e6cb",
+ "name": "Add basic filter with u32 ematch u8/zero offset and invalid value >0xFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x1122 0x0f at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/0f000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7727",
+ "name": "Add basic filter with u32 ematch u8/positive offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0x1f at 12)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(17000000/1f000000 at 12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "a429",
+ "name": "Add basic filter with u32 ematch u8/invalid mask >0xFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff00 at 12)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000/ff000000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8373",
+ "name": "Add basic filter with u32 ematch u8/missing offset",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff at)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "ab8e",
+ "name": "Add basic filter with u32 ematch u8/missing AT keyword",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x77 0xff 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77000000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "712d",
+ "name": "Add basic filter with u32 ematch u8/missing value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 at 12)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "350f",
+ "name": "Add basic filter with u32 ematch u8/non-numeric value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 zero 0xff at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ff000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e28f",
+ "name": "Add basic filter with u32 ematch u8/non-numeric mask",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x11 mask at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11000000/00000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6d5f",
+ "name": "Add basic filter with u32 ematch u8/negative offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0xaa 0xf0 at -14)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(0000a000/0000f000 at -16\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "12dc",
+ "name": "Add basic filter with u32 ematch u8/nexthdr+ offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0xaa 0xf0 at nexthdr+0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(a0000000/f0000000 at nexthdr\\+0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1d85",
+ "name": "Add basic filter with u32 ematch u16/zero offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x1122 0xffff at 0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/ffff0000 at 0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3672",
+ "name": "Add basic filter with u32 ematch u16/zero offset and invalid value >0xFFFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x112233 0xffff at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11223300/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7fb0",
+ "name": "Add basic filter with u32 ematch u16/positive offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0x1fff at 12)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(17880000/1fff0000 at 12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "19af",
+ "name": "Add basic filter with u32 ematch u16/invalid mask >0xFFFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffffffff at 12)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000/ffffffff at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "446d",
+ "name": "Add basic filter with u32 ematch u16/missing offset",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffff at)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "151b",
+ "name": "Add basic filter with u32 ematch u16/missing AT keyword",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0x7788 0xffff 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77880000/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "bb23",
+ "name": "Add basic filter with u32 ematch u16/missing value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 at 12)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "decc",
+ "name": "Add basic filter with u32 ematch u16/non-numeric value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 zero 0xffff at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e988",
+ "name": "Add basic filter with u32 ematch u16/non-numeric mask",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x1122 mask at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11220000/00000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "07d8",
+ "name": "Add basic filter with u32 ematch u16/negative offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0xaabb 0xffff at -12)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabb0000/ffff0000 at -12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "f474",
+ "name": "Add basic filter with u32 ematch u16/nexthdr+ offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u16 0xaabb 0xf0f0 at nexthdr+0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(a0b00000/f0f00000 at nexthdr\\+0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "47a0",
+ "name": "Add basic filter with u32 ematch u32/zero offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xffffffff at 0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabbccdd/ffffffff at 0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "849f",
+ "name": "Add basic filter with u32 ematch u32/positive offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11227788 0x1ffff0f0 at 12)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11227080/1ffff0f0 at 12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d288",
+ "name": "Add basic filter with u32 ematch u32/missing offset",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11227788 0xffffffff at)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11227788/ffffffff at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4998",
+ "name": "Add basic filter with u32 ematch u32/missing AT keyword",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x77889900 0xfffff0f0 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(77889900/fffff0f0 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1f0a",
+ "name": "Add basic filter with u32 ematch u32/missing value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 at 12)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "848e",
+ "name": "Add basic filter with u32 ematch u32/non-numeric value",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 zero 0xffff at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(00000000/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "f748",
+ "name": "Add basic filter with u32 ematch u32/non-numeric mask",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0x11223344 mask at 0)' classid 1:1",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(11223344/00000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "55a6",
+ "name": "Add basic filter with u32 ematch u32/negative offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xff00ff00 at -12)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aa00cc00/ff00ff00 at -12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7282",
+ "name": "Add basic filter with u32 ematch u32/nexthdr+ offset and default action",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u32 0xaabbccdd 0xffffffff at nexthdr+0)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*u32\\(aabbccdd/ffffffff at nexthdr\\+0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b2b6",
+ "name": "Add basic filter with canid ematch and single SFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 1)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "f67f",
+ "name": "Add basic filter with canid ematch and single SFF with mask",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0xaabb:0x00ff)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x2BB:0xFF\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "bd5c",
+ "name": "Add basic filter with canid ematch and multiple SFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 1 sff 2 sff 3)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0x1 sff 0x2 sff 0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "83c7",
+ "name": "Add basic filter with canid ematch and multiple SFF with masks",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0xaa:0x01 sff 0xbb:0x02 sff 0xcc:0x03)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(sff 0xAA:0x1 sff 0xBB:0x2 sff 0xCC:0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "a8f5",
+ "name": "Add basic filter with canid ematch and single EFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 1)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "98ae",
+ "name": "Add basic filter with canid ematch and single EFF with mask",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 0xaabb:0xf1f1)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0xAABB:0xF1F1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6056",
+ "name": "Add basic filter with canid ematch and multiple EFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 1 eff 2 eff 3)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x1 eff 0x2 eff 0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d188",
+ "name": "Add basic filter with canid ematch and multiple EFF with masks",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(eff 0xaa:0x01 eff 0xbb:0x02 eff 0xcc:0x03)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0xAA:0x1 eff 0xBB:0x2 eff 0xCC:0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "25d1",
+ "name": "Add basic filter with canid ematch and a combination of SFF/EFF",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0x01 eff 0x02)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x2 sff 0x1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b438",
+ "name": "Add basic filter with canid ematch and a combination of SFF/EFF with masks",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'canid(sff 0x01:0xf eff 0x02:0xf)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip basic",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 basic.*handle 0x1 flowid 1:1.*canid\\(eff 0x2:0xF sff 0x1:0xF\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 8877f7b2b809..bb543bf69d69 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -32,7 +32,7 @@
"setup": [
"$TC qdisc add dev $DEV2 ingress"
],
- "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 parent ffff: handle 0xffffffff flower action ok",
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok",
"expExitCode": "0",
"verifyCmd": "$TC filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff",
@@ -77,9 +77,9 @@
},
"setup": [
"$TC qdisc add dev $DEV2 ingress",
- "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
+ "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
],
- "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
"expExitCode": "2",
"verifyCmd": "$TC -s filter show dev $DEV2 ingress",
"matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
@@ -87,5 +87,43 @@
"teardown": [
"$TC qdisc del dev $DEV2 ingress"
]
+ },
+ {
+ "id": "7c65",
+ "name": "Add flower filter and then terse dump it",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+ "matchPattern": "filter protocol ip pref 1 flower.*handle",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
+ },
+ {
+ "id": "d45e",
+ "name": "Add flower filter and verify that terse dump doesn't output filter key",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+ "matchPattern": " dst_mac e4:11:22:11:4a:51",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
new file mode 100644
index 000000000000..1cda2e11b3ad
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
@@ -0,0 +1,21 @@
+[
+ {
+ "id": "83be",
+ "name": "Create FQ-PIE with invalid number of flows",
+ "category": [
+ "qdisc",
+ "fq_pie"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY root fq_pie flows 65536",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
new file mode 100644
index 000000000000..0703a2a255eb
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
@@ -0,0 +1,185 @@
+[
+ {
+ "id": "8b6e",
+ "name": "Create RED with no flags",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "342e",
+ "name": "Create RED with adaptive flag",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red adaptive limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb adaptive $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "2d4b",
+ "name": "Create RED with ECN flag",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "650f",
+ "name": "Create RED with flags ECN, adaptive",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn adaptive limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn adaptive $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "5f15",
+ "name": "Create RED with flags ECN, harddrop",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "53e8",
+ "name": "Create RED with flags ECN, nodrop",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn nodrop limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn nodrop $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "d091",
+ "name": "Fail to create RED with only nodrop flag",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red nodrop limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "af8e",
+ "name": "Create RED with flags ECN, nodrop, harddrop",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link add dev $DUMMY type dummy || /bin/true"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop nodrop limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop nodrop $",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY type dummy"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index e566c70e64a1..a3e43189d940 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -713,9 +713,8 @@ def set_operation_mode(pm, parser, args, remaining):
exit(0)
if args.list:
- if args.list:
- list_test_cases(alltests)
- exit(0)
+ list_test_cases(alltests)
+ exit(0)
if len(alltests):
req_plugins = pm.get_required_plugins(alltests)
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
index 6a2bd2cf528e..995f66ce43eb 100755
--- a/tools/testing/selftests/tc-testing/tdc_batch.py
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -72,21 +72,21 @@ mac_prefix = args.mac_prefix
def format_add_filter(device, prio, handle, skip, src_mac, dst_mac,
share_action):
- return ("filter add dev {} {} protocol ip parent ffff: handle {} "
+ return ("filter add dev {} {} protocol ip ingress handle {} "
" flower {} src_mac {} dst_mac {} action drop {}".format(
device, prio, handle, skip, src_mac, dst_mac, share_action))
def format_rep_filter(device, prio, handle, skip, src_mac, dst_mac,
share_action):
- return ("filter replace dev {} {} protocol ip parent ffff: handle {} "
+ return ("filter replace dev {} {} protocol ip ingress handle {} "
" flower {} src_mac {} dst_mac {} action drop {}".format(
device, prio, handle, skip, src_mac, dst_mac, share_action))
def format_del_filter(device, prio, handle, skip, src_mac, dst_mac,
share_action):
- return ("filter del dev {} {} protocol ip parent ffff: handle {} "
+ return ("filter del dev {} {} protocol ip ingress handle {} "
"flower".format(device, prio, handle))
diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore
index 789f21e81028..2e43851b47c1 100644
--- a/tools/testing/selftests/timens/.gitignore
+++ b/tools/testing/selftests/timens/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
clock_nanosleep
exec
gettime_perf
diff --git a/tools/testing/selftests/timens/clock_nanosleep.c b/tools/testing/selftests/timens/clock_nanosleep.c
index 8e7b7c72ef65..72d41b955fb2 100644
--- a/tools/testing/selftests/timens/clock_nanosleep.c
+++ b/tools/testing/selftests/timens/clock_nanosleep.c
@@ -119,7 +119,7 @@ int main(int argc, char *argv[])
ksft_set_plan(4);
- check_config_posix_timers();
+ check_supported_timers();
if (unshare_timens())
return 1;
diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c
index 87b47b557a7a..e40dc5be2f66 100644
--- a/tools/testing/selftests/timens/exec.c
+++ b/tools/testing/selftests/timens/exec.c
@@ -11,7 +11,6 @@
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
-#include <time.h>
#include <string.h>
#include "log.h"
diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c
index 43d93f4006b9..7f14f0fdac84 100644
--- a/tools/testing/selftests/timens/procfs.c
+++ b/tools/testing/selftests/timens/procfs.c
@@ -12,7 +12,6 @@
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
-#include <time.h>
#include "log.h"
#include "timens.h"
diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c
index 559d26e21ba0..52b6a1185f52 100644
--- a/tools/testing/selftests/timens/timens.c
+++ b/tools/testing/selftests/timens/timens.c
@@ -10,7 +10,6 @@
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
-#include <time.h>
#include <string.h>
#include "log.h"
@@ -156,7 +155,7 @@ int main(int argc, char *argv[])
nscheck();
- check_config_posix_timers();
+ check_supported_timers();
ksft_set_plan(ARRAY_SIZE(clocks) * 2);
diff --git a/tools/testing/selftests/timens/timens.h b/tools/testing/selftests/timens/timens.h
index e09e7e39bc52..d4fc52d47146 100644
--- a/tools/testing/selftests/timens/timens.h
+++ b/tools/testing/selftests/timens/timens.h
@@ -14,15 +14,26 @@
#endif
static int config_posix_timers = true;
+static int config_alarm_timers = true;
-static inline void check_config_posix_timers(void)
+static inline void check_supported_timers(void)
{
+ struct timespec ts;
+
if (timer_create(-1, 0, 0) == -1 && errno == ENOSYS)
config_posix_timers = false;
+
+ if (clock_gettime(CLOCK_BOOTTIME_ALARM, &ts) == -1 && errno == EINVAL)
+ config_alarm_timers = false;
}
static inline bool check_skip(int clockid)
{
+ if (!config_alarm_timers && clockid == CLOCK_BOOTTIME_ALARM) {
+ ksft_test_result_skip("CLOCK_BOOTTIME_ALARM isn't supported\n");
+ return true;
+ }
+
if (config_posix_timers)
return false;
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c
index 0cca7aafc4bd..5e7f0051bd7b 100644
--- a/tools/testing/selftests/timens/timer.c
+++ b/tools/testing/selftests/timens/timer.c
@@ -11,7 +11,6 @@
#include <stdio.h>
#include <stdint.h>
#include <signal.h>
-#include <time.h>
#include "log.h"
#include "timens.h"
@@ -23,6 +22,9 @@ int run_test(int clockid, struct timespec now)
timer_t fd;
int i;
+ if (check_skip(clockid))
+ return 0;
+
for (i = 0; i < 2; i++) {
struct sigevent sevp = {.sigev_notify = SIGEV_NONE};
int flags = 0;
@@ -75,6 +77,8 @@ int main(int argc, char *argv[])
nscheck();
+ check_supported_timers();
+
ksft_set_plan(3);
clock_gettime(CLOCK_MONOTONIC, &mtime_now);
diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c
index eff1ec5ff215..9edd43d6b2c1 100644
--- a/tools/testing/selftests/timens/timerfd.c
+++ b/tools/testing/selftests/timens/timerfd.c
@@ -28,6 +28,9 @@ int run_test(int clockid, struct timespec now)
long long elapsed;
int fd, i;
+ if (check_skip(clockid))
+ return 0;
+
if (tclock_gettime(clockid, &now))
return pr_perror("clock_gettime(%d)", clockid);
@@ -81,6 +84,8 @@ int main(int argc, char *argv[])
nscheck();
+ check_supported_timers();
+
ksft_set_plan(3);
clock_gettime(CLOCK_MONOTONIC, &mtime_now);
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore
index 32a9eadb2d4e..bb5326ff900b 100644
--- a/tools/testing/selftests/timers/.gitignore
+++ b/tools/testing/selftests/timers/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
alarmtimer-suspend
change_skew
clocksource-switch
diff --git a/tools/testing/selftests/tmpfs/.gitignore b/tools/testing/selftests/tmpfs/.gitignore
index a96838fad74d..b1afaa925905 100644
--- a/tools/testing/selftests/tmpfs/.gitignore
+++ b/tools/testing/selftests/tmpfs/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
/bug-link-o-tmpfile
diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh
index b630c7b5950a..663062701d5a 100755
--- a/tools/testing/selftests/tpm2/test_smoke.sh
+++ b/tools/testing/selftests/tpm2/test_smoke.sh
@@ -1,17 +1,13 @@
#!/bin/bash
# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
-self.flags = flags
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+[ -f /dev/tpm0 ] || exit $ksft_skip
-if [ -f /dev/tpm0 ] ; then
- python -m unittest -v tpm2_tests.SmokeTest
- python -m unittest -v tpm2_tests.AsyncTest
-else
- exit $ksft_skip
-fi
+python -m unittest -v tpm2_tests.SmokeTest
+python -m unittest -v tpm2_tests.AsyncTest
CLEAR_CMD=$(which tpm2_clear)
if [ -n $CLEAR_CMD ]; then
diff --git a/tools/testing/selftests/tpm2/test_space.sh b/tools/testing/selftests/tpm2/test_space.sh
index 180b469c53b4..36c9d030a1c6 100755
--- a/tools/testing/selftests/tpm2/test_space.sh
+++ b/tools/testing/selftests/tpm2/test_space.sh
@@ -4,8 +4,6 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-if [ -f /dev/tpmrm0 ] ; then
- python -m unittest -v tpm2_tests.SpaceTest
-else
- exit $ksft_skip
-fi
+[ -f /dev/tpmrm0 ] || exit $ksft_skip
+
+python -m unittest -v tpm2_tests.SpaceTest
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
index 133bf9ee986c..5eb64d41e541 100644
--- a/tools/testing/selftests/vDSO/.gitignore
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -1,2 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
vdso_test
+vdso_test_gettimeofday
+vdso_test_getcpu
vdso_standalone_test_x86
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 9e03d61f52fd..0069f2f83f86 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -4,7 +4,7 @@ include ../lib.mk
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-TEST_GEN_PROGS := $(OUTPUT)/vdso_test
+TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
ifeq ($(ARCH),x86)
TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
endif
@@ -17,7 +17,8 @@ LDLIBS += -lgcc_s
endif
all: $(TEST_GEN_PROGS)
-$(OUTPUT)/vdso_test: parse_vdso.c vdso_test.c
+$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
+$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
vdso_standalone_test_x86.c parse_vdso.c \
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 1dbb4b87268f..413f75620a35 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -21,29 +21,7 @@
#include <limits.h>
#include <elf.h>
-/*
- * To use this vDSO parser, first call one of the vdso_init_* functions.
- * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
- * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv.
- * Then call vdso_sym for each symbol you want. For example, to look up
- * gettimeofday on x86_64, use:
- *
- * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
- * or
- * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
- *
- * vdso_sym will return 0 if the symbol doesn't exist or if the init function
- * failed or was not called. vdso_sym is a little slow, so its return value
- * should be cached.
- *
- * vdso_sym is threadsafe; the init functions are not.
- *
- * These are the prototypes:
- */
-extern void vdso_init_from_auxv(void *auxv);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void *vdso_sym(const char *version, const char *name);
-
+#include "parse_vdso.h"
/* And here's the code. */
#ifndef ELF_BITS
diff --git a/tools/testing/selftests/vDSO/parse_vdso.h b/tools/testing/selftests/vDSO/parse_vdso.h
new file mode 100644
index 000000000000..de0453067d7c
--- /dev/null
+++ b/tools/testing/selftests/vDSO/parse_vdso.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef PARSE_VDSO_H
+#define PARSE_VDSO_H
+
+#include <stdint.h>
+
+/*
+ * To use this vDSO parser, first call one of the vdso_init_* functions.
+ * If you've already parsed auxv, then pass the value of AT_SYSINFO_EHDR
+ * to vdso_init_from_sysinfo_ehdr. Otherwise pass auxv to vdso_init_from_auxv.
+ * Then call vdso_sym for each symbol you want. For example, to look up
+ * gettimeofday on x86_64, use:
+ *
+ * <some pointer> = vdso_sym("LINUX_2.6", "gettimeofday");
+ * or
+ * <some pointer> = vdso_sym("LINUX_2.6", "__vdso_gettimeofday");
+ *
+ * vdso_sym will return 0 if the symbol doesn't exist or if the init function
+ * failed or was not called. vdso_sym is a little slow, so its return value
+ * should be cached.
+ *
+ * vdso_sym is threadsafe; the init functions are not.
+ *
+ * These are the prototypes:
+ */
+void *vdso_sym(const char *version, const char *name);
+void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+void vdso_init_from_auxv(void *auxv);
+
+#endif
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 5ac4b00acfbc..8a44ff973ee1 100644
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -16,9 +16,7 @@
#include <unistd.h>
#include <stdint.h>
-extern void *vdso_sym(const char *version, const char *name);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void vdso_init_from_auxv(void *auxv);
+#include "parse_vdso.h"
/* We need a libc functions... */
int strcmp(const char *a, const char *b)
diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
new file mode 100644
index 000000000000..fc25ede131b8
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vdso_test_getcpu.c: Sample code to test parse_vdso.c and vDSO getcpu()
+ *
+ * Copyright (c) 2020 Arm Ltd
+ */
+
+#include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
+
+#include "../kselftest.h"
+#include "parse_vdso.h"
+
+const char *version = "LINUX_2.6";
+const char *name = "__vdso_getcpu";
+
+struct getcpu_cache;
+typedef long (*getcpu_t)(unsigned int *, unsigned int *,
+ struct getcpu_cache *);
+
+int main(int argc, char **argv)
+{
+ unsigned long sysinfo_ehdr;
+ unsigned int cpu, node;
+ getcpu_t get_cpu;
+ long ret;
+
+ sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+ if (!sysinfo_ehdr) {
+ printf("AT_SYSINFO_EHDR is not present!\n");
+ return KSFT_SKIP;
+ }
+
+ vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+
+ get_cpu = (getcpu_t)vdso_sym(version, name);
+ if (!get_cpu) {
+ printf("Could not find %s\n", name);
+ return KSFT_SKIP;
+ }
+
+ ret = get_cpu(&cpu, &node, 0);
+ if (ret == 0) {
+ printf("Running on CPU %u node %u\n", cpu, node);
+ } else {
+ printf("%s failed\n", name);
+ return KSFT_FAIL;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vDSO/vdso_test.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
index 719d5a6bd664..8ccc73ed8240 100644
--- a/tools/testing/selftests/vDSO/vdso_test.c
+++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
@@ -1,10 +1,11 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
- * vdso_test.c: Sample code to test parse_vdso.c
+ * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and
+ * vDSO gettimeofday()
* Copyright (c) 2014 Andy Lutomirski
*
* Compile with:
- * gcc -std=gnu99 vdso_test.c parse_vdso.c
+ * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso_gettimeofday.c
*
* Tested on x86, 32-bit and 64-bit. It may work on other architectures, too.
*/
@@ -16,10 +17,7 @@
#include <sys/time.h>
#include "../kselftest.h"
-
-extern void *vdso_sym(const char *version, const char *name);
-extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
-extern void vdso_init_from_auxv(void *auxv);
+#include "parse_vdso.h"
/*
* ARM64's vDSO exports its gettimeofday() implementation with a different
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 31b3c98b6d34..849e8226395a 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -1,12 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
hugepage-mmap
hugepage-shm
+khugepaged
map_hugetlb
map_populate
thuge-gen
compaction_test
mlock2-tests
+mremap_dontunmap
on-fault-limit
transhuge-stress
+protection_keys
userfaultfd
mlock-intersect-test
mlock-random-test
@@ -14,3 +18,5 @@ virtual_address_range
gup_benchmark
va_128TBswitch
map_fixed_noreplace
+write_to_hugetlbfs
+hmm-tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 7f9a8a8c31da..a9026706d597 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,12 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for vm selftests
uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
+MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
LDLIBS = -lrt
TEST_GEN_FILES = compaction_test
TEST_GEN_FILES += gup_benchmark
+TEST_GEN_FILES += hmm-tests
TEST_GEN_FILES += hugepage-mmap
TEST_GEN_FILES += hugepage-shm
TEST_GEN_FILES += map_hugetlb
@@ -14,14 +15,41 @@ TEST_GEN_FILES += map_fixed_noreplace
TEST_GEN_FILES += map_populate
TEST_GEN_FILES += mlock-random-test
TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += mremap_dontunmap
TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
+TEST_GEN_FILES += khugepaged
-ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64))
+ifeq ($(ARCH),x86_64)
+CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
+
+TARGETS := protection_keys
+BINARIES_32 := $(TARGETS:%=%_32)
+BINARIES_64 := $(TARGETS:%=%_64)
+
+ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+CFLAGS += -no-pie
+endif
+
+ifeq ($(CAN_BUILD_I386),1)
+TEST_GEN_FILES += $(BINARIES_32)
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+TEST_GEN_FILES += $(BINARIES_64)
+endif
+else
+TEST_GEN_FILES += protection_keys
+endif
+
+ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64))
TEST_GEN_FILES += va_128TBswitch
TEST_GEN_FILES += virtual_address_range
+TEST_GEN_FILES += write_to_hugetlbfs
endif
TEST_PROGS := run_vmtests
@@ -31,6 +59,57 @@ TEST_FILES := test_vmalloc.sh
KSFT_KHDR_INSTALL := 1
include ../lib.mk
+$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread
+
+ifeq ($(ARCH),x86_64)
+BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
+BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+
+define gen-target-rule-32
+$(1) $(1)_32: $(OUTPUT)/$(1)_32
+.PHONY: $(1) $(1)_32
+endef
+
+define gen-target-rule-64
+$(1) $(1)_64: $(OUTPUT)/$(1)_64
+.PHONY: $(1) $(1)_64
+endef
+
+ifeq ($(CAN_BUILD_I386),1)
+$(BINARIES_32): CFLAGS += -m32
+$(BINARIES_32): LDLIBS += -lrt -ldl -lm
+$(BINARIES_32): %_32: %.c
+ $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+$(BINARIES_64): CFLAGS += -m64
+$(BINARIES_64): LDLIBS += -lrt -ldl
+$(BINARIES_64): %_64: %.c
+ $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
+endif
+
+# x86_64 users should be encouraged to install 32-bit libraries
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
+all: warn_32bit_failure
+
+warn_32bit_failure:
+ @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \
+ echo "environment. This will reduce test coverage of 64-bit" 2>&1; \
+ echo "kernels. If you are using a Debian-like distribution," 2>&1; \
+ echo "try:"; 2>&1; \
+ echo ""; \
+ echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+ echo ""; \
+ echo "If you are using a Fedora-like distribution, try:"; \
+ echo ""; \
+ echo " yum install glibc-devel.*i686"; \
+ exit 0;
+endif
+endif
+
$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
new file mode 100644
index 000000000000..18d33684faad
--- /dev/null
+++ b/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
@@ -0,0 +1,575 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+if [[ $(id -u) -ne 0 ]]; then
+ echo "This test must be run as root. Skipping..."
+ exit 0
+fi
+
+fault_limit_file=limit_in_bytes
+reservation_limit_file=rsvd.limit_in_bytes
+fault_usage_file=usage_in_bytes
+reservation_usage_file=rsvd.usage_in_bytes
+
+if [[ "$1" == "-cgroup-v2" ]]; then
+ cgroup2=1
+ fault_limit_file=max
+ reservation_limit_file=rsvd.max
+ fault_usage_file=current
+ reservation_usage_file=rsvd.current
+fi
+
+cgroup_path=/dev/cgroup/memory
+if [[ ! -e $cgroup_path ]]; then
+ mkdir -p $cgroup_path
+ if [[ $cgroup2 ]]; then
+ mount -t cgroup2 none $cgroup_path
+ else
+ mount -t cgroup memory,hugetlb $cgroup_path
+ fi
+fi
+
+if [[ $cgroup2 ]]; then
+ echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control
+fi
+
+function cleanup() {
+ if [[ $cgroup2 ]]; then
+ echo $$ >$cgroup_path/cgroup.procs
+ else
+ echo $$ >$cgroup_path/tasks
+ fi
+
+ if [[ -e /mnt/huge ]]; then
+ rm -rf /mnt/huge/*
+ umount /mnt/huge || echo error
+ rmdir /mnt/huge
+ fi
+ if [[ -e $cgroup_path/hugetlb_cgroup_test ]]; then
+ rmdir $cgroup_path/hugetlb_cgroup_test
+ fi
+ if [[ -e $cgroup_path/hugetlb_cgroup_test1 ]]; then
+ rmdir $cgroup_path/hugetlb_cgroup_test1
+ fi
+ if [[ -e $cgroup_path/hugetlb_cgroup_test2 ]]; then
+ rmdir $cgroup_path/hugetlb_cgroup_test2
+ fi
+ echo 0 >/proc/sys/vm/nr_hugepages
+ echo CLEANUP DONE
+}
+
+function expect_equal() {
+ local expected="$1"
+ local actual="$2"
+ local error="$3"
+
+ if [[ "$expected" != "$actual" ]]; then
+ echo "expected ($expected) != actual ($actual): $3"
+ cleanup
+ exit 1
+ fi
+}
+
+function get_machine_hugepage_size() {
+ hpz=$(grep -i hugepagesize /proc/meminfo)
+ kb=${hpz:14:-3}
+ mb=$(($kb / 1024))
+ echo $mb
+}
+
+MB=$(get_machine_hugepage_size)
+
+function setup_cgroup() {
+ local name="$1"
+ local cgroup_limit="$2"
+ local reservation_limit="$3"
+
+ mkdir $cgroup_path/$name
+
+ echo writing cgroup limit: "$cgroup_limit"
+ echo "$cgroup_limit" >$cgroup_path/$name/hugetlb.${MB}MB.$fault_limit_file
+
+ echo writing reseravation limit: "$reservation_limit"
+ echo "$reservation_limit" > \
+ $cgroup_path/$name/hugetlb.${MB}MB.$reservation_limit_file
+
+ if [ -e "$cgroup_path/$name/cpuset.cpus" ]; then
+ echo 0 >$cgroup_path/$name/cpuset.cpus
+ fi
+ if [ -e "$cgroup_path/$name/cpuset.mems" ]; then
+ echo 0 >$cgroup_path/$name/cpuset.mems
+ fi
+}
+
+function wait_for_hugetlb_memory_to_get_depleted() {
+ local cgroup="$1"
+ local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ # Wait for hugetlbfs memory to get depleted.
+ while [ $(cat $path) != 0 ]; do
+ echo Waiting for hugetlb memory to get depleted.
+ cat $path
+ sleep 0.5
+ done
+}
+
+function wait_for_hugetlb_memory_to_get_reserved() {
+ local cgroup="$1"
+ local size="$2"
+
+ local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ # Wait for hugetlbfs memory to get written.
+ while [ $(cat $path) != $size ]; do
+ echo Waiting for hugetlb memory reservation to reach size $size.
+ cat $path
+ sleep 0.5
+ done
+}
+
+function wait_for_hugetlb_memory_to_get_written() {
+ local cgroup="$1"
+ local size="$2"
+
+ local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
+ # Wait for hugetlbfs memory to get written.
+ while [ $(cat $path) != $size ]; do
+ echo Waiting for hugetlb memory to reach size $size.
+ cat $path
+ sleep 0.5
+ done
+}
+
+function write_hugetlbfs_and_get_usage() {
+ local cgroup="$1"
+ local size="$2"
+ local populate="$3"
+ local write="$4"
+ local path="$5"
+ local method="$6"
+ local private="$7"
+ local expect_failure="$8"
+ local reserve="$9"
+
+ # Function return values.
+ reservation_failed=0
+ oom_killed=0
+ hugetlb_difference=0
+ reserved_difference=0
+
+ local hugetlb_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file
+ local reserved_usage=$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file
+
+ local hugetlb_before=$(cat $hugetlb_usage)
+ local reserved_before=$(cat $reserved_usage)
+
+ echo
+ echo Starting:
+ echo hugetlb_usage="$hugetlb_before"
+ echo reserved_usage="$reserved_before"
+ echo expect_failure is "$expect_failure"
+
+ output=$(mktemp)
+ set +e
+ if [[ "$method" == "1" ]] || [[ "$method" == 2 ]] ||
+ [[ "$private" == "-r" ]] && [[ "$expect_failure" != 1 ]]; then
+
+ bash write_hugetlb_memory.sh "$size" "$populate" "$write" \
+ "$cgroup" "$path" "$method" "$private" "-l" "$reserve" 2>&1 | tee $output &
+
+ local write_result=$?
+ local write_pid=$!
+
+ until grep -q -i "DONE" $output; do
+ echo waiting for DONE signal.
+ if ! ps $write_pid > /dev/null
+ then
+ echo "FAIL: The write died"
+ cleanup
+ exit 1
+ fi
+ sleep 0.5
+ done
+
+ echo ================= write_hugetlb_memory.sh output is:
+ cat $output
+ echo ================= end output.
+
+ if [[ "$populate" == "-o" ]] || [[ "$write" == "-w" ]]; then
+ wait_for_hugetlb_memory_to_get_written "$cgroup" "$size"
+ elif [[ "$reserve" != "-n" ]]; then
+ wait_for_hugetlb_memory_to_get_reserved "$cgroup" "$size"
+ else
+ # This case doesn't produce visible effects, but we still have
+ # to wait for the async process to start and execute...
+ sleep 0.5
+ fi
+
+ echo write_result is $write_result
+ else
+ bash write_hugetlb_memory.sh "$size" "$populate" "$write" \
+ "$cgroup" "$path" "$method" "$private" "$reserve"
+ local write_result=$?
+
+ if [[ "$reserve" != "-n" ]]; then
+ wait_for_hugetlb_memory_to_get_reserved "$cgroup" "$size"
+ fi
+ fi
+ set -e
+
+ if [[ "$write_result" == 1 ]]; then
+ reservation_failed=1
+ fi
+
+ # On linus/master, the above process gets SIGBUS'd on oomkill, with
+ # return code 135. On earlier kernels, it gets actual oomkill, with return
+ # code 137, so just check for both conditions in case we're testing
+ # against an earlier kernel.
+ if [[ "$write_result" == 135 ]] || [[ "$write_result" == 137 ]]; then
+ oom_killed=1
+ fi
+
+ local hugetlb_after=$(cat $hugetlb_usage)
+ local reserved_after=$(cat $reserved_usage)
+
+ echo After write:
+ echo hugetlb_usage="$hugetlb_after"
+ echo reserved_usage="$reserved_after"
+
+ hugetlb_difference=$(($hugetlb_after - $hugetlb_before))
+ reserved_difference=$(($reserved_after - $reserved_before))
+}
+
+function cleanup_hugetlb_memory() {
+ set +e
+ local cgroup="$1"
+ if [[ "$(pgrep -f write_to_hugetlbfs)" != "" ]]; then
+ echo killing write_to_hugetlbfs
+ killall -2 write_to_hugetlbfs
+ wait_for_hugetlb_memory_to_get_depleted $cgroup
+ fi
+ set -e
+
+ if [[ -e /mnt/huge ]]; then
+ rm -rf /mnt/huge/*
+ umount /mnt/huge
+ rmdir /mnt/huge
+ fi
+}
+
+function run_test() {
+ local size=$(($1 * ${MB} * 1024 * 1024))
+ local populate="$2"
+ local write="$3"
+ local cgroup_limit=$(($4 * ${MB} * 1024 * 1024))
+ local reservation_limit=$(($5 * ${MB} * 1024 * 1024))
+ local nr_hugepages="$6"
+ local method="$7"
+ local private="$8"
+ local expect_failure="$9"
+ local reserve="${10}"
+
+ # Function return values.
+ hugetlb_difference=0
+ reserved_difference=0
+ reservation_failed=0
+ oom_killed=0
+
+ echo nr hugepages = "$nr_hugepages"
+ echo "$nr_hugepages" >/proc/sys/vm/nr_hugepages
+
+ setup_cgroup "hugetlb_cgroup_test" "$cgroup_limit" "$reservation_limit"
+
+ mkdir -p /mnt/huge
+ mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge
+
+ write_hugetlbfs_and_get_usage "hugetlb_cgroup_test" "$size" "$populate" \
+ "$write" "/mnt/huge/test" "$method" "$private" "$expect_failure" \
+ "$reserve"
+
+ cleanup_hugetlb_memory "hugetlb_cgroup_test"
+
+ local final_hugetlb=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$fault_usage_file)
+ local final_reservation=$(cat $cgroup_path/hugetlb_cgroup_test/hugetlb.${MB}MB.$reservation_usage_file)
+
+ echo $hugetlb_difference
+ echo $reserved_difference
+ expect_equal "0" "$final_hugetlb" "final hugetlb is not zero"
+ expect_equal "0" "$final_reservation" "final reservation is not zero"
+}
+
+function run_multiple_cgroup_test() {
+ local size1="$1"
+ local populate1="$2"
+ local write1="$3"
+ local cgroup_limit1="$4"
+ local reservation_limit1="$5"
+
+ local size2="$6"
+ local populate2="$7"
+ local write2="$8"
+ local cgroup_limit2="$9"
+ local reservation_limit2="${10}"
+
+ local nr_hugepages="${11}"
+ local method="${12}"
+ local private="${13}"
+ local expect_failure="${14}"
+ local reserve="${15}"
+
+ # Function return values.
+ hugetlb_difference1=0
+ reserved_difference1=0
+ reservation_failed1=0
+ oom_killed1=0
+
+ hugetlb_difference2=0
+ reserved_difference2=0
+ reservation_failed2=0
+ oom_killed2=0
+
+ echo nr hugepages = "$nr_hugepages"
+ echo "$nr_hugepages" >/proc/sys/vm/nr_hugepages
+
+ setup_cgroup "hugetlb_cgroup_test1" "$cgroup_limit1" "$reservation_limit1"
+ setup_cgroup "hugetlb_cgroup_test2" "$cgroup_limit2" "$reservation_limit2"
+
+ mkdir -p /mnt/huge
+ mount -t hugetlbfs -o pagesize=${MB}M,size=256M none /mnt/huge
+
+ write_hugetlbfs_and_get_usage "hugetlb_cgroup_test1" "$size1" \
+ "$populate1" "$write1" "/mnt/huge/test1" "$method" "$private" \
+ "$expect_failure" "$reserve"
+
+ hugetlb_difference1=$hugetlb_difference
+ reserved_difference1=$reserved_difference
+ reservation_failed1=$reservation_failed
+ oom_killed1=$oom_killed
+
+ local cgroup1_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$fault_usage_file
+ local cgroup1_reservation_usage=$cgroup_path/hugetlb_cgroup_test1/hugetlb.${MB}MB.$reservation_usage_file
+ local cgroup2_hugetlb_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$fault_usage_file
+ local cgroup2_reservation_usage=$cgroup_path/hugetlb_cgroup_test2/hugetlb.${MB}MB.$reservation_usage_file
+
+ local usage_before_second_write=$(cat $cgroup1_hugetlb_usage)
+ local reservation_usage_before_second_write=$(cat $cgroup1_reservation_usage)
+
+ write_hugetlbfs_and_get_usage "hugetlb_cgroup_test2" "$size2" \
+ "$populate2" "$write2" "/mnt/huge/test2" "$method" "$private" \
+ "$expect_failure" "$reserve"
+
+ hugetlb_difference2=$hugetlb_difference
+ reserved_difference2=$reserved_difference
+ reservation_failed2=$reservation_failed
+ oom_killed2=$oom_killed
+
+ expect_equal "$usage_before_second_write" \
+ "$(cat $cgroup1_hugetlb_usage)" "Usage changed."
+ expect_equal "$reservation_usage_before_second_write" \
+ "$(cat $cgroup1_reservation_usage)" "Reservation usage changed."
+
+ cleanup_hugetlb_memory
+
+ local final_hugetlb=$(cat $cgroup1_hugetlb_usage)
+ local final_reservation=$(cat $cgroup1_reservation_usage)
+
+ expect_equal "0" "$final_hugetlb" \
+ "hugetlbt_cgroup_test1 final hugetlb is not zero"
+ expect_equal "0" "$final_reservation" \
+ "hugetlbt_cgroup_test1 final reservation is not zero"
+
+ local final_hugetlb=$(cat $cgroup2_hugetlb_usage)
+ local final_reservation=$(cat $cgroup2_reservation_usage)
+
+ expect_equal "0" "$final_hugetlb" \
+ "hugetlb_cgroup_test2 final hugetlb is not zero"
+ expect_equal "0" "$final_reservation" \
+ "hugetlb_cgroup_test2 final reservation is not zero"
+}
+
+cleanup
+
+for populate in "" "-o"; do
+ for method in 0 1 2; do
+ for private in "" "-r"; do
+ for reserve in "" "-n"; do
+
+ # Skip mmap(MAP_HUGETLB | MAP_SHARED). Doesn't seem to be supported.
+ if [[ "$method" == 1 ]] && [[ "$private" == "" ]]; then
+ continue
+ fi
+
+ # Skip populated shmem tests. Doesn't seem to be supported.
+ if [[ "$method" == 2"" ]] && [[ "$populate" == "-o" ]]; then
+ continue
+ fi
+
+ if [[ "$method" == 2"" ]] && [[ "$reserve" == "-n" ]]; then
+ continue
+ fi
+
+ cleanup
+ echo
+ echo
+ echo
+ echo Test normal case.
+ echo private=$private, populate=$populate, method=$method, reserve=$reserve
+ run_test 5 "$populate" "" 10 10 10 "$method" "$private" "0" "$reserve"
+
+ echo Memory charged to hugtlb=$hugetlb_difference
+ echo Memory charged to reservation=$reserved_difference
+
+ if [[ "$populate" == "-o" ]]; then
+ expect_equal "$((5 * $MB * 1024 * 1024))" "$hugetlb_difference" \
+ "Reserved memory charged to hugetlb cgroup."
+ else
+ expect_equal "0" "$hugetlb_difference" \
+ "Reserved memory charged to hugetlb cgroup."
+ fi
+
+ if [[ "$reserve" != "-n" ]] || [[ "$populate" == "-o" ]]; then
+ expect_equal "$((5 * $MB * 1024 * 1024))" "$reserved_difference" \
+ "Reserved memory not charged to reservation usage."
+ else
+ expect_equal "0" "$reserved_difference" \
+ "Reserved memory not charged to reservation usage."
+ fi
+
+ echo 'PASS'
+
+ cleanup
+ echo
+ echo
+ echo
+ echo Test normal case with write.
+ echo private=$private, populate=$populate, method=$method, reserve=$reserve
+ run_test 5 "$populate" '-w' 5 5 10 "$method" "$private" "0" "$reserve"
+
+ echo Memory charged to hugtlb=$hugetlb_difference
+ echo Memory charged to reservation=$reserved_difference
+
+ expect_equal "$((5 * $MB * 1024 * 1024))" "$hugetlb_difference" \
+ "Reserved memory charged to hugetlb cgroup."
+
+ expect_equal "$((5 * $MB * 1024 * 1024))" "$reserved_difference" \
+ "Reserved memory not charged to reservation usage."
+
+ echo 'PASS'
+
+ cleanup
+ continue
+ echo
+ echo
+ echo
+ echo Test more than reservation case.
+ echo private=$private, populate=$populate, method=$method, reserve=$reserve
+
+ if [ "$reserve" != "-n" ]; then
+ run_test "5" "$populate" '' "10" "2" "10" "$method" "$private" "1" \
+ "$reserve"
+
+ expect_equal "1" "$reservation_failed" "Reservation succeeded."
+ fi
+
+ echo 'PASS'
+
+ cleanup
+
+ echo
+ echo
+ echo
+ echo Test more than cgroup limit case.
+ echo private=$private, populate=$populate, method=$method, reserve=$reserve
+
+ # Not sure if shm memory can be cleaned up when the process gets sigbus'd.
+ if [[ "$method" != 2 ]]; then
+ run_test 5 "$populate" "-w" 2 10 10 "$method" "$private" "1" "$reserve"
+
+ expect_equal "1" "$oom_killed" "Not oom killed."
+ fi
+ echo 'PASS'
+
+ cleanup
+
+ echo
+ echo
+ echo
+ echo Test normal case, multiple cgroups.
+ echo private=$private, populate=$populate, method=$method, reserve=$reserve
+ run_multiple_cgroup_test "3" "$populate" "" "10" "10" "5" \
+ "$populate" "" "10" "10" "10" \
+ "$method" "$private" "0" "$reserve"
+
+ echo Memory charged to hugtlb1=$hugetlb_difference1
+ echo Memory charged to reservation1=$reserved_difference1
+ echo Memory charged to hugtlb2=$hugetlb_difference2
+ echo Memory charged to reservation2=$reserved_difference2
+
+ if [[ "$reserve" != "-n" ]] || [[ "$populate" == "-o" ]]; then
+ expect_equal "3" "$reserved_difference1" \
+ "Incorrect reservations charged to cgroup 1."
+
+ expect_equal "5" "$reserved_difference2" \
+ "Incorrect reservation charged to cgroup 2."
+
+ else
+ expect_equal "0" "$reserved_difference1" \
+ "Incorrect reservations charged to cgroup 1."
+
+ expect_equal "0" "$reserved_difference2" \
+ "Incorrect reservation charged to cgroup 2."
+ fi
+
+ if [[ "$populate" == "-o" ]]; then
+ expect_equal "3" "$hugetlb_difference1" \
+ "Incorrect hugetlb charged to cgroup 1."
+
+ expect_equal "5" "$hugetlb_difference2" \
+ "Incorrect hugetlb charged to cgroup 2."
+
+ else
+ expect_equal "0" "$hugetlb_difference1" \
+ "Incorrect hugetlb charged to cgroup 1."
+
+ expect_equal "0" "$hugetlb_difference2" \
+ "Incorrect hugetlb charged to cgroup 2."
+ fi
+ echo 'PASS'
+
+ cleanup
+ echo
+ echo
+ echo
+ echo Test normal case with write, multiple cgroups.
+ echo private=$private, populate=$populate, method=$method, reserve=$reserve
+ run_multiple_cgroup_test "3" "$populate" "-w" "10" "10" "5" \
+ "$populate" "-w" "10" "10" "10" \
+ "$method" "$private" "0" "$reserve"
+
+ echo Memory charged to hugtlb1=$hugetlb_difference1
+ echo Memory charged to reservation1=$reserved_difference1
+ echo Memory charged to hugtlb2=$hugetlb_difference2
+ echo Memory charged to reservation2=$reserved_difference2
+
+ expect_equal "3" "$hugetlb_difference1" \
+ "Incorrect hugetlb charged to cgroup 1."
+
+ expect_equal "3" "$reserved_difference1" \
+ "Incorrect reservation charged to cgroup 1."
+
+ expect_equal "5" "$hugetlb_difference2" \
+ "Incorrect hugetlb charged to cgroup 2."
+
+ expect_equal "5" "$reserved_difference2" \
+ "Incorrected reservation charged to cgroup 2."
+ echo 'PASS'
+
+ cleanup
+
+ done # reserve
+ done # private
+ done # populate
+done # method
+
+umount $cgroup_path
+rmdir $cgroup_path
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 93b90a9b1eeb..3ba674b64fa9 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -1,3 +1,5 @@
CONFIG_SYSVIPC=y
CONFIG_USERFAULTFD=y
CONFIG_TEST_VMALLOC=m
+CONFIG_DEVICE_PRIVATE=y
+CONFIG_TEST_HMM=m
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 389327e9b30a..43b4dfe161a2 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -18,6 +18,10 @@
#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark)
+/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
+#define PIN_FAST_BENCHMARK _IOWR('g', 4, struct gup_benchmark)
+#define PIN_BENCHMARK _IOWR('g', 5, struct gup_benchmark)
+
/* Just the flags we need, copied from mm.h: */
#define FOLL_WRITE 0x01 /* check pte is writable */
@@ -40,8 +44,14 @@ int main(int argc, char **argv)
char *file = "/dev/zero";
char *p;
- while ((opt = getopt(argc, argv, "m:r:n:f:tTLUwSH")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) {
switch (opt) {
+ case 'a':
+ cmd = PIN_FAST_BENCHMARK;
+ break;
+ case 'b':
+ cmd = PIN_BENCHMARK;
+ break;
case 'm':
size = atoi(optarg) * MB;
break;
@@ -63,6 +73,9 @@ int main(int argc, char **argv)
case 'U':
cmd = GUP_BENCHMARK;
break;
+ case 'u':
+ cmd = GUP_FAST_BENCHMARK;
+ break;
case 'w':
write = 1;
break;
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
new file mode 100644
index 000000000000..79db22604019
--- /dev/null
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -0,0 +1,1359 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * HMM stands for Heterogeneous Memory Management, it is a helper layer inside
+ * the linux kernel to help device drivers mirror a process address space in
+ * the device. This allows the device to use the same address space which
+ * makes communication and data exchange a lot easier.
+ *
+ * This framework's sole purpose is to exercise various code paths inside
+ * the kernel to make sure that HMM performs as expected and to flush out any
+ * bugs.
+ */
+
+#include "../kselftest_harness.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <strings.h>
+#include <time.h>
+#include <pthread.h>
+#include <hugetlbfs.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+
+/*
+ * This is a private UAPI to the kernel test module so it isn't exported
+ * in the usual include/uapi/... directory.
+ */
+#include "../../../../lib/test_hmm_uapi.h"
+
+struct hmm_buffer {
+ void *ptr;
+ void *mirror;
+ unsigned long size;
+ int fd;
+ uint64_t cpages;
+ uint64_t faults;
+};
+
+#define TWOMEG (1 << 21)
+#define HMM_BUFFER_SIZE (1024 << 12)
+#define HMM_PATH_MAX 64
+#define NTIMES 256
+
+#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+
+FIXTURE(hmm)
+{
+ int fd;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+
+FIXTURE(hmm2)
+{
+ int fd0;
+ int fd1;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+
+static int hmm_open(int unit)
+{
+ char pathname[HMM_PATH_MAX];
+ int fd;
+
+ snprintf(pathname, sizeof(pathname), "/dev/hmm_dmirror%d", unit);
+ fd = open(pathname, O_RDWR, 0);
+ if (fd < 0)
+ fprintf(stderr, "could not open hmm dmirror driver (%s)\n",
+ pathname);
+ return fd;
+}
+
+FIXTURE_SETUP(hmm)
+{
+ self->page_size = sysconf(_SC_PAGE_SIZE);
+ self->page_shift = ffs(self->page_size) - 1;
+
+ self->fd = hmm_open(0);
+ ASSERT_GE(self->fd, 0);
+}
+
+FIXTURE_SETUP(hmm2)
+{
+ self->page_size = sysconf(_SC_PAGE_SIZE);
+ self->page_shift = ffs(self->page_size) - 1;
+
+ self->fd0 = hmm_open(0);
+ ASSERT_GE(self->fd0, 0);
+ self->fd1 = hmm_open(1);
+ ASSERT_GE(self->fd1, 0);
+}
+
+FIXTURE_TEARDOWN(hmm)
+{
+ int ret = close(self->fd);
+
+ ASSERT_EQ(ret, 0);
+ self->fd = -1;
+}
+
+FIXTURE_TEARDOWN(hmm2)
+{
+ int ret = close(self->fd0);
+
+ ASSERT_EQ(ret, 0);
+ self->fd0 = -1;
+
+ ret = close(self->fd1);
+ ASSERT_EQ(ret, 0);
+ self->fd1 = -1;
+}
+
+static int hmm_dmirror_cmd(int fd,
+ unsigned long request,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ struct hmm_dmirror_cmd cmd;
+ int ret;
+
+ /* Simulate a device reading system memory. */
+ cmd.addr = (__u64)buffer->ptr;
+ cmd.ptr = (__u64)buffer->mirror;
+ cmd.npages = npages;
+
+ for (;;) {
+ ret = ioctl(fd, request, &cmd);
+ if (ret == 0)
+ break;
+ if (errno == EINTR)
+ continue;
+ return -errno;
+ }
+ buffer->cpages = cmd.cpages;
+ buffer->faults = cmd.faults;
+
+ return 0;
+}
+
+static void hmm_buffer_free(struct hmm_buffer *buffer)
+{
+ if (buffer == NULL)
+ return;
+
+ if (buffer->ptr)
+ munmap(buffer->ptr, buffer->size);
+ free(buffer->mirror);
+ free(buffer);
+}
+
+/*
+ * Create a temporary file that will be deleted on close.
+ */
+static int hmm_create_file(unsigned long size)
+{
+ char path[HMM_PATH_MAX];
+ int fd;
+
+ strcpy(path, "/tmp");
+ fd = open(path, O_TMPFILE | O_EXCL | O_RDWR, 0600);
+ if (fd >= 0) {
+ int r;
+
+ do {
+ r = ftruncate(fd, size);
+ } while (r == -1 && errno == EINTR);
+ if (!r)
+ return fd;
+ close(fd);
+ }
+ return -1;
+}
+
+/*
+ * Return a random unsigned number.
+ */
+static unsigned int hmm_random(void)
+{
+ static int fd = -1;
+ unsigned int r;
+
+ if (fd < 0) {
+ fd = open("/dev/urandom", O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "%s:%d failed to open /dev/urandom\n",
+ __FILE__, __LINE__);
+ return ~0U;
+ }
+ }
+ read(fd, &r, sizeof(r));
+ return r;
+}
+
+static void hmm_nanosleep(unsigned int n)
+{
+ struct timespec t;
+
+ t.tv_sec = 0;
+ t.tv_nsec = n;
+ nanosleep(&t, NULL);
+}
+
+/*
+ * Simple NULL test of device open/close.
+ */
+TEST_F(hmm, open_close)
+{
+}
+
+/*
+ * Read private anonymous memory.
+ */
+TEST_F(hmm, anon_read)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int val;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /*
+ * Initialize buffer in system memory but leave the first two pages
+ * zero (pte_none and pfn_zero).
+ */
+ i = 2 * self->page_size / sizeof(*ptr);
+ for (ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Set buffer permission to read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Populate the CPU page table with a special zero page. */
+ val = *(int *)(buffer->ptr + self->page_size);
+ ASSERT_EQ(val, 0);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ ptr = buffer->mirror;
+ for (i = 0; i < 2 * self->page_size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+ for (; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Read private anonymous memory which has been protected with
+ * mprotect() PROT_NONE.
+ */
+TEST_F(hmm, anon_read_prot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize mirror buffer so we can verify it isn't written. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ /* Protect buffer from reading. */
+ ret = mprotect(buffer->ptr, size, PROT_NONE);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, -EFAULT);
+
+ /* Allow CPU to read the buffer so we can check it. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write private anonymous memory.
+ */
+TEST_F(hmm, anon_write)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write private anonymous memory which has been protected with
+ * mprotect() PROT_READ.
+ */
+TEST_F(hmm, anon_write_prot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device reading a zero page of memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, -EPERM);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], 0);
+
+ /* Now allow writing and see that the zero page is replaced. */
+ ret = mprotect(buffer->ptr, size, PROT_WRITE | PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Check that a device writing an anonymous private mapping
+ * will copy-on-write if a child process inherits the mapping.
+ */
+TEST_F(hmm, anon_write_child)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ pid_t pid;
+ int child_fd;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer->ptr so we can tell if it is written. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (pid != 0) {
+ waitpid(pid, &ret, 0);
+ ASSERT_EQ(WIFEXITED(ret), 1);
+
+ /* Check that the parent's buffer did not change. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ return;
+ }
+
+ /* Check that we see the parent's values. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ /* The child process needs its own mirror to its own mm. */
+ child_fd = hmm_open(0);
+ ASSERT_GE(child_fd, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ close(child_fd);
+ exit(0);
+}
+
+/*
+ * Check that a device writing an anonymous shared mapping
+ * will not copy-on-write if a child process inherits the mapping.
+ */
+TEST_F(hmm, anon_write_child_shared)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ pid_t pid;
+ int child_fd;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer->ptr so we can tell if it is written. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = -i;
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (pid != 0) {
+ waitpid(pid, &ret, 0);
+ ASSERT_EQ(WIFEXITED(ret), 1);
+
+ /* Check that the parent's buffer did change. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+ return;
+ }
+
+ /* Check that we see the parent's values. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ /* The child process needs its own mirror to its own mm. */
+ child_fd = hmm_open(0);
+ ASSERT_GE(child_fd, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(child_fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], -i);
+
+ close(child_fd);
+ exit(0);
+}
+
+/*
+ * Write private anonymous huge page.
+ */
+TEST_F(hmm, anon_write_huge)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ void *old_ptr;
+ void *map;
+ int *ptr;
+ int ret;
+
+ size = 2 * TWOMEG;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ size = TWOMEG;
+ npages = size >> self->page_shift;
+ map = (void *)ALIGN((uintptr_t)buffer->ptr, size);
+ ret = madvise(map, size, MADV_HUGEPAGE);
+ ASSERT_EQ(ret, 0);
+ old_ptr = buffer->ptr;
+ buffer->ptr = map;
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ buffer->ptr = old_ptr;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write huge TLBFS page.
+ */
+TEST_F(hmm, anon_write_hugetlbfs)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ long pagesizes[4];
+ int n, idx;
+
+ /* Skip test if we can't allocate a hugetlbfs page. */
+
+ n = gethugepagesizes(pagesizes, 4);
+ if (n <= 0)
+ return;
+ for (idx = 0; --n > 0; ) {
+ if (pagesizes[n] < pagesizes[idx])
+ idx = n;
+ }
+ size = ALIGN(TWOMEG, pagesizes[idx]);
+ npages = size >> self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->ptr = get_hugepage_region(size, GHR_STRICT);
+ if (buffer->ptr == NULL) {
+ free(buffer);
+ return;
+ }
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ free_hugepage_region(buffer->ptr);
+ buffer->ptr = NULL;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Read mmap'ed file memory.
+ */
+TEST_F(hmm, file_read)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int fd;
+ ssize_t len;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ fd = hmm_create_file(size);
+ ASSERT_GE(fd, 0);
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = fd;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Write initial contents of the file. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+ len = pwrite(fd, buffer->mirror, size, 0);
+ ASSERT_EQ(len, size);
+ memset(buffer->mirror, 0, size);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ,
+ MAP_SHARED,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Write mmap'ed file memory.
+ */
+TEST_F(hmm, file_write)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ int fd;
+ ssize_t len;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ fd = hmm_create_file(size);
+ ASSERT_GE(fd, 0);
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = fd;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize data that the device will write to buffer->ptr. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device wrote. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Check that the device also wrote the file. */
+ len = pread(fd, buffer->mirror, size, 0);
+ ASSERT_EQ(len, size);
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory.
+ */
+TEST_F(hmm, migrate)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory and fault it back to system
+ * memory.
+ */
+TEST_F(hmm, migrate_fault)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Try to migrate various memory types to device private memory.
+ */
+TEST_F(hmm2, migrate_mixed)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int *ptr;
+ unsigned char *p;
+ int ret;
+ int val;
+
+ npages = 6;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+ p = buffer->ptr;
+
+ /* Migrating a protected area should be an error. */
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ASSERT_EQ(ret, -EINVAL);
+
+ /* Punch a hole after the first page address. */
+ ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ ASSERT_EQ(ret, 0);
+
+ /* We expect an error if the vma doesn't cover the range. */
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3);
+ ASSERT_EQ(ret, -EINVAL);
+
+ /* Page 2 will be a read-only zero page. */
+ ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 2 * self->page_size);
+ val = *ptr + 3;
+ ASSERT_EQ(val, 3);
+
+ /* Page 3 will be read-only. */
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 3 * self->page_size);
+ *ptr = val;
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Page 4-5 will be read-write. */
+ ret = mprotect(buffer->ptr + 4 * self->page_size, 2 * self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 4 * self->page_size);
+ *ptr = val;
+ ptr = (int *)(buffer->ptr + 5 * self->page_size);
+ *ptr = val;
+
+ /* Now try to migrate pages 2-5 to device 1. */
+ buffer->ptr = p + 2 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 4);
+
+ /* Page 5 won't be migrated to device 0 because it's on device 1. */
+ buffer->ptr = p + 5 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ASSERT_EQ(ret, -ENOENT);
+ buffer->ptr = p;
+
+ buffer->ptr = p;
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Migrate anonymous memory to device private memory and fault it back to system
+ * memory multiple times.
+ */
+TEST_F(hmm, migrate_multiple)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ unsigned long c;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; c++) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer,
+ npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+ }
+}
+
+/*
+ * Read anonymous memory multiple times.
+ */
+TEST_F(hmm, anon_read_multiple)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ unsigned long c;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; c++) {
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i + c;
+
+ /* Simulate a device reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
+ npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i + c);
+
+ hmm_buffer_free(buffer);
+ }
+}
+
+void *unmap_buffer(void *p)
+{
+ struct hmm_buffer *buffer = p;
+
+ /* Delay for a bit and then unmap buffer while it is being read. */
+ hmm_nanosleep(hmm_random() % 32000);
+ munmap(buffer->ptr + buffer->size / 2, buffer->size / 2);
+ buffer->ptr = NULL;
+
+ return NULL;
+}
+
+/*
+ * Try reading anonymous memory while it is being unmapped.
+ */
+TEST_F(hmm, anon_teardown)
+{
+ unsigned long npages;
+ unsigned long size;
+ unsigned long c;
+ void *ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ for (c = 0; c < NTIMES; ++c) {
+ pthread_t thread;
+ struct hmm_buffer *buffer;
+ unsigned long i;
+ int *ptr;
+ int rc;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i + c;
+
+ rc = pthread_create(&thread, NULL, unmap_buffer, buffer);
+ ASSERT_EQ(rc, 0);
+
+ /* Simulate a device reading system memory. */
+ rc = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_READ, buffer,
+ npages);
+ if (rc == 0) {
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror;
+ i < size / sizeof(*ptr);
+ ++i)
+ ASSERT_EQ(ptr[i], i + c);
+ }
+
+ pthread_join(thread, &ret);
+ hmm_buffer_free(buffer);
+ }
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
+TEST_F(hmm2, snapshot)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int *ptr;
+ unsigned char *p;
+ unsigned char *m;
+ int ret;
+ int val;
+
+ npages = 7;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+ p = buffer->ptr;
+
+ /* Punch a hole after the first page address. */
+ ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ ASSERT_EQ(ret, 0);
+
+ /* Page 2 will be read-only zero page. */
+ ret = mprotect(buffer->ptr + 2 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 2 * self->page_size);
+ val = *ptr + 3;
+ ASSERT_EQ(val, 3);
+
+ /* Page 3 will be read-only. */
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 3 * self->page_size);
+ *ptr = val;
+ ret = mprotect(buffer->ptr + 3 * self->page_size, self->page_size,
+ PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Page 4-6 will be read-write. */
+ ret = mprotect(buffer->ptr + 4 * self->page_size, 3 * self->page_size,
+ PROT_READ | PROT_WRITE);
+ ASSERT_EQ(ret, 0);
+ ptr = (int *)(buffer->ptr + 4 * self->page_size);
+ *ptr = val;
+
+ /* Page 5 will be migrated to device 0. */
+ buffer->ptr = p + 5 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+
+ /* Page 6 will be migrated to device 1. */
+ buffer->ptr = p + 6 * self->page_size;
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, 1);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ buffer->ptr = p;
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_ERROR);
+ ASSERT_EQ(m[1], HMM_DMIRROR_PROT_ERROR);
+ ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ);
+ ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ);
+ ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test two devices reading the same memory (double mapped).
+ */
+TEST_F(hmm2, double_map)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = 6;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Make region read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate device 0 reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Simulate device 1 reading system memory. */
+ ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_READ, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Punch a hole after the first page address. */
+ ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ ASSERT_EQ(ret, 0);
+
+ hmm_buffer_free(buffer);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
new file mode 100644
index 000000000000..d11d1febccc3
--- /dev/null
+++ b/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
@@ -0,0 +1,244 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+if [[ $(id -u) -ne 0 ]]; then
+ echo "This test must be run as root. Skipping..."
+ exit 0
+fi
+
+usage_file=usage_in_bytes
+
+if [[ "$1" == "-cgroup-v2" ]]; then
+ cgroup2=1
+ usage_file=current
+fi
+
+CGROUP_ROOT='/dev/cgroup/memory'
+MNT='/mnt/huge/'
+
+if [[ ! -e $CGROUP_ROOT ]]; then
+ mkdir -p $CGROUP_ROOT
+ if [[ $cgroup2 ]]; then
+ mount -t cgroup2 none $CGROUP_ROOT
+ sleep 1
+ echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
+ else
+ mount -t cgroup memory,hugetlb $CGROUP_ROOT
+ fi
+fi
+
+function get_machine_hugepage_size() {
+ hpz=$(grep -i hugepagesize /proc/meminfo)
+ kb=${hpz:14:-3}
+ mb=$(($kb / 1024))
+ echo $mb
+}
+
+MB=$(get_machine_hugepage_size)
+
+function cleanup() {
+ echo cleanup
+ set +e
+ rm -rf "$MNT"/* 2>/dev/null
+ umount "$MNT" 2>/dev/null
+ rmdir "$MNT" 2>/dev/null
+ rmdir "$CGROUP_ROOT"/a/b 2>/dev/null
+ rmdir "$CGROUP_ROOT"/a 2>/dev/null
+ rmdir "$CGROUP_ROOT"/test1 2>/dev/null
+ echo 0 >/proc/sys/vm/nr_hugepages
+ set -e
+}
+
+function assert_state() {
+ local expected_a="$1"
+ local expected_a_hugetlb="$2"
+ local expected_b=""
+ local expected_b_hugetlb=""
+
+ if [ ! -z ${3:-} ] && [ ! -z ${4:-} ]; then
+ expected_b="$3"
+ expected_b_hugetlb="$4"
+ fi
+ local tolerance=$((5 * 1024 * 1024))
+
+ local actual_a
+ actual_a="$(cat "$CGROUP_ROOT"/a/memory.$usage_file)"
+ if [[ $actual_a -lt $(($expected_a - $tolerance)) ]] ||
+ [[ $actual_a -gt $(($expected_a + $tolerance)) ]]; then
+ echo actual a = $((${actual_a%% *} / 1024 / 1024)) MB
+ echo expected a = $((${expected_a%% *} / 1024 / 1024)) MB
+ echo fail
+
+ cleanup
+ exit 1
+ fi
+
+ local actual_a_hugetlb
+ actual_a_hugetlb="$(cat "$CGROUP_ROOT"/a/hugetlb.${MB}MB.$usage_file)"
+ if [[ $actual_a_hugetlb -lt $(($expected_a_hugetlb - $tolerance)) ]] ||
+ [[ $actual_a_hugetlb -gt $(($expected_a_hugetlb + $tolerance)) ]]; then
+ echo actual a hugetlb = $((${actual_a_hugetlb%% *} / 1024 / 1024)) MB
+ echo expected a hugetlb = $((${expected_a_hugetlb%% *} / 1024 / 1024)) MB
+ echo fail
+
+ cleanup
+ exit 1
+ fi
+
+ if [[ -z "$expected_b" || -z "$expected_b_hugetlb" ]]; then
+ return
+ fi
+
+ local actual_b
+ actual_b="$(cat "$CGROUP_ROOT"/a/b/memory.$usage_file)"
+ if [[ $actual_b -lt $(($expected_b - $tolerance)) ]] ||
+ [[ $actual_b -gt $(($expected_b + $tolerance)) ]]; then
+ echo actual b = $((${actual_b%% *} / 1024 / 1024)) MB
+ echo expected b = $((${expected_b%% *} / 1024 / 1024)) MB
+ echo fail
+
+ cleanup
+ exit 1
+ fi
+
+ local actual_b_hugetlb
+ actual_b_hugetlb="$(cat "$CGROUP_ROOT"/a/b/hugetlb.${MB}MB.$usage_file)"
+ if [[ $actual_b_hugetlb -lt $(($expected_b_hugetlb - $tolerance)) ]] ||
+ [[ $actual_b_hugetlb -gt $(($expected_b_hugetlb + $tolerance)) ]]; then
+ echo actual b hugetlb = $((${actual_b_hugetlb%% *} / 1024 / 1024)) MB
+ echo expected b hugetlb = $((${expected_b_hugetlb%% *} / 1024 / 1024)) MB
+ echo fail
+
+ cleanup
+ exit 1
+ fi
+}
+
+function setup() {
+ echo 100 >/proc/sys/vm/nr_hugepages
+ mkdir "$CGROUP_ROOT"/a
+ sleep 1
+ if [[ $cgroup2 ]]; then
+ echo "+hugetlb +memory" >$CGROUP_ROOT/a/cgroup.subtree_control
+ else
+ echo 0 >$CGROUP_ROOT/a/cpuset.mems
+ echo 0 >$CGROUP_ROOT/a/cpuset.cpus
+ fi
+
+ mkdir "$CGROUP_ROOT"/a/b
+
+ if [[ ! $cgroup2 ]]; then
+ echo 0 >$CGROUP_ROOT/a/b/cpuset.mems
+ echo 0 >$CGROUP_ROOT/a/b/cpuset.cpus
+ fi
+
+ mkdir -p "$MNT"
+ mount -t hugetlbfs none "$MNT"
+}
+
+write_hugetlbfs() {
+ local cgroup="$1"
+ local path="$2"
+ local size="$3"
+
+ if [[ $cgroup2 ]]; then
+ echo $$ >$CGROUP_ROOT/$cgroup/cgroup.procs
+ else
+ echo 0 >$CGROUP_ROOT/$cgroup/cpuset.mems
+ echo 0 >$CGROUP_ROOT/$cgroup/cpuset.cpus
+ echo $$ >"$CGROUP_ROOT/$cgroup/tasks"
+ fi
+ ./write_to_hugetlbfs -p "$path" -s "$size" -m 0 -o
+ if [[ $cgroup2 ]]; then
+ echo $$ >$CGROUP_ROOT/cgroup.procs
+ else
+ echo $$ >"$CGROUP_ROOT/tasks"
+ fi
+ echo
+}
+
+set -e
+
+size=$((${MB} * 1024 * 1024 * 25)) # 50MB = 25 * 2MB hugepages.
+
+cleanup
+
+echo
+echo
+echo Test charge, rmdir, uncharge
+setup
+echo mkdir
+mkdir $CGROUP_ROOT/test1
+
+echo write
+write_hugetlbfs test1 "$MNT"/test $size
+
+echo rmdir
+rmdir $CGROUP_ROOT/test1
+mkdir $CGROUP_ROOT/test1
+
+echo uncharge
+rm -rf /mnt/huge/*
+
+cleanup
+
+echo done
+echo
+echo
+if [[ ! $cgroup2 ]]; then
+ echo "Test parent and child hugetlb usage"
+ setup
+
+ echo write
+ write_hugetlbfs a "$MNT"/test $size
+
+ echo Assert memory charged correctly for parent use.
+ assert_state 0 $size 0 0
+
+ write_hugetlbfs a/b "$MNT"/test2 $size
+
+ echo Assert memory charged correctly for child use.
+ assert_state 0 $(($size * 2)) 0 $size
+
+ rmdir "$CGROUP_ROOT"/a/b
+ sleep 5
+ echo Assert memory reparent correctly.
+ assert_state 0 $(($size * 2))
+
+ rm -rf "$MNT"/*
+ umount "$MNT"
+ echo Assert memory uncharged correctly.
+ assert_state 0 0
+
+ cleanup
+fi
+
+echo
+echo
+echo "Test child only hugetlb usage"
+echo setup
+setup
+
+echo write
+write_hugetlbfs a/b "$MNT"/test2 $size
+
+echo Assert memory charged correctly for child only use.
+assert_state 0 $(($size)) 0 $size
+
+rmdir "$CGROUP_ROOT"/a/b
+echo Assert memory reparent correctly.
+assert_state 0 $size
+
+rm -rf "$MNT"/*
+umount "$MNT"
+echo Assert memory uncharged correctly.
+assert_state 0 0
+
+cleanup
+
+echo ALL PASS
+
+umount $CGROUP_ROOT
+rm -rf $CGROUP_ROOT
diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c
new file mode 100644
index 000000000000..51b89cedd09d
--- /dev/null
+++ b/tools/testing/selftests/vm/khugepaged.c
@@ -0,0 +1,1035 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#ifndef MADV_PAGEOUT
+#define MADV_PAGEOUT 21
+#endif
+
+#define BASE_ADDR ((void *)(1UL << 30))
+static unsigned long hpage_pmd_size;
+static unsigned long page_size;
+static int hpage_pmd_nr;
+
+#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
+#define PID_SMAPS "/proc/self/smaps"
+
+enum thp_enabled {
+ THP_ALWAYS,
+ THP_MADVISE,
+ THP_NEVER,
+};
+
+static const char *thp_enabled_strings[] = {
+ "always",
+ "madvise",
+ "never",
+ NULL
+};
+
+enum thp_defrag {
+ THP_DEFRAG_ALWAYS,
+ THP_DEFRAG_DEFER,
+ THP_DEFRAG_DEFER_MADVISE,
+ THP_DEFRAG_MADVISE,
+ THP_DEFRAG_NEVER,
+};
+
+static const char *thp_defrag_strings[] = {
+ "always",
+ "defer",
+ "defer+madvise",
+ "madvise",
+ "never",
+ NULL
+};
+
+enum shmem_enabled {
+ SHMEM_ALWAYS,
+ SHMEM_WITHIN_SIZE,
+ SHMEM_ADVISE,
+ SHMEM_NEVER,
+ SHMEM_DENY,
+ SHMEM_FORCE,
+};
+
+static const char *shmem_enabled_strings[] = {
+ "always",
+ "within_size",
+ "advise",
+ "never",
+ "deny",
+ "force",
+ NULL
+};
+
+struct khugepaged_settings {
+ bool defrag;
+ unsigned int alloc_sleep_millisecs;
+ unsigned int scan_sleep_millisecs;
+ unsigned int max_ptes_none;
+ unsigned int max_ptes_swap;
+ unsigned int max_ptes_shared;
+ unsigned long pages_to_scan;
+};
+
+struct settings {
+ enum thp_enabled thp_enabled;
+ enum thp_defrag thp_defrag;
+ enum shmem_enabled shmem_enabled;
+ bool debug_cow;
+ bool use_zero_page;
+ struct khugepaged_settings khugepaged;
+};
+
+static struct settings default_settings = {
+ .thp_enabled = THP_MADVISE,
+ .thp_defrag = THP_DEFRAG_ALWAYS,
+ .shmem_enabled = SHMEM_NEVER,
+ .debug_cow = 0,
+ .use_zero_page = 0,
+ .khugepaged = {
+ .defrag = 1,
+ .alloc_sleep_millisecs = 10,
+ .scan_sleep_millisecs = 10,
+ },
+};
+
+static struct settings saved_settings;
+static bool skip_settings_restore;
+
+static int exit_status;
+
+static void success(const char *msg)
+{
+ printf(" \e[32m%s\e[0m\n", msg);
+}
+
+static void fail(const char *msg)
+{
+ printf(" \e[31m%s\e[0m\n", msg);
+ exit_status++;
+}
+
+static int read_file(const char *path, char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numread;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
+static int write_file(const char *path, const char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numwritten;
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1)
+ return 0;
+
+ numwritten = write(fd, buf, buflen - 1);
+ close(fd);
+ if (numwritten < 1)
+ return 0;
+
+ return (unsigned int) numwritten;
+}
+
+static int read_string(const char *name, const char *strings[])
+{
+ char path[PATH_MAX];
+ char buf[256];
+ char *c;
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!read_file(path, buf, sizeof(buf))) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+
+ c = strchr(buf, '[');
+ if (!c) {
+ printf("%s: Parse failure\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ c++;
+ memmove(buf, c, sizeof(buf) - (c - buf));
+
+ c = strchr(buf, ']');
+ if (!c) {
+ printf("%s: Parse failure\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ *c = '\0';
+
+ ret = 0;
+ while (strings[ret]) {
+ if (!strcmp(strings[ret], buf))
+ return ret;
+ ret++;
+ }
+
+ printf("Failed to parse %s\n", name);
+ exit(EXIT_FAILURE);
+}
+
+static void write_string(const char *name, const char *val)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!write_file(path, val, strlen(val) + 1)) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static const unsigned long read_num(const char *name)
+{
+ char path[PATH_MAX];
+ char buf[21];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = read_file(path, buf, sizeof(buf));
+ if (ret < 0) {
+ perror("read_file(read_num)");
+ exit(EXIT_FAILURE);
+ }
+
+ return strtoul(buf, NULL, 10);
+}
+
+static void write_num(const char *name, unsigned long num)
+{
+ char path[PATH_MAX];
+ char buf[21];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ sprintf(buf, "%ld", num);
+ if (!write_file(path, buf, strlen(buf) + 1)) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void write_settings(struct settings *settings)
+{
+ struct khugepaged_settings *khugepaged = &settings->khugepaged;
+
+ write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
+ write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
+ write_string("shmem_enabled",
+ shmem_enabled_strings[settings->shmem_enabled]);
+ write_num("debug_cow", settings->debug_cow);
+ write_num("use_zero_page", settings->use_zero_page);
+
+ write_num("khugepaged/defrag", khugepaged->defrag);
+ write_num("khugepaged/alloc_sleep_millisecs",
+ khugepaged->alloc_sleep_millisecs);
+ write_num("khugepaged/scan_sleep_millisecs",
+ khugepaged->scan_sleep_millisecs);
+ write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
+ write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
+ write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
+ write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
+}
+
+static void restore_settings(int sig)
+{
+ if (skip_settings_restore)
+ goto out;
+
+ printf("Restore THP and khugepaged settings...");
+ write_settings(&saved_settings);
+ success("OK");
+ if (sig)
+ exit(EXIT_FAILURE);
+out:
+ exit(exit_status);
+}
+
+static void save_settings(void)
+{
+ printf("Save THP and khugepaged settings...");
+ saved_settings = (struct settings) {
+ .thp_enabled = read_string("enabled", thp_enabled_strings),
+ .thp_defrag = read_string("defrag", thp_defrag_strings),
+ .shmem_enabled =
+ read_string("shmem_enabled", shmem_enabled_strings),
+ .debug_cow = read_num("debug_cow"),
+ .use_zero_page = read_num("use_zero_page"),
+ };
+ saved_settings.khugepaged = (struct khugepaged_settings) {
+ .defrag = read_num("khugepaged/defrag"),
+ .alloc_sleep_millisecs =
+ read_num("khugepaged/alloc_sleep_millisecs"),
+ .scan_sleep_millisecs =
+ read_num("khugepaged/scan_sleep_millisecs"),
+ .max_ptes_none = read_num("khugepaged/max_ptes_none"),
+ .max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
+ .max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
+ .pages_to_scan = read_num("khugepaged/pages_to_scan"),
+ };
+ success("OK");
+
+ signal(SIGTERM, restore_settings);
+ signal(SIGINT, restore_settings);
+ signal(SIGHUP, restore_settings);
+ signal(SIGQUIT, restore_settings);
+}
+
+static void adjust_settings(void)
+{
+
+ printf("Adjust settings...");
+ write_settings(&default_settings);
+ success("OK");
+}
+
+#define MAX_LINE_LENGTH 500
+
+static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
+{
+ while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
+ if (!strncmp(buf, pattern, strlen(pattern)))
+ return true;
+ }
+ return false;
+}
+
+static bool check_huge(void *addr)
+{
+ bool thp = false;
+ int ret;
+ FILE *fp;
+ char buffer[MAX_LINE_LENGTH];
+ char addr_pattern[MAX_LINE_LENGTH];
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+ (unsigned long) addr);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+
+ fp = fopen(PID_SMAPS, "r");
+ if (!fp) {
+ printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
+ exit(EXIT_FAILURE);
+ }
+ if (!check_for_pattern(fp, addr_pattern, buffer))
+ goto err_out;
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
+ hpage_pmd_size >> 10);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ /*
+ * Fetch the AnonHugePages: in the same block and check whether it got
+ * the expected number of hugeepages next.
+ */
+ if (!check_for_pattern(fp, "AnonHugePages:", buffer))
+ goto err_out;
+
+ if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
+ goto err_out;
+
+ thp = true;
+err_out:
+ fclose(fp);
+ return thp;
+}
+
+
+static bool check_swap(void *addr, unsigned long size)
+{
+ bool swap = false;
+ int ret;
+ FILE *fp;
+ char buffer[MAX_LINE_LENGTH];
+ char addr_pattern[MAX_LINE_LENGTH];
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+ (unsigned long) addr);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+
+ fp = fopen(PID_SMAPS, "r");
+ if (!fp) {
+ printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
+ exit(EXIT_FAILURE);
+ }
+ if (!check_for_pattern(fp, addr_pattern, buffer))
+ goto err_out;
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
+ size >> 10);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ /*
+ * Fetch the Swap: in the same block and check whether it got
+ * the expected number of hugeepages next.
+ */
+ if (!check_for_pattern(fp, "Swap:", buffer))
+ goto err_out;
+
+ if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
+ goto err_out;
+
+ swap = true;
+err_out:
+ fclose(fp);
+ return swap;
+}
+
+static void *alloc_mapping(void)
+{
+ void *p;
+
+ p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (p != BASE_ADDR) {
+ printf("Failed to allocate VMA at %p\n", BASE_ADDR);
+ exit(EXIT_FAILURE);
+ }
+
+ return p;
+}
+
+static void fill_memory(int *p, unsigned long start, unsigned long end)
+{
+ int i;
+
+ for (i = start / page_size; i < end / page_size; i++)
+ p[i * page_size / sizeof(*p)] = i + 0xdead0000;
+}
+
+static void validate_memory(int *p, unsigned long start, unsigned long end)
+{
+ int i;
+
+ for (i = start / page_size; i < end / page_size; i++) {
+ if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
+ printf("Page %d is corrupted: %#x\n",
+ i, p[i * page_size / sizeof(*p)]);
+ exit(EXIT_FAILURE);
+ }
+ }
+}
+
+#define TICK 500000
+static bool wait_for_scan(const char *msg, char *p)
+{
+ int full_scans;
+ int timeout = 6; /* 3 seconds */
+
+ /* Sanity check */
+ if (check_huge(p)) {
+ printf("Unexpected huge page\n");
+ exit(EXIT_FAILURE);
+ }
+
+ madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+
+ /* Wait until the second full_scan completed */
+ full_scans = read_num("khugepaged/full_scans") + 2;
+
+ printf("%s...", msg);
+ while (timeout--) {
+ if (check_huge(p))
+ break;
+ if (read_num("khugepaged/full_scans") >= full_scans)
+ break;
+ printf(".");
+ usleep(TICK);
+ }
+
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+
+ return !timeout;
+}
+
+static void alloc_at_fault(void)
+{
+ struct settings settings = default_settings;
+ char *p;
+
+ settings.thp_enabled = THP_ALWAYS;
+ write_settings(&settings);
+
+ p = alloc_mapping();
+ *p = 1;
+ printf("Allocate huge page on fault...");
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ write_settings(&default_settings);
+
+ madvise(p, page_size, MADV_DONTNEED);
+ printf("Split huge PMD on MADV_DONTNEED...");
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_full(void)
+{
+ void *p;
+
+ p = alloc_mapping();
+ fill_memory(p, 0, hpage_pmd_size);
+ if (wait_for_scan("Collapse fully populated PTE table", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_empty(void)
+{
+ void *p;
+
+ p = alloc_mapping();
+ if (wait_for_scan("Do not collapse empty PTE table", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ fail("Fail");
+ else
+ success("OK");
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_single_pte_entry(void)
+{
+ void *p;
+
+ p = alloc_mapping();
+ fill_memory(p, 0, page_size);
+ if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, page_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_none(void)
+{
+ int max_ptes_none = hpage_pmd_nr / 2;
+ struct settings settings = default_settings;
+ void *p;
+
+ settings.khugepaged.max_ptes_none = max_ptes_none;
+ write_settings(&settings);
+
+ p = alloc_mapping();
+
+ fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+ if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ fail("Fail");
+ else
+ success("OK");
+ validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
+
+ fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+ if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+
+ munmap(p, hpage_pmd_size);
+ write_settings(&default_settings);
+}
+
+static void collapse_swapin_single_pte(void)
+{
+ void *p;
+ p = alloc_mapping();
+ fill_memory(p, 0, hpage_pmd_size);
+
+ printf("Swapout one page...");
+ if (madvise(p, page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ if (wait_for_scan("Collapse with swapping in single PTE entry", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+out:
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_swap(void)
+{
+ int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
+ void *p;
+
+ p = alloc_mapping();
+
+ fill_memory(p, 0, hpage_pmd_size);
+ printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
+ if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ fail("Fail");
+ else
+ success("OK");
+ validate_memory(p, 0, hpage_pmd_size);
+
+ fill_memory(p, 0, hpage_pmd_size);
+ printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
+ if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, max_ptes_swap * page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+out:
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_single_pte_entry_compound(void)
+{
+ void *p;
+
+ p = alloc_mapping();
+
+ printf("Allocate huge page...");
+ madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+ fill_memory(p, 0, hpage_pmd_size);
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+
+ printf("Split huge page leaving single PTE mapping compound page...");
+ madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, page_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_full_of_compound(void)
+{
+ void *p;
+
+ p = alloc_mapping();
+
+ printf("Allocate huge page...");
+ madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+ fill_memory(p, 0, hpage_pmd_size);
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Split huge page leaving single PTE page table full of compound pages...");
+ madvise(p, page_size, MADV_NOHUGEPAGE);
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ if (wait_for_scan("Collapse PTE table full of compound pages", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_compound_extreme(void)
+{
+ void *p;
+ int i;
+
+ p = alloc_mapping();
+ for (i = 0; i < hpage_pmd_nr; i++) {
+ printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
+ i + 1, hpage_pmd_nr);
+
+ madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
+ fill_memory(BASE_ADDR, 0, hpage_pmd_size);
+ if (!check_huge(BASE_ADDR)) {
+ printf("Failed to allocate huge page\n");
+ exit(EXIT_FAILURE);
+ }
+ madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
+
+ p = mremap(BASE_ADDR - i * page_size,
+ i * page_size + hpage_pmd_size,
+ (i + 1) * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED,
+ BASE_ADDR + 2 * hpage_pmd_size);
+ if (p == MAP_FAILED) {
+ perror("mremap+unmap");
+ exit(EXIT_FAILURE);
+ }
+
+ p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
+ (i + 1) * page_size,
+ (i + 1) * page_size + hpage_pmd_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED,
+ BASE_ADDR - (i + 1) * page_size);
+ if (p == MAP_FAILED) {
+ perror("mremap+alloc");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ munmap(BASE_ADDR, hpage_pmd_size);
+ fill_memory(p, 0, hpage_pmd_size);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ if (wait_for_scan("Collapse PTE table full of different compound pages", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_fork(void)
+{
+ int wstatus;
+ void *p;
+
+ p = alloc_mapping();
+
+ printf("Allocate small page...");
+ fill_memory(p, 0, page_size);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Share small page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ fill_memory(p, page_size, 2 * page_size);
+
+ if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ validate_memory(p, 0, page_size);
+ munmap(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has small page...");
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, page_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_fork_compound(void)
+{
+ int wstatus;
+ void *p;
+
+ p = alloc_mapping();
+
+ printf("Allocate huge page...");
+ madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+ fill_memory(p, 0, hpage_pmd_size);
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Share huge page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Split huge page PMD in child process...");
+ madvise(p, page_size, MADV_NOHUGEPAGE);
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ fill_memory(p, 0, page_size);
+
+ write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
+ if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ write_num("khugepaged/max_ptes_shared",
+ default_settings.khugepaged.max_ptes_shared);
+
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has huge page...");
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_shared()
+{
+ int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
+ int wstatus;
+ void *p;
+
+ p = alloc_mapping();
+
+ printf("Allocate huge page...");
+ madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
+ fill_memory(p, 0, hpage_pmd_size);
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Share huge page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Trigger CoW on page %d of %d...",
+ hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
+ fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
+ fail("Timeout");
+ else if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Trigger CoW on page %d of %d...",
+ hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
+ fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
+ if (!check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+
+ if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
+ fail("Timeout");
+ else if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has huge page...");
+ if (check_huge(p))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ munmap(p, hpage_pmd_size);
+}
+
+int main(void)
+{
+ setbuf(stdout, NULL);
+
+ page_size = getpagesize();
+ hpage_pmd_size = read_num("hpage_pmd_size");
+ hpage_pmd_nr = hpage_pmd_size / page_size;
+
+ default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
+ default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
+ default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
+ default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
+
+ save_settings();
+ adjust_settings();
+
+ alloc_at_fault();
+ collapse_full();
+ collapse_empty();
+ collapse_single_pte_entry();
+ collapse_max_ptes_none();
+ collapse_swapin_single_pte();
+ collapse_max_ptes_swap();
+ collapse_single_pte_entry_compound();
+ collapse_full_of_compound();
+ collapse_compound_extreme();
+ collapse_fork();
+ collapse_fork_compound();
+ collapse_max_ptes_shared();
+
+ restore_settings(0);
+}
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
index 5a2d7b8efc40..6af951900aa3 100644
--- a/tools/testing/selftests/vm/map_hugetlb.c
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -45,20 +45,20 @@ static void check_bytes(char *addr)
printf("First hex is %x\n", *((unsigned int *)addr));
}
-static void write_bytes(char *addr)
+static void write_bytes(char *addr, size_t length)
{
unsigned long i;
- for (i = 0; i < LENGTH; i++)
+ for (i = 0; i < length; i++)
*(addr + i) = (char)i;
}
-static int read_bytes(char *addr)
+static int read_bytes(char *addr, size_t length)
{
unsigned long i;
check_bytes(addr);
- for (i = 0; i < LENGTH; i++)
+ for (i = 0; i < length; i++)
if (*(addr + i) != (char)i) {
printf("Mismatch at %lu\n", i);
return 1;
@@ -96,11 +96,11 @@ int main(int argc, char **argv)
printf("Returned address is %p\n", addr);
check_bytes(addr);
- write_bytes(addr);
- ret = read_bytes(addr);
+ write_bytes(addr, length);
+ ret = read_bytes(addr, length);
/* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
- if (munmap(addr, LENGTH)) {
+ if (munmap(addr, length)) {
perror("munmap");
exit(1);
}
diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/vm/mlock2-tests.c
index 637b6d0ac0d0..11b2301f3aa3 100644
--- a/tools/testing/selftests/vm/mlock2-tests.c
+++ b/tools/testing/selftests/vm/mlock2-tests.c
@@ -67,59 +67,6 @@ out:
return ret;
}
-static uint64_t get_pageflags(unsigned long addr)
-{
- FILE *file;
- uint64_t pfn;
- unsigned long offset;
-
- file = fopen("/proc/self/pagemap", "r");
- if (!file) {
- perror("fopen pagemap");
- _exit(1);
- }
-
- offset = addr / getpagesize() * sizeof(pfn);
-
- if (fseek(file, offset, SEEK_SET)) {
- perror("fseek pagemap");
- _exit(1);
- }
-
- if (fread(&pfn, sizeof(pfn), 1, file) != 1) {
- perror("fread pagemap");
- _exit(1);
- }
-
- fclose(file);
- return pfn;
-}
-
-static uint64_t get_kpageflags(unsigned long pfn)
-{
- uint64_t flags;
- FILE *file;
-
- file = fopen("/proc/kpageflags", "r");
- if (!file) {
- perror("fopen kpageflags");
- _exit(1);
- }
-
- if (fseek(file, pfn * sizeof(flags), SEEK_SET)) {
- perror("fseek kpageflags");
- _exit(1);
- }
-
- if (fread(&flags, sizeof(flags), 1, file) != 1) {
- perror("fread kpageflags");
- _exit(1);
- }
-
- fclose(file);
- return flags;
-}
-
#define VMFLAGS "VmFlags:"
static bool is_vmflag_set(unsigned long addr, const char *vmflag)
@@ -159,19 +106,13 @@ out:
#define RSS "Rss:"
#define LOCKED "lo"
-static bool is_vma_lock_on_fault(unsigned long addr)
+static unsigned long get_value_for_name(unsigned long addr, const char *name)
{
- bool ret = false;
- bool locked;
- FILE *smaps = NULL;
- unsigned long vma_size, vma_rss;
char *line = NULL;
- char *value;
size_t size = 0;
-
- locked = is_vmflag_set(addr, LOCKED);
- if (!locked)
- goto out;
+ char *value_ptr;
+ FILE *smaps = NULL;
+ unsigned long value = -1UL;
smaps = seek_to_smaps_entry(addr);
if (!smaps) {
@@ -180,112 +121,70 @@ static bool is_vma_lock_on_fault(unsigned long addr)
}
while (getline(&line, &size, smaps) > 0) {
- if (!strstr(line, SIZE)) {
+ if (!strstr(line, name)) {
free(line);
line = NULL;
size = 0;
continue;
}
- value = line + strlen(SIZE);
- if (sscanf(value, "%lu kB", &vma_size) < 1) {
+ value_ptr = line + strlen(name);
+ if (sscanf(value_ptr, "%lu kB", &value) < 1) {
printf("Unable to parse smaps entry for Size\n");
goto out;
}
break;
}
- while (getline(&line, &size, smaps) > 0) {
- if (!strstr(line, RSS)) {
- free(line);
- line = NULL;
- size = 0;
- continue;
- }
-
- value = line + strlen(RSS);
- if (sscanf(value, "%lu kB", &vma_rss) < 1) {
- printf("Unable to parse smaps entry for Rss\n");
- goto out;
- }
- break;
- }
-
- ret = locked && (vma_rss < vma_size);
out:
- free(line);
if (smaps)
fclose(smaps);
- return ret;
+ free(line);
+ return value;
}
-#define PRESENT_BIT 0x8000000000000000ULL
-#define PFN_MASK 0x007FFFFFFFFFFFFFULL
-#define UNEVICTABLE_BIT (1UL << 18)
-
-static int lock_check(char *map)
+static bool is_vma_lock_on_fault(unsigned long addr)
{
- unsigned long page_size = getpagesize();
- uint64_t page1_flags, page2_flags;
+ bool locked;
+ unsigned long vma_size, vma_rss;
- page1_flags = get_pageflags((unsigned long)map);
- page2_flags = get_pageflags((unsigned long)map + page_size);
+ locked = is_vmflag_set(addr, LOCKED);
+ if (!locked)
+ return false;
- /* Both pages should be present */
- if (((page1_flags & PRESENT_BIT) == 0) ||
- ((page2_flags & PRESENT_BIT) == 0)) {
- printf("Failed to make both pages present\n");
- return 1;
- }
+ vma_size = get_value_for_name(addr, SIZE);
+ vma_rss = get_value_for_name(addr, RSS);
- page1_flags = get_kpageflags(page1_flags & PFN_MASK);
- page2_flags = get_kpageflags(page2_flags & PFN_MASK);
+ /* only one page is faulted in */
+ return (vma_rss < vma_size);
+}
- /* Both pages should be unevictable */
- if (((page1_flags & UNEVICTABLE_BIT) == 0) ||
- ((page2_flags & UNEVICTABLE_BIT) == 0)) {
- printf("Failed to make both pages unevictable\n");
- return 1;
- }
+#define PRESENT_BIT 0x8000000000000000ULL
+#define PFN_MASK 0x007FFFFFFFFFFFFFULL
+#define UNEVICTABLE_BIT (1UL << 18)
- if (!is_vmflag_set((unsigned long)map, LOCKED)) {
- printf("VMA flag %s is missing on page 1\n", LOCKED);
- return 1;
- }
+static int lock_check(unsigned long addr)
+{
+ bool locked;
+ unsigned long vma_size, vma_rss;
- if (!is_vmflag_set((unsigned long)map + page_size, LOCKED)) {
- printf("VMA flag %s is missing on page 2\n", LOCKED);
- return 1;
- }
+ locked = is_vmflag_set(addr, LOCKED);
+ if (!locked)
+ return false;
- return 0;
+ vma_size = get_value_for_name(addr, SIZE);
+ vma_rss = get_value_for_name(addr, RSS);
+
+ return (vma_rss == vma_size);
}
static int unlock_lock_check(char *map)
{
- unsigned long page_size = getpagesize();
- uint64_t page1_flags, page2_flags;
-
- page1_flags = get_pageflags((unsigned long)map);
- page2_flags = get_pageflags((unsigned long)map + page_size);
- page1_flags = get_kpageflags(page1_flags & PFN_MASK);
- page2_flags = get_kpageflags(page2_flags & PFN_MASK);
-
- if ((page1_flags & UNEVICTABLE_BIT) || (page2_flags & UNEVICTABLE_BIT)) {
- printf("A page is still marked unevictable after unlock\n");
- return 1;
- }
-
if (is_vmflag_set((unsigned long)map, LOCKED)) {
printf("VMA flag %s is present on page 1 after unlock\n", LOCKED);
return 1;
}
- if (is_vmflag_set((unsigned long)map + page_size, LOCKED)) {
- printf("VMA flag %s is present on page 2 after unlock\n", LOCKED);
- return 1;
- }
-
return 0;
}
@@ -311,7 +210,7 @@ static int test_mlock_lock()
goto unmap;
}
- if (lock_check(map))
+ if (!lock_check((unsigned long)map))
goto unmap;
/* Now unlock and recheck attributes */
@@ -330,64 +229,18 @@ out:
static int onfault_check(char *map)
{
- unsigned long page_size = getpagesize();
- uint64_t page1_flags, page2_flags;
-
- page1_flags = get_pageflags((unsigned long)map);
- page2_flags = get_pageflags((unsigned long)map + page_size);
-
- /* Neither page should be present */
- if ((page1_flags & PRESENT_BIT) || (page2_flags & PRESENT_BIT)) {
- printf("Pages were made present by MLOCK_ONFAULT\n");
- return 1;
- }
-
*map = 'a';
- page1_flags = get_pageflags((unsigned long)map);
- page2_flags = get_pageflags((unsigned long)map + page_size);
-
- /* Only page 1 should be present */
- if ((page1_flags & PRESENT_BIT) == 0) {
- printf("Page 1 is not present after fault\n");
- return 1;
- } else if (page2_flags & PRESENT_BIT) {
- printf("Page 2 was made present\n");
- return 1;
- }
-
- page1_flags = get_kpageflags(page1_flags & PFN_MASK);
-
- /* Page 1 should be unevictable */
- if ((page1_flags & UNEVICTABLE_BIT) == 0) {
- printf("Failed to make faulted page unevictable\n");
- return 1;
- }
-
if (!is_vma_lock_on_fault((unsigned long)map)) {
printf("VMA is not marked for lock on fault\n");
return 1;
}
- if (!is_vma_lock_on_fault((unsigned long)map + page_size)) {
- printf("VMA is not marked for lock on fault\n");
- return 1;
- }
-
return 0;
}
static int unlock_onfault_check(char *map)
{
unsigned long page_size = getpagesize();
- uint64_t page1_flags;
-
- page1_flags = get_pageflags((unsigned long)map);
- page1_flags = get_kpageflags(page1_flags & PFN_MASK);
-
- if (page1_flags & UNEVICTABLE_BIT) {
- printf("Page 1 is still marked unevictable after unlock\n");
- return 1;
- }
if (is_vma_lock_on_fault((unsigned long)map) ||
is_vma_lock_on_fault((unsigned long)map + page_size)) {
@@ -445,7 +298,6 @@ static int test_lock_onfault_of_present()
char *map;
int ret = 1;
unsigned long page_size = getpagesize();
- uint64_t page1_flags, page2_flags;
map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
@@ -465,17 +317,6 @@ static int test_lock_onfault_of_present()
goto unmap;
}
- page1_flags = get_pageflags((unsigned long)map);
- page2_flags = get_pageflags((unsigned long)map + page_size);
- page1_flags = get_kpageflags(page1_flags & PFN_MASK);
- page2_flags = get_kpageflags(page2_flags & PFN_MASK);
-
- /* Page 1 should be unevictable */
- if ((page1_flags & UNEVICTABLE_BIT) == 0) {
- printf("Failed to make present page unevictable\n");
- goto unmap;
- }
-
if (!is_vma_lock_on_fault((unsigned long)map) ||
!is_vma_lock_on_fault((unsigned long)map + page_size)) {
printf("VMA with present pages is not marked lock on fault\n");
@@ -507,7 +348,7 @@ static int test_munlockall()
goto out;
}
- if (lock_check(map))
+ if (!lock_check((unsigned long)map))
goto unmap;
if (munlockall()) {
@@ -549,7 +390,7 @@ static int test_munlockall()
goto out;
}
- if (lock_check(map))
+ if (!lock_check((unsigned long)map))
goto unmap;
if (munlockall()) {
diff --git a/tools/testing/selftests/vm/mremap_dontunmap.c b/tools/testing/selftests/vm/mremap_dontunmap.c
new file mode 100644
index 000000000000..3a7b5ef0b0c6
--- /dev/null
+++ b/tools/testing/selftests/vm/mremap_dontunmap.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Tests for mremap w/ MREMAP_DONTUNMAP.
+ *
+ * Copyright 2020, Brian Geffon <bgeffon@google.com>
+ */
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#ifndef MREMAP_DONTUNMAP
+#define MREMAP_DONTUNMAP 4
+#endif
+
+unsigned long page_size;
+char *page_buffer;
+
+static void dump_maps(void)
+{
+ char cmd[32];
+
+ snprintf(cmd, sizeof(cmd), "cat /proc/%d/maps", getpid());
+ system(cmd);
+}
+
+#define BUG_ON(condition, description) \
+ do { \
+ if (condition) { \
+ fprintf(stderr, "[FAIL]\t%s():%d\t%s:%s\n", __func__, \
+ __LINE__, (description), strerror(errno)); \
+ dump_maps(); \
+ exit(1); \
+ } \
+ } while (0)
+
+// Try a simple operation for to "test" for kernel support this prevents
+// reporting tests as failed when it's run on an older kernel.
+static int kernel_support_for_mremap_dontunmap()
+{
+ int ret = 0;
+ unsigned long num_pages = 1;
+ void *source_mapping = mmap(NULL, num_pages * page_size, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ BUG_ON(source_mapping == MAP_FAILED, "mmap");
+
+ // This simple remap should only fail if MREMAP_DONTUNMAP isn't
+ // supported.
+ void *dest_mapping =
+ mremap(source_mapping, num_pages * page_size, num_pages * page_size,
+ MREMAP_DONTUNMAP | MREMAP_MAYMOVE, 0);
+ if (dest_mapping == MAP_FAILED) {
+ ret = errno;
+ } else {
+ BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1,
+ "unable to unmap destination mapping");
+ }
+
+ BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+ "unable to unmap source mapping");
+ return ret;
+}
+
+// This helper will just validate that an entire mapping contains the expected
+// byte.
+static int check_region_contains_byte(void *addr, unsigned long size, char byte)
+{
+ BUG_ON(size & (page_size - 1),
+ "check_region_contains_byte expects page multiples");
+ BUG_ON((unsigned long)addr & (page_size - 1),
+ "check_region_contains_byte expects page alignment");
+
+ memset(page_buffer, byte, page_size);
+
+ unsigned long num_pages = size / page_size;
+ unsigned long i;
+
+ // Compare each page checking that it contains our expected byte.
+ for (i = 0; i < num_pages; ++i) {
+ int ret =
+ memcmp(addr + (i * page_size), page_buffer, page_size);
+ if (ret) {
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+// this test validates that MREMAP_DONTUNMAP moves the pagetables while leaving
+// the source mapping mapped.
+static void mremap_dontunmap_simple()
+{
+ unsigned long num_pages = 5;
+
+ void *source_mapping =
+ mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ BUG_ON(source_mapping == MAP_FAILED, "mmap");
+
+ memset(source_mapping, 'a', num_pages * page_size);
+
+ // Try to just move the whole mapping anywhere (not fixed).
+ void *dest_mapping =
+ mremap(source_mapping, num_pages * page_size, num_pages * page_size,
+ MREMAP_DONTUNMAP | MREMAP_MAYMOVE, NULL);
+ BUG_ON(dest_mapping == MAP_FAILED, "mremap");
+
+ // Validate that the pages have been moved, we know they were moved if
+ // the dest_mapping contains a's.
+ BUG_ON(check_region_contains_byte
+ (dest_mapping, num_pages * page_size, 'a') != 0,
+ "pages did not migrate");
+ BUG_ON(check_region_contains_byte
+ (source_mapping, num_pages * page_size, 0) != 0,
+ "source should have no ptes");
+
+ BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1,
+ "unable to unmap destination mapping");
+ BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+ "unable to unmap source mapping");
+}
+
+// This test validates MREMAP_DONTUNMAP will move page tables to a specific
+// destination using MREMAP_FIXED, also while validating that the source
+// remains intact.
+static void mremap_dontunmap_simple_fixed()
+{
+ unsigned long num_pages = 5;
+
+ // Since we want to guarantee that we can remap to a point, we will
+ // create a mapping up front.
+ void *dest_mapping =
+ mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ BUG_ON(dest_mapping == MAP_FAILED, "mmap");
+ memset(dest_mapping, 'X', num_pages * page_size);
+
+ void *source_mapping =
+ mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ BUG_ON(source_mapping == MAP_FAILED, "mmap");
+ memset(source_mapping, 'a', num_pages * page_size);
+
+ void *remapped_mapping =
+ mremap(source_mapping, num_pages * page_size, num_pages * page_size,
+ MREMAP_FIXED | MREMAP_DONTUNMAP | MREMAP_MAYMOVE,
+ dest_mapping);
+ BUG_ON(remapped_mapping == MAP_FAILED, "mremap");
+ BUG_ON(remapped_mapping != dest_mapping,
+ "mremap should have placed the remapped mapping at dest_mapping");
+
+ // The dest mapping will have been unmap by mremap so we expect the Xs
+ // to be gone and replaced with a's.
+ BUG_ON(check_region_contains_byte
+ (dest_mapping, num_pages * page_size, 'a') != 0,
+ "pages did not migrate");
+
+ // And the source mapping will have had its ptes dropped.
+ BUG_ON(check_region_contains_byte
+ (source_mapping, num_pages * page_size, 0) != 0,
+ "source should have no ptes");
+
+ BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1,
+ "unable to unmap destination mapping");
+ BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+ "unable to unmap source mapping");
+}
+
+// This test validates that we can MREMAP_DONTUNMAP for a portion of an
+// existing mapping.
+static void mremap_dontunmap_partial_mapping()
+{
+ /*
+ * source mapping:
+ * --------------
+ * | aaaaaaaaaa |
+ * --------------
+ * to become:
+ * --------------
+ * | aaaaa00000 |
+ * --------------
+ * With the destination mapping containing 5 pages of As.
+ * ---------
+ * | aaaaa |
+ * ---------
+ */
+ unsigned long num_pages = 10;
+ void *source_mapping =
+ mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ BUG_ON(source_mapping == MAP_FAILED, "mmap");
+ memset(source_mapping, 'a', num_pages * page_size);
+
+ // We will grab the last 5 pages of the source and move them.
+ void *dest_mapping =
+ mremap(source_mapping + (5 * page_size), 5 * page_size,
+ 5 * page_size,
+ MREMAP_DONTUNMAP | MREMAP_MAYMOVE, NULL);
+ BUG_ON(dest_mapping == MAP_FAILED, "mremap");
+
+ // We expect the first 5 pages of the source to contain a's and the
+ // final 5 pages to contain zeros.
+ BUG_ON(check_region_contains_byte(source_mapping, 5 * page_size, 'a') !=
+ 0, "first 5 pages of source should have original pages");
+ BUG_ON(check_region_contains_byte
+ (source_mapping + (5 * page_size), 5 * page_size, 0) != 0,
+ "final 5 pages of source should have no ptes");
+
+ // Finally we expect the destination to have 5 pages worth of a's.
+ BUG_ON(check_region_contains_byte(dest_mapping, 5 * page_size, 'a') !=
+ 0, "dest mapping should contain ptes from the source");
+
+ BUG_ON(munmap(dest_mapping, 5 * page_size) == -1,
+ "unable to unmap destination mapping");
+ BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+ "unable to unmap source mapping");
+}
+
+// This test validates that we can remap over only a portion of a mapping.
+static void mremap_dontunmap_partial_mapping_overwrite(void)
+{
+ /*
+ * source mapping:
+ * ---------
+ * |aaaaa|
+ * ---------
+ * dest mapping initially:
+ * -----------
+ * |XXXXXXXXXX|
+ * ------------
+ * Source to become:
+ * ---------
+ * |00000|
+ * ---------
+ * With the destination mapping containing 5 pages of As.
+ * ------------
+ * |aaaaaXXXXX|
+ * ------------
+ */
+ void *source_mapping =
+ mmap(NULL, 5 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ BUG_ON(source_mapping == MAP_FAILED, "mmap");
+ memset(source_mapping, 'a', 5 * page_size);
+
+ void *dest_mapping =
+ mmap(NULL, 10 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ BUG_ON(dest_mapping == MAP_FAILED, "mmap");
+ memset(dest_mapping, 'X', 10 * page_size);
+
+ // We will grab the last 5 pages of the source and move them.
+ void *remapped_mapping =
+ mremap(source_mapping, 5 * page_size,
+ 5 * page_size,
+ MREMAP_DONTUNMAP | MREMAP_MAYMOVE | MREMAP_FIXED, dest_mapping);
+ BUG_ON(dest_mapping == MAP_FAILED, "mremap");
+ BUG_ON(dest_mapping != remapped_mapping, "expected to remap to dest_mapping");
+
+ BUG_ON(check_region_contains_byte(source_mapping, 5 * page_size, 0) !=
+ 0, "first 5 pages of source should have no ptes");
+
+ // Finally we expect the destination to have 5 pages worth of a's.
+ BUG_ON(check_region_contains_byte(dest_mapping, 5 * page_size, 'a') != 0,
+ "dest mapping should contain ptes from the source");
+
+ // Finally the last 5 pages shouldn't have been touched.
+ BUG_ON(check_region_contains_byte(dest_mapping + (5 * page_size),
+ 5 * page_size, 'X') != 0,
+ "dest mapping should have retained the last 5 pages");
+
+ BUG_ON(munmap(dest_mapping, 10 * page_size) == -1,
+ "unable to unmap destination mapping");
+ BUG_ON(munmap(source_mapping, 5 * page_size) == -1,
+ "unable to unmap source mapping");
+}
+
+int main(void)
+{
+ page_size = sysconf(_SC_PAGE_SIZE);
+
+ // test for kernel support for MREMAP_DONTUNMAP skipping the test if
+ // not.
+ if (kernel_support_for_mremap_dontunmap() != 0) {
+ printf("No kernel support for MREMAP_DONTUNMAP\n");
+ return KSFT_SKIP;
+ }
+
+ // Keep a page sized buffer around for when we need it.
+ page_buffer =
+ mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ BUG_ON(page_buffer == MAP_FAILED, "unable to mmap a page.");
+
+ mremap_dontunmap_simple();
+ mremap_dontunmap_simple_fixed();
+ mremap_dontunmap_partial_mapping();
+ mremap_dontunmap_partial_mapping_overwrite();
+
+ BUG_ON(munmap(page_buffer, page_size) == -1,
+ "unable to unmap page buffer");
+
+ printf("OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/vm/pkey-helpers.h
new file mode 100644
index 000000000000..622a85848f61
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-helpers.h
@@ -0,0 +1,225 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PKEYS_HELPER_H
+#define _PKEYS_HELPER_H
+#define _GNU_SOURCE
+#include <string.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+/* Define some kernel-like types */
+#define u8 __u8
+#define u16 __u16
+#define u32 __u32
+#define u64 __u64
+
+#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
+
+#ifndef DEBUG_LEVEL
+#define DEBUG_LEVEL 0
+#endif
+#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
+extern int dprint_in_signal;
+extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+
+extern int test_nr;
+extern int iteration_nr;
+
+#ifdef __GNUC__
+__attribute__((format(printf, 1, 2)))
+#endif
+static inline void sigsafe_printf(const char *format, ...)
+{
+ va_list ap;
+
+ if (!dprint_in_signal) {
+ va_start(ap, format);
+ vprintf(format, ap);
+ va_end(ap);
+ } else {
+ int ret;
+ /*
+ * No printf() functions are signal-safe.
+ * They deadlock easily. Write the format
+ * string to get some output, even if
+ * incomplete.
+ */
+ ret = write(1, format, strlen(format));
+ if (ret < 0)
+ exit(1);
+ }
+}
+#define dprintf_level(level, args...) do { \
+ if (level <= DEBUG_LEVEL) \
+ sigsafe_printf(args); \
+} while (0)
+#define dprintf0(args...) dprintf_level(0, args)
+#define dprintf1(args...) dprintf_level(1, args)
+#define dprintf2(args...) dprintf_level(2, args)
+#define dprintf3(args...) dprintf_level(3, args)
+#define dprintf4(args...) dprintf_level(4, args)
+
+extern void abort_hooks(void);
+#define pkey_assert(condition) do { \
+ if (!(condition)) { \
+ dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
+ __FILE__, __LINE__, \
+ test_nr, iteration_nr); \
+ dprintf0("errno at assert: %d", errno); \
+ abort_hooks(); \
+ exit(__LINE__); \
+ } \
+} while (0)
+
+__attribute__((noinline)) int read_ptr(int *ptr);
+void expected_pkey_fault(int pkey);
+int sys_pkey_alloc(unsigned long flags, unsigned long init_val);
+int sys_pkey_free(unsigned long pkey);
+int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey);
+void record_pkey_malloc(void *ptr, long size, int prot);
+
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+#include "pkey-x86.h"
+#elif defined(__powerpc64__) /* arch */
+#include "pkey-powerpc.h"
+#else /* arch */
+#error Architecture not supported
+#endif /* arch */
+
+#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
+
+static inline u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
+{
+ u32 shift = pkey_bit_position(pkey);
+ /* mask out bits from pkey in old value */
+ reg &= ~((u64)PKEY_MASK << shift);
+ /* OR in new bits for pkey */
+ reg |= (flags & PKEY_MASK) << shift;
+ return reg;
+}
+
+static inline u64 get_pkey_bits(u64 reg, int pkey)
+{
+ u32 shift = pkey_bit_position(pkey);
+ /*
+ * shift down the relevant bits to the lowest two, then
+ * mask off all the other higher bits
+ */
+ return ((reg >> shift) & PKEY_MASK);
+}
+
+extern u64 shadow_pkey_reg;
+
+static inline u64 _read_pkey_reg(int line)
+{
+ u64 pkey_reg = __read_pkey_reg();
+
+ dprintf4("read_pkey_reg(line=%d) pkey_reg: %016llx"
+ " shadow: %016llx\n",
+ line, pkey_reg, shadow_pkey_reg);
+ assert(pkey_reg == shadow_pkey_reg);
+
+ return pkey_reg;
+}
+
+#define read_pkey_reg() _read_pkey_reg(__LINE__)
+
+static inline void write_pkey_reg(u64 pkey_reg)
+{
+ dprintf4("%s() changing %016llx to %016llx\n", __func__,
+ __read_pkey_reg(), pkey_reg);
+ /* will do the shadow check for us: */
+ read_pkey_reg();
+ __write_pkey_reg(pkey_reg);
+ shadow_pkey_reg = pkey_reg;
+ dprintf4("%s(%016llx) pkey_reg: %016llx\n", __func__,
+ pkey_reg, __read_pkey_reg());
+}
+
+/*
+ * These are technically racy. since something could
+ * change PKEY register between the read and the write.
+ */
+static inline void __pkey_access_allow(int pkey, int do_allow)
+{
+ u64 pkey_reg = read_pkey_reg();
+ int bit = pkey * 2;
+
+ if (do_allow)
+ pkey_reg &= (1<<bit);
+ else
+ pkey_reg |= (1<<bit);
+
+ dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
+ write_pkey_reg(pkey_reg);
+}
+
+static inline void __pkey_write_allow(int pkey, int do_allow_write)
+{
+ u64 pkey_reg = read_pkey_reg();
+ int bit = pkey * 2 + 1;
+
+ if (do_allow_write)
+ pkey_reg &= (1<<bit);
+ else
+ pkey_reg |= (1<<bit);
+
+ write_pkey_reg(pkey_reg);
+ dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
+}
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
+#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
+#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
+#define ALIGN_PTR_UP(p, ptr_align_to) \
+ ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+#define ALIGN_PTR_DOWN(p, ptr_align_to) \
+ ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+static inline u32 *siginfo_get_pkey_ptr(siginfo_t *si)
+{
+#ifdef si_pkey
+ return &si->si_pkey;
+#else
+ return (u32 *)(((u8 *)si) + si_pkey_offset);
+#endif
+}
+
+static inline int kernel_has_pkeys(void)
+{
+ /* try allocating a key and see if it succeeds */
+ int ret = sys_pkey_alloc(0, 0);
+ if (ret <= 0) {
+ return 0;
+ }
+ sys_pkey_free(ret);
+ return 1;
+}
+
+static inline int is_pkeys_supported(void)
+{
+ /* check if the cpu supports pkeys */
+ if (!cpu_has_pkeys()) {
+ dprintf1("SKIP: %s: no CPU support\n", __func__);
+ return 0;
+ }
+
+ /* check if the kernel supports pkeys */
+ if (!kernel_has_pkeys()) {
+ dprintf1("SKIP: %s: no kernel support\n", __func__);
+ return 0;
+ }
+
+ return 1;
+}
+
+#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/vm/pkey-powerpc.h
new file mode 100644
index 000000000000..1ebb586b2fbc
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-powerpc.h
@@ -0,0 +1,133 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PKEYS_POWERPC_H
+#define _PKEYS_POWERPC_H
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 386
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 384
+# define SYS_pkey_free 385
+#endif
+#define REG_IP_IDX PT_NIP
+#define REG_TRAPNO PT_TRAP
+#define gregs gp_regs
+#define fpregs fp_regs
+#define si_pkey_offset 0x20
+
+#undef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS 0x3 /* disable read and write */
+
+#undef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE 0x2
+
+#define NR_PKEYS 32
+#define NR_RESERVED_PKEYS_4K 27 /* pkey-0, pkey-1, exec-only-pkey
+ and 24 other keys that cannot be
+ represented in the PTE */
+#define NR_RESERVED_PKEYS_64K_3KEYS 3 /* PowerNV and KVM: pkey-0,
+ pkey-1 and exec-only key */
+#define NR_RESERVED_PKEYS_64K_4KEYS 4 /* PowerVM: pkey-0, pkey-1,
+ pkey-31 and exec-only key */
+#define PKEY_BITS_PER_PKEY 2
+#define HPAGE_SIZE (1UL << 24)
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+
+static inline u32 pkey_bit_position(int pkey)
+{
+ return (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+}
+
+static inline u64 __read_pkey_reg(void)
+{
+ u64 pkey_reg;
+
+ asm volatile("mfspr %0, 0xd" : "=r" (pkey_reg));
+
+ return pkey_reg;
+}
+
+static inline void __write_pkey_reg(u64 pkey_reg)
+{
+ u64 amr = pkey_reg;
+
+ dprintf4("%s() changing %016llx to %016llx\n",
+ __func__, __read_pkey_reg(), pkey_reg);
+
+ asm volatile("isync; mtspr 0xd, %0; isync"
+ : : "r" ((unsigned long)(amr)) : "memory");
+
+ dprintf4("%s() pkey register after changing %016llx to %016llx\n",
+ __func__, __read_pkey_reg(), pkey_reg);
+}
+
+static inline int cpu_has_pkeys(void)
+{
+ /* No simple way to determine this */
+ return 1;
+}
+
+static inline bool arch_is_powervm()
+{
+ struct stat buf;
+
+ if ((stat("/sys/firmware/devicetree/base/ibm,partition-name", &buf) == 0) &&
+ (stat("/sys/firmware/devicetree/base/hmc-managed?", &buf) == 0) &&
+ (stat("/sys/firmware/devicetree/base/chosen/qemu,graphic-width", &buf) == -1) )
+ return true;
+
+ return false;
+}
+
+static inline int get_arch_reserved_keys(void)
+{
+ if (sysconf(_SC_PAGESIZE) == 4096)
+ return NR_RESERVED_PKEYS_4K;
+ else
+ if (arch_is_powervm())
+ return NR_RESERVED_PKEYS_64K_4KEYS;
+ else
+ return NR_RESERVED_PKEYS_64K_3KEYS;
+}
+
+void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+ /*
+ * powerpc does not allow userspace to change permissions of exec-only
+ * keys since those keys are not allocated by userspace. The signal
+ * handler wont be able to reset the permissions, which means the code
+ * will infinitely continue to segfault here.
+ */
+ return;
+}
+
+/* 4-byte instructions * 16384 = 64K page */
+#define __page_o_noops() asm(".rept 16384 ; nop; .endr")
+
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+{
+ void *ptr;
+ int ret;
+
+ dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
+ size, prot, pkey);
+ pkey_assert(pkey < NR_PKEYS);
+ ptr = mmap(NULL, size, prot, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ pkey_assert(ptr != (void *)-1);
+
+ ret = syscall(__NR_subpage_prot, ptr, size, NULL);
+ if (ret) {
+ perror("subpage_perm");
+ return PTR_ERR_ENOTSUP;
+ }
+
+ ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
+ pkey_assert(!ret);
+ record_pkey_malloc(ptr, size, prot);
+
+ dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
+ return ptr;
+}
+
+#endif /* _PKEYS_POWERPC_H */
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/vm/pkey-x86.h
new file mode 100644
index 000000000000..3be20f5d5275
--- /dev/null
+++ b/tools/testing/selftests/vm/pkey-x86.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _PKEYS_X86_H
+#define _PKEYS_X86_H
+
+#ifdef __i386__
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 380
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 381
+# define SYS_pkey_free 382
+#endif
+
+#define REG_IP_IDX REG_EIP
+#define si_pkey_offset 0x14
+
+#else
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 329
+#endif
+
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc 330
+# define SYS_pkey_free 331
+#endif
+
+#define REG_IP_IDX REG_RIP
+#define si_pkey_offset 0x20
+
+#endif
+
+#ifndef PKEY_DISABLE_ACCESS
+# define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+# define PKEY_DISABLE_WRITE 0x2
+#endif
+
+#define NR_PKEYS 16
+#define NR_RESERVED_PKEYS 2 /* pkey-0 and exec-only-pkey */
+#define PKEY_BITS_PER_PKEY 2
+#define HPAGE_SIZE (1UL<<21)
+#define PAGE_SIZE 4096
+#define MB (1<<20)
+
+static inline void __page_o_noops(void)
+{
+ /* 8-bytes of instruction * 512 bytes = 1 page */
+ asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
+}
+
+static inline u64 __read_pkey_reg(void)
+{
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+ unsigned pkey_reg;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkey_reg = eax;
+ return pkey_reg;
+}
+
+static inline void __write_pkey_reg(u64 pkey_reg)
+{
+ unsigned int eax = pkey_reg;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ dprintf4("%s() changing %016llx to %016llx\n", __func__,
+ __read_pkey_reg(), pkey_reg);
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+ assert(pkey_reg == __read_pkey_reg());
+}
+
+static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
+ unsigned int *ecx, unsigned int *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile(
+ "cpuid;"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
+#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
+#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
+
+static inline int cpu_has_pkeys(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ eax = 0x7;
+ ecx = 0x0;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & X86_FEATURE_PKU)) {
+ dprintf2("cpu does not have PKU\n");
+ return 0;
+ }
+ if (!(ecx & X86_FEATURE_OSPKE)) {
+ dprintf2("cpu does not have OSPKE\n");
+ return 0;
+ }
+ return 1;
+}
+
+static inline u32 pkey_bit_position(int pkey)
+{
+ return pkey * PKEY_BITS_PER_PKEY;
+}
+
+#define XSTATE_PKEY_BIT (9)
+#define XSTATE_PKEY 0x200
+
+int pkey_reg_xstate_offset(void)
+{
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+ int xstate_offset;
+ int xstate_size;
+ unsigned long XSTATE_CPUID = 0xd;
+ int leaf;
+
+ /* assume that XSTATE_PKEY is set in XCR0 */
+ leaf = XSTATE_PKEY_BIT;
+ {
+ eax = XSTATE_CPUID;
+ ecx = leaf;
+ __cpuid(&eax, &ebx, &ecx, &edx);
+
+ if (leaf == XSTATE_PKEY_BIT) {
+ xstate_offset = ebx;
+ xstate_size = eax;
+ }
+ }
+
+ if (xstate_size == 0) {
+ printf("could not find size/offset of PKEY in xsave state\n");
+ return 0;
+ }
+
+ return xstate_offset;
+}
+
+static inline int get_arch_reserved_keys(void)
+{
+ return NR_RESERVED_PKEYS;
+}
+
+void expect_fault_on_read_execonly_key(void *p1, int pkey)
+{
+ int ptr_contents;
+
+ ptr_contents = read_ptr(p1);
+ dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
+ expected_pkey_fault(pkey);
+}
+
+void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey)
+{
+ return PTR_ERR_ENOTSUP;
+}
+
+#endif /* _PKEYS_X86_H */
diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/vm/protection_keys.c
index 480995bceefa..fc19addcb5c8 100644
--- a/tools/testing/selftests/x86/protection_keys.c
+++ b/tools/testing/selftests/vm/protection_keys.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Tests x86 Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
+ * Tests Memory Protection Keys (see Documentation/vm/protection-keys.txt)
*
* There are examples in here of:
* * how to set protection keys on memory
- * * how to set/clear bits in PKRU (the rights register)
- * * how to handle SEGV_PKRU signals and extract pkey-relevant
+ * * how to set/clear bits in pkey registers (the rights register)
+ * * how to handle SEGV_PKUERR signals and extract pkey-relevant
* information from the siginfo
*
* Things to add:
@@ -22,8 +22,10 @@
* gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
*/
#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__
#include <errno.h>
#include <linux/futex.h>
+#include <time.h>
#include <sys/time.h>
#include <sys/syscall.h>
#include <string.h>
@@ -48,34 +50,10 @@
int iteration_nr = 1;
int test_nr;
-unsigned int shadow_pkru;
-
-#define HPAGE_SIZE (1UL<<21)
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
-#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
-#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
-#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
-#define ALIGN_PTR_DOWN(p, ptr_align_to) ((typeof(p))ALIGN_DOWN((unsigned long)(p), ptr_align_to))
-#define __stringify_1(x...) #x
-#define __stringify(x...) __stringify_1(x)
-
-#define PTR_ERR_ENOTSUP ((void *)-ENOTSUP)
-
+u64 shadow_pkey_reg;
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-extern void abort_hooks(void);
-#define pkey_assert(condition) do { \
- if (!(condition)) { \
- dprintf0("assert() at %s::%d test_nr: %d iteration: %d\n", \
- __FILE__, __LINE__, \
- test_nr, iteration_nr); \
- dprintf0("errno at assert: %d", errno); \
- abort_hooks(); \
- exit(__LINE__); \
- } \
-} while (0)
-
void cat_into_file(char *str, char *file)
{
int fd = open(file, O_RDWR);
@@ -158,12 +136,6 @@ void abort_hooks(void)
#endif
}
-static inline void __page_o_noops(void)
-{
- /* 8-bytes of instruction * 512 bytes = 1 page */
- asm(".rept 512 ; nopl 0x7eeeeeee(%eax) ; .endr");
-}
-
/*
* This attempts to have roughly a page of instructions followed by a few
* instructions that do a write, and another page of instructions. That
@@ -174,7 +146,12 @@ static inline void __page_o_noops(void)
* will then fault, which makes sure that the fault code handles
* execute-only memory properly.
*/
+#ifdef __powerpc64__
+/* This way, both 4K and 64K alignment are maintained */
+__attribute__((__aligned__(65536)))
+#else
__attribute__((__aligned__(PAGE_SIZE)))
+#endif
void lots_o_noops_around_write(int *write_to_me)
{
dprintf3("running %s()\n", __func__);
@@ -186,51 +163,134 @@ void lots_o_noops_around_write(int *write_to_me)
dprintf3("%s() done\n", __func__);
}
-/* Define some kernel-like types */
-#define u8 uint8_t
-#define u16 uint16_t
-#define u32 uint32_t
-#define u64 uint64_t
+void dump_mem(void *dumpme, int len_bytes)
+{
+ char *c = (void *)dumpme;
+ int i;
-#ifdef __i386__
+ for (i = 0; i < len_bytes; i += sizeof(u64)) {
+ u64 *ptr = (u64 *)(c + i);
+ dprintf1("dump[%03d][@%p]: %016llx\n", i, ptr, *ptr);
+ }
+}
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
-#endif
+static u32 hw_pkey_get(int pkey, unsigned long flags)
+{
+ u64 pkey_reg = __read_pkey_reg();
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
-#endif
+ dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
+ __func__, pkey, flags, 0, 0);
+ dprintf2("%s() raw pkey_reg: %016llx\n", __func__, pkey_reg);
-#define REG_IP_IDX REG_EIP
-#define si_pkey_offset 0x14
+ return (u32) get_pkey_bits(pkey_reg, pkey);
+}
-#else
+static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
+{
+ u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
+ u64 old_pkey_reg = __read_pkey_reg();
+ u64 new_pkey_reg;
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
-#endif
+ /* make sure that 'rights' only contains the bits we expect: */
+ assert(!(rights & ~mask));
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
-#endif
+ /* modify bits accordingly in old pkey_reg and assign it */
+ new_pkey_reg = set_pkey_bits(old_pkey_reg, pkey, rights);
-#define REG_IP_IDX REG_RIP
-#define si_pkey_offset 0x20
+ __write_pkey_reg(new_pkey_reg);
-#endif
+ dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x"
+ " pkey_reg now: %016llx old_pkey_reg: %016llx\n",
+ __func__, pkey, rights, flags, 0, __read_pkey_reg(),
+ old_pkey_reg);
+ return 0;
+}
-void dump_mem(void *dumpme, int len_bytes)
+void pkey_disable_set(int pkey, int flags)
{
- char *c = (void *)dumpme;
- int i;
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights;
+ u64 orig_pkey_reg = read_pkey_reg();
- for (i = 0; i < len_bytes; i += sizeof(u64)) {
- u64 *ptr = (u64 *)(c + i);
- dprintf1("dump[%03d][@%p]: %016jx\n", i, ptr, *ptr);
- }
+ dprintf1("START->%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights |= flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
+ assert(!ret);
+ /* pkey_reg and flags have the same format */
+ shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights);
+ dprintf1("%s(%d) shadow: 0x%016llx\n",
+ __func__, pkey, shadow_pkey_reg);
+
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkey_reg: 0x%016llx\n",
+ __func__, pkey, read_pkey_reg());
+ if (flags)
+ pkey_assert(read_pkey_reg() >= orig_pkey_reg);
+ dprintf1("END<---%s(%d, 0x%x)\n", __func__,
+ pkey, flags);
+}
+
+void pkey_disable_clear(int pkey, int flags)
+{
+ unsigned long syscall_flags = 0;
+ int ret;
+ int pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ u64 orig_pkey_reg = read_pkey_reg();
+
+ pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
+
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+ pkey_assert(pkey_rights >= 0);
+
+ pkey_rights &= ~flags;
+
+ ret = hw_pkey_set(pkey, pkey_rights, 0);
+ shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, pkey, pkey_rights);
+ pkey_assert(ret >= 0);
+
+ pkey_rights = hw_pkey_get(pkey, syscall_flags);
+ dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
+ pkey, pkey, pkey_rights);
+
+ dprintf1("%s(%d) pkey_reg: 0x%016llx\n", __func__,
+ pkey, read_pkey_reg());
+ if (flags)
+ assert(read_pkey_reg() <= orig_pkey_reg);
+}
+
+void pkey_write_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_write_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
+}
+void pkey_access_allow(int pkey)
+{
+ pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
+}
+void pkey_access_deny(int pkey)
+{
+ pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
}
/* Failed address bound checks: */
@@ -255,7 +315,7 @@ static char *si_code_str(int si_code)
return "UNKNOWN";
}
-int pkru_faults;
+int pkey_faults;
int last_si_pkey = -1;
void signal_handler(int signum, siginfo_t *si, void *vucontext)
{
@@ -263,24 +323,28 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
int trapno;
unsigned long ip;
char *fpregs;
- u32 *pkru_ptr;
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ u32 *pkey_reg_ptr;
+ int pkey_reg_offset;
+#endif /* arch */
u64 siginfo_pkey;
u32 *si_pkey_ptr;
- int pkru_offset;
- fpregset_t fpregset;
dprint_in_signal = 1;
dprintf1(">>>>===============SIGSEGV============================\n");
- dprintf1("%s()::%d, pkru: 0x%x shadow: %x\n", __func__, __LINE__,
- __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
+ __func__, __LINE__,
+ __read_pkey_reg(), shadow_pkey_reg);
trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO];
ip = uctxt->uc_mcontext.gregs[REG_IP_IDX];
- fpregset = uctxt->uc_mcontext.fpregs;
- fpregs = (void *)fpregset;
+ fpregs = (char *) uctxt->uc_mcontext.fpregs;
- dprintf2("%s() trapno: %d ip: 0x%lx info->si_code: %s/%d\n", __func__,
- trapno, ip, si_code_str(si->si_code), si->si_code);
+ dprintf2("%s() trapno: %d ip: 0x%016lx info->si_code: %s/%d\n",
+ __func__, trapno, ip, si_code_str(si->si_code),
+ si->si_code);
+
+#if defined(__i386__) || defined(__x86_64__) /* arch */
#ifdef __i386__
/*
* 32-bit has some extra padding so that userspace can tell whether
@@ -288,20 +352,22 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
* state. We just assume that it is here.
*/
fpregs += 0x70;
-#endif
- pkru_offset = pkru_xstate_offset();
- pkru_ptr = (void *)(&fpregs[pkru_offset]);
+#endif /* i386 */
+ pkey_reg_offset = pkey_reg_xstate_offset();
+ pkey_reg_ptr = (void *)(&fpregs[pkey_reg_offset]);
- dprintf1("siginfo: %p\n", si);
- dprintf1(" fpregs: %p\n", fpregs);
/*
- * If we got a PKRU fault, we *HAVE* to have at least one bit set in
+ * If we got a PKEY fault, we *HAVE* to have at least one bit set in
* here.
*/
- dprintf1("pkru_xstate_offset: %d\n", pkru_xstate_offset());
+ dprintf1("pkey_reg_xstate_offset: %d\n", pkey_reg_xstate_offset());
if (DEBUG_LEVEL > 4)
- dump_mem(pkru_ptr - 128, 256);
- pkey_assert(*pkru_ptr);
+ dump_mem(pkey_reg_ptr - 128, 256);
+ pkey_assert(*pkey_reg_ptr);
+#endif /* arch */
+
+ dprintf1("siginfo: %p\n", si);
+ dprintf1(" fpregs: %p\n", fpregs);
if ((si->si_code == SEGV_MAPERR) ||
(si->si_code == SEGV_ACCERR) ||
@@ -310,20 +376,29 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext)
exit(4);
}
- si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset);
+ si_pkey_ptr = siginfo_get_pkey_ptr(si);
dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr);
dump_mem((u8 *)si_pkey_ptr - 8, 24);
siginfo_pkey = *si_pkey_ptr;
pkey_assert(siginfo_pkey < NR_PKEYS);
last_si_pkey = siginfo_pkey;
- dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr);
- /* need __rdpkru() version so we do not do shadow_pkru checking */
- dprintf1("signal pkru from pkru: %08x\n", __rdpkru());
- dprintf1("pkey from siginfo: %jx\n", siginfo_pkey);
- *(u64 *)pkru_ptr = 0x00000000;
- dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n");
- pkru_faults++;
+ /*
+ * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+ * checking
+ */
+ dprintf1("signal pkey_reg from pkey_reg: %016llx\n",
+ __read_pkey_reg());
+ dprintf1("pkey from siginfo: %016llx\n", siginfo_pkey);
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ dprintf1("signal pkey_reg from xsave: %08x\n", *pkey_reg_ptr);
+ *(u64 *)pkey_reg_ptr = 0x00000000;
+ dprintf1("WARNING: set PKEY_REG=0 to allow faulting instruction to continue\n");
+#elif defined(__powerpc64__) /* arch */
+ /* restore access and let the faulting instruction continue */
+ pkey_access_allow(siginfo_pkey);
+#endif /* arch */
+ pkey_faults++;
dprintf1("<<<<==================================================\n");
dprint_in_signal = 0;
}
@@ -391,143 +466,6 @@ pid_t fork_lazy_child(void)
return forkret;
}
-#ifndef PKEY_DISABLE_ACCESS
-# define PKEY_DISABLE_ACCESS 0x1
-#endif
-
-#ifndef PKEY_DISABLE_WRITE
-# define PKEY_DISABLE_WRITE 0x2
-#endif
-
-static u32 hw_pkey_get(int pkey, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 pkru = __rdpkru();
- u32 shifted_pkru;
- u32 masked_pkru;
-
- dprintf1("%s(pkey=%d, flags=%lx) = %x / %d\n",
- __func__, pkey, flags, 0, 0);
- dprintf2("%s() raw pkru: %x\n", __func__, pkru);
-
- shifted_pkru = (pkru >> (pkey * PKRU_BITS_PER_PKEY));
- dprintf2("%s() shifted_pkru: %x\n", __func__, shifted_pkru);
- masked_pkru = shifted_pkru & mask;
- dprintf2("%s() masked pkru: %x\n", __func__, masked_pkru);
- /*
- * shift down the relevant bits to the lowest two, then
- * mask off all the other high bits.
- */
- return masked_pkru;
-}
-
-static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags)
-{
- u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE);
- u32 old_pkru = __rdpkru();
- u32 new_pkru;
-
- /* make sure that 'rights' only contains the bits we expect: */
- assert(!(rights & ~mask));
-
- /* copy old pkru */
- new_pkru = old_pkru;
- /* mask out bits from pkey in old value: */
- new_pkru &= ~(mask << (pkey * PKRU_BITS_PER_PKEY));
- /* OR in new bits for pkey: */
- new_pkru |= (rights << (pkey * PKRU_BITS_PER_PKEY));
-
- __wrpkru(new_pkru);
-
- dprintf3("%s(pkey=%d, rights=%lx, flags=%lx) = %x pkru now: %x old_pkru: %x\n",
- __func__, pkey, rights, flags, 0, __rdpkru(), old_pkru);
- return 0;
-}
-
-void pkey_disable_set(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights;
- u32 orig_pkru = rdpkru();
-
- dprintf1("START->%s(%d, 0x%x)\n", __func__,
- pkey, flags);
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, syscall_flags);
- assert(!ret);
- /*pkru and flags have the same format */
- shadow_pkru |= flags << (pkey * 2);
- dprintf1("%s(%d) shadow: 0x%x\n", __func__, pkey, shadow_pkru);
-
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- pkey_assert(rdpkru() > orig_pkru);
- dprintf1("END<---%s(%d, 0x%x)\n", __func__,
- pkey, flags);
-}
-
-void pkey_disable_clear(int pkey, int flags)
-{
- unsigned long syscall_flags = 0;
- int ret;
- int pkey_rights = hw_pkey_get(pkey, syscall_flags);
- u32 orig_pkru = rdpkru();
-
- pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE));
-
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
- pkey_assert(pkey_rights >= 0);
-
- pkey_rights |= flags;
-
- ret = hw_pkey_set(pkey, pkey_rights, 0);
- /* pkru and flags have the same format */
- shadow_pkru &= ~(flags << (pkey * 2));
- pkey_assert(ret >= 0);
-
- pkey_rights = hw_pkey_get(pkey, syscall_flags);
- dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__,
- pkey, pkey, pkey_rights);
-
- dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru());
- if (flags)
- assert(rdpkru() > orig_pkru);
-}
-
-void pkey_write_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_write_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_WRITE);
-}
-void pkey_access_allow(int pkey)
-{
- pkey_disable_clear(pkey, PKEY_DISABLE_ACCESS);
-}
-void pkey_access_deny(int pkey)
-{
- pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
-}
-
int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
unsigned long pkey)
{
@@ -561,33 +499,44 @@ int alloc_pkey(void)
int ret;
unsigned long init_val = 0x0;
- dprintf1("alloc_pkey()::%d, pkru: 0x%x shadow: %x\n",
- __LINE__, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, pkey_reg: 0x%016llx shadow: %016llx\n",
+ __func__, __LINE__, __read_pkey_reg(), shadow_pkey_reg);
ret = sys_pkey_alloc(0, init_val);
/*
- * pkey_alloc() sets PKRU, so we need to reflect it in
- * shadow_pkru:
+ * pkey_alloc() sets PKEY register, so we need to reflect it in
+ * shadow_pkey_reg:
*/
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
if (ret) {
/* clear both the bits: */
- shadow_pkru &= ~(0x3 << (ret * 2));
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
+ ~PKEY_MASK);
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__,
+ __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
/*
* move the new state in from init_val
- * (remember, we cheated and init_val == pkru format)
+ * (remember, we cheated and init_val == pkey_reg format)
*/
- shadow_pkru |= (init_val << (ret * 2));
+ shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
+ init_val);
}
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
- dprintf1("alloc_pkey()::%d errno: %d\n", __LINE__, errno);
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
+ dprintf1("%s()::%d errno: %d\n", __func__, __LINE__, errno);
/* for shadow checking: */
- rdpkru();
- dprintf4("alloc_pkey()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n",
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ read_pkey_reg();
+ dprintf4("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
return ret;
}
@@ -612,10 +561,10 @@ int alloc_random_pkey(void)
int nr_alloced = 0;
int random_index;
memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
+ srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */
max_nr_pkey_allocs = NR_PKEYS;
- max_nr_pkey_allocs = 1;
for (i = 0; i < max_nr_pkey_allocs; i++) {
int new_pkey = alloc_pkey();
if (new_pkey < 0)
@@ -638,8 +587,9 @@ int alloc_random_pkey(void)
free_ret = sys_pkey_free(alloced_pkeys[i]);
pkey_assert(!free_ret);
}
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n", __func__,
+ __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -657,11 +607,15 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
if (nr_iterations-- < 0)
break;
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
sys_pkey_free(rpkey);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, ret, __read_pkey_reg(),
+ shadow_pkey_reg);
}
pkey_assert(pkey < NR_PKEYS);
@@ -669,8 +623,9 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
dprintf1("mprotect_pkey(%p, %zx, prot=0x%lx, pkey=%ld) ret: %d\n",
ptr, size, orig_prot, pkey, ret);
pkey_assert(!ret);
- dprintf1("%s()::%d, ret: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, ret, __rdpkru(), shadow_pkru);
+ dprintf1("%s()::%d, ret: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n", __func__,
+ __LINE__, ret, __read_pkey_reg(), shadow_pkey_reg);
return ret;
}
@@ -752,7 +707,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
void *ptr;
int ret;
- rdpkru();
+ read_pkey_reg();
dprintf1("doing %s(size=%ld, prot=0x%x, pkey=%d)\n", __func__,
size, prot, pkey);
pkey_assert(pkey < NR_PKEYS);
@@ -761,7 +716,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey)
ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey);
pkey_assert(!ret);
record_pkey_malloc(ptr, size, prot);
- rdpkru();
+ read_pkey_reg();
dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr);
return ptr;
@@ -798,12 +753,15 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey)
}
int hugetlb_setup_ok;
+#define SYSFS_FMT_NR_HUGE_PAGES "/sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages"
#define GET_NR_HUGE_PAGES 10
void setup_hugetlbfs(void)
{
int err;
int fd;
- char buf[] = "123";
+ char buf[256];
+ long hpagesz_kb;
+ long hpagesz_mb;
if (geteuid() != 0) {
fprintf(stderr, "WARNING: not run as root, can not do hugetlb test\n");
@@ -814,11 +772,16 @@ void setup_hugetlbfs(void)
/*
* Now go make sure that we got the pages and that they
- * are 2M pages. Someone might have made 1G the default.
+ * are PMD-level pages. Someone might have made PUD-level
+ * pages the default.
*/
- fd = open("/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages", O_RDONLY);
+ hpagesz_kb = HPAGE_SIZE / 1024;
+ hpagesz_mb = hpagesz_kb / 1024;
+ sprintf(buf, SYSFS_FMT_NR_HUGE_PAGES, hpagesz_kb);
+ fd = open(buf, O_RDONLY);
if (fd < 0) {
- perror("opening sysfs 2M hugetlb config");
+ fprintf(stderr, "opening sysfs %ldM hugetlb config: %s\n",
+ hpagesz_mb, strerror(errno));
return;
}
@@ -826,13 +789,14 @@ void setup_hugetlbfs(void)
err = read(fd, buf, sizeof(buf)-1);
close(fd);
if (err <= 0) {
- perror("reading sysfs 2M hugetlb config");
+ fprintf(stderr, "reading sysfs %ldM hugetlb config: %s\n",
+ hpagesz_mb, strerror(errno));
return;
}
if (atoi(buf) != GET_NR_HUGE_PAGES) {
- fprintf(stderr, "could not confirm 2M pages, got: '%s' expected %d\n",
- buf, GET_NR_HUGE_PAGES);
+ fprintf(stderr, "could not confirm %ldM pages, got: '%s' expected %d\n",
+ hpagesz_mb, buf, GET_NR_HUGE_PAGES);
return;
}
@@ -886,6 +850,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey)
void *(*pkey_malloc[])(long size, int prot, u16 pkey) = {
malloc_pkey_with_mprotect,
+ malloc_pkey_with_mprotect_subpage,
malloc_pkey_anon_huge,
malloc_pkey_hugetlb
/* can not do direct with the pkey_mprotect() API:
@@ -924,14 +889,14 @@ void *malloc_pkey(long size, int prot, u16 pkey)
return ret;
}
-int last_pkru_faults;
+int last_pkey_faults;
#define UNKNOWN_PKEY -2
-void expected_pk_fault(int pkey)
+void expected_pkey_fault(int pkey)
{
- dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n",
- __func__, last_pkru_faults, pkru_faults);
+ dprintf2("%s(): last_pkey_faults: %d pkey_faults: %d\n",
+ __func__, last_pkey_faults, pkey_faults);
dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey);
- pkey_assert(last_pkru_faults + 1 == pkru_faults);
+ pkey_assert(last_pkey_faults + 1 == pkey_faults);
/*
* For exec-only memory, we do not know the pkey in
@@ -940,24 +905,28 @@ void expected_pk_fault(int pkey)
if (pkey != UNKNOWN_PKEY)
pkey_assert(last_si_pkey == pkey);
+#if defined(__i386__) || defined(__x86_64__) /* arch */
/*
- * The signal handler shold have cleared out PKRU to let the
+ * The signal handler shold have cleared out PKEY register to let the
* test program continue. We now have to restore it.
*/
- if (__rdpkru() != 0)
+ if (__read_pkey_reg() != 0)
+#else /* arch */
+ if (__read_pkey_reg() != shadow_pkey_reg)
+#endif /* arch */
pkey_assert(0);
- __wrpkru(shadow_pkru);
- dprintf1("%s() set PKRU=%x to restore state after signal nuked it\n",
- __func__, shadow_pkru);
- last_pkru_faults = pkru_faults;
+ __write_pkey_reg(shadow_pkey_reg);
+ dprintf1("%s() set pkey_reg=%016llx to restore state after signal "
+ "nuked it\n", __func__, shadow_pkey_reg);
+ last_pkey_faults = pkey_faults;
last_si_pkey = -1;
}
-#define do_not_expect_pk_fault(msg) do { \
- if (last_pkru_faults != pkru_faults) \
- dprintf0("unexpected PK fault: %s\n", msg); \
- pkey_assert(last_pkru_faults == pkru_faults); \
+#define do_not_expect_pkey_fault(msg) do { \
+ if (last_pkey_faults != pkey_faults) \
+ dprintf0("unexpected PKey fault: %s\n", msg); \
+ pkey_assert(last_pkey_faults == pkey_faults); \
} while (0)
int test_fds[10] = { -1 };
@@ -1000,6 +969,58 @@ __attribute__((noinline)) int read_ptr(int *ptr)
return *ptr;
}
+void test_pkey_alloc_free_attach_pkey0(int *ptr, u16 pkey)
+{
+ int i, err;
+ int max_nr_pkey_allocs;
+ int alloced_pkeys[NR_PKEYS];
+ int nr_alloced = 0;
+ long size;
+
+ pkey_assert(pkey_last_malloc_record);
+ size = pkey_last_malloc_record->size;
+ /*
+ * This is a bit of a hack. But mprotect() requires
+ * huge-page-aligned sizes when operating on hugetlbfs.
+ * So, make sure that we use something that's a multiple
+ * of a huge page when we can.
+ */
+ if (size >= HPAGE_SIZE)
+ size = HPAGE_SIZE;
+
+ /* allocate every possible key and make sure key-0 never got allocated */
+ max_nr_pkey_allocs = NR_PKEYS;
+ for (i = 0; i < max_nr_pkey_allocs; i++) {
+ int new_pkey = alloc_pkey();
+ pkey_assert(new_pkey != 0);
+
+ if (new_pkey < 0)
+ break;
+ alloced_pkeys[nr_alloced++] = new_pkey;
+ }
+ /* free all the allocated keys */
+ for (i = 0; i < nr_alloced; i++) {
+ int free_ret;
+
+ if (!alloced_pkeys[i])
+ continue;
+ free_ret = sys_pkey_free(alloced_pkeys[i]);
+ pkey_assert(!free_ret);
+ }
+
+ /* attach key-0 in various modes */
+ err = sys_mprotect_pkey(ptr, size, PROT_READ, 0);
+ pkey_assert(!err);
+ err = sys_mprotect_pkey(ptr, size, PROT_WRITE, 0);
+ pkey_assert(!err);
+ err = sys_mprotect_pkey(ptr, size, PROT_EXEC, 0);
+ pkey_assert(!err);
+ err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE, 0);
+ pkey_assert(!err);
+ err = sys_mprotect_pkey(ptr, size, PROT_READ|PROT_WRITE|PROT_EXEC, 0);
+ pkey_assert(!err);
+}
+
void test_read_of_write_disabled_region(int *ptr, u16 pkey)
{
int ptr_contents;
@@ -1015,26 +1036,67 @@ void test_read_of_access_disabled_region(int *ptr, u16 pkey)
int ptr_contents;
dprintf1("disabling access to PKEY[%02d], doing read @ %p\n", pkey, ptr);
- rdpkru();
+ read_pkey_reg();
+ pkey_access_deny(pkey);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("*ptr: %d\n", ptr_contents);
+ expected_pkey_fault(pkey);
+}
+
+void test_read_of_access_disabled_region_with_page_already_mapped(int *ptr,
+ u16 pkey)
+{
+ int ptr_contents;
+
+ dprintf1("disabling access to PKEY[%02d], doing read @ %p\n",
+ pkey, ptr);
+ ptr_contents = read_ptr(ptr);
+ dprintf1("reading ptr before disabling the read : %d\n",
+ ptr_contents);
+ read_pkey_reg();
pkey_access_deny(pkey);
ptr_contents = read_ptr(ptr);
dprintf1("*ptr: %d\n", ptr_contents);
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
+
+void test_write_of_write_disabled_region_with_page_already_mapped(int *ptr,
+ u16 pkey)
+{
+ *ptr = __LINE__;
+ dprintf1("disabling write access; after accessing the page, "
+ "to PKEY[%02d], doing write\n", pkey);
+ pkey_write_deny(pkey);
+ *ptr = __LINE__;
+ expected_pkey_fault(pkey);
+}
+
void test_write_of_write_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling write access to PKEY[%02d], doing write\n", pkey);
pkey_write_deny(pkey);
*ptr = __LINE__;
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
void test_write_of_access_disabled_region(int *ptr, u16 pkey)
{
dprintf1("disabling access to PKEY[%02d], doing write\n", pkey);
pkey_access_deny(pkey);
*ptr = __LINE__;
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
}
+
+void test_write_of_access_disabled_region_with_page_already_mapped(int *ptr,
+ u16 pkey)
+{
+ *ptr = __LINE__;
+ dprintf1("disabling access; after accessing the page, "
+ " to PKEY[%02d], doing write\n", pkey);
+ pkey_access_deny(pkey);
+ *ptr = __LINE__;
+ expected_pkey_fault(pkey);
+}
+
void test_kernel_write_of_access_disabled_region(int *ptr, u16 pkey)
{
int ret;
@@ -1160,9 +1222,11 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
int new_pkey;
dprintf1("%s() alloc loop: %d\n", __func__, i);
new_pkey = alloc_pkey();
- dprintf4("%s()::%d, err: %d pkru: 0x%x shadow: 0x%x\n", __func__,
- __LINE__, err, __rdpkru(), shadow_pkru);
- rdpkru(); /* for shadow checking */
+ dprintf4("%s()::%d, err: %d pkey_reg: 0x%016llx"
+ " shadow: 0x%016llx\n",
+ __func__, __LINE__, err, __read_pkey_reg(),
+ shadow_pkey_reg);
+ read_pkey_reg(); /* for shadow checking */
dprintf2("%s() errno: %d ENOSPC: %d\n", __func__, errno, ENOSPC);
if ((new_pkey == -1) && (errno == ENOSPC)) {
dprintf2("%s() failed to allocate pkey after %d tries\n",
@@ -1188,6 +1252,7 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
dprintf3("%s()::%d\n", __func__, __LINE__);
/*
+ * On x86:
* There are 16 pkeys supported in hardware. Three are
* allocated by the time we get here:
* 1. The default key (0)
@@ -1195,13 +1260,21 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
* 3. One allocated by the test code and passed in via
* 'pkey' to this function.
* Ensure that we can allocate at least another 13 (16-3).
+ *
+ * On powerpc:
+ * There are either 5, 28, 29 or 32 pkeys supported in
+ * hardware depending on the page size (4K or 64K) and
+ * platform (powernv or powervm). Four are allocated by
+ * the time we get here. These include pkey-0, pkey-1,
+ * exec-only pkey and the one allocated by the test code.
+ * Ensure that we can allocate the remaining.
*/
- pkey_assert(i >= NR_PKEYS-3);
+ pkey_assert(i >= (NR_PKEYS - get_arch_reserved_keys() - 1));
for (i = 0; i < nr_allocated_pkeys; i++) {
err = sys_pkey_free(allocated_pkeys[i]);
pkey_assert(!err);
- rdpkru(); /* for shadow checking */
+ read_pkey_reg(); /* for shadow checking */
}
}
@@ -1287,7 +1360,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
pkey_assert(ret != -1);
/* Now access from the current task, and expect an exception: */
peek_result = read_ptr(ptr);
- expected_pk_fault(pkey);
+ expected_pkey_fault(pkey);
/*
* Try to access the NON-pkey-protected "plain_ptr" via ptrace:
@@ -1297,7 +1370,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey)
pkey_assert(ret != -1);
/* Now access from the current task, and expect NO exception: */
peek_result = read_ptr(plain_ptr);
- do_not_expect_pk_fault("read plain pointer after ptrace");
+ do_not_expect_pkey_fault("read plain pointer after ptrace");
ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0);
pkey_assert(ret != -1);
@@ -1347,17 +1420,15 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey)
pkey_assert(!ret);
pkey_access_deny(pkey);
- dprintf2("pkru: %x\n", rdpkru());
+ dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
/*
* Make sure this is an *instruction* fault
*/
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(pkey);
+ do_not_expect_pkey_fault("executing on PROT_EXEC memory");
+ expect_fault_on_read_execonly_key(p1, pkey);
}
void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
@@ -1378,15 +1449,13 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
pkey_assert(!ret);
- dprintf2("pkru: %x\n", rdpkru());
+ dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */
madvise(p1, PAGE_SIZE, MADV_DONTNEED);
lots_o_noops_around_write(&scratch);
- do_not_expect_pk_fault("executing on PROT_EXEC memory");
- ptr_contents = read_ptr(p1);
- dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents);
- expected_pk_fault(UNKNOWN_PKEY);
+ do_not_expect_pkey_fault("executing on PROT_EXEC memory");
+ expect_fault_on_read_execonly_key(p1, UNKNOWN_PKEY);
/*
* Put the memory back to non-PROT_EXEC. Should clear the
@@ -1400,7 +1469,7 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC);
pkey_assert(!ret);
ptr_contents = read_ptr(p1);
- do_not_expect_pk_fault("plain read on recently PROT_EXEC area");
+ do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
}
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
@@ -1408,7 +1477,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
int size = PAGE_SIZE;
int sret;
- if (cpu_has_pku()) {
+ if (cpu_has_pkeys()) {
dprintf1("SKIP: %s: no CPU support\n", __func__);
return;
}
@@ -1420,8 +1489,11 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_read_of_write_disabled_region,
test_read_of_access_disabled_region,
+ test_read_of_access_disabled_region_with_page_already_mapped,
test_write_of_write_disabled_region,
+ test_write_of_write_disabled_region_with_page_already_mapped,
test_write_of_access_disabled_region,
+ test_write_of_access_disabled_region_with_page_already_mapped,
test_kernel_write_of_access_disabled_region,
test_kernel_write_of_write_disabled_region,
test_kernel_gup_of_access_disabled_region,
@@ -1433,6 +1505,7 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_pkey_syscalls_on_non_allocated_pkey,
test_pkey_syscalls_bad_args,
test_pkey_alloc_exhaust,
+ test_pkey_alloc_free_attach_pkey0,
};
void run_tests_once(void)
@@ -1442,7 +1515,7 @@ void run_tests_once(void)
for (test_nr = 0; test_nr < ARRAY_SIZE(pkey_tests); test_nr++) {
int pkey;
- int orig_pkru_faults = pkru_faults;
+ int orig_pkey_faults = pkey_faults;
dprintf1("======================\n");
dprintf1("test %d preparing...\n", test_nr);
@@ -1457,8 +1530,8 @@ void run_tests_once(void)
free_pkey_malloc(ptr);
sys_pkey_free(pkey);
- dprintf1("pkru_faults: %d\n", pkru_faults);
- dprintf1("orig_pkru_faults: %d\n", orig_pkru_faults);
+ dprintf1("pkey_faults: %d\n", pkey_faults);
+ dprintf1("orig_pkey_faults: %d\n", orig_pkey_faults);
tracing_off();
close_test_fds();
@@ -1471,18 +1544,19 @@ void run_tests_once(void)
void pkey_setup_shadow(void)
{
- shadow_pkru = __rdpkru();
+ shadow_pkey_reg = __read_pkey_reg();
}
int main(void)
{
int nr_iterations = 22;
+ int pkeys_supported = is_pkeys_supported();
setup_handlers();
- printf("has pku: %d\n", cpu_has_pku());
+ printf("has pkeys: %d\n", pkeys_supported);
- if (!cpu_has_pku()) {
+ if (!pkeys_supported) {
int size = PAGE_SIZE;
int *ptr;
@@ -1495,7 +1569,7 @@ int main(void)
}
pkey_setup_shadow();
- printf("startup pkru: %x\n", rdpkru());
+ printf("startup pkey_reg: %016llx\n", read_pkey_reg());
setup_hugetlbfs();
while (nr_iterations-- > 0)
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index f33714843198..a3f4f30f0a2e 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -59,7 +59,7 @@ else
fi
#filter 64bit architectures
-ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64"
+ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64"
if [ -z $ARCH ]; then
ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'`
fi
@@ -123,6 +123,28 @@ else
echo "[PASS]"
fi
+echo "--------------------------------------------"
+echo "running 'gup_benchmark -U' (normal/slow gup)"
+echo "--------------------------------------------"
+./gup_benchmark -U
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
+echo "------------------------------------------"
+echo "running gup_benchmark -b (pin_user_pages)"
+echo "------------------------------------------"
+./gup_benchmark -b
+if [ $? -ne 0 ]; then
+ echo "[FAIL]"
+ exitcode=1
+else
+ echo "[PASS]"
+fi
+
echo "-------------------"
echo "running userfaultfd"
echo "-------------------"
@@ -270,4 +292,35 @@ else
exitcode=1
fi
+echo "------------------------------------"
+echo "running MREMAP_DONTUNMAP smoke test"
+echo "------------------------------------"
+./mremap_dontunmap
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
+echo "running HMM smoke test"
+echo "------------------------------------"
+./test_hmm.sh smoke
+ret_val=$?
+
+if [ $ret_val -eq 0 ]; then
+ echo "[PASS]"
+elif [ $ret_val -eq $ksft_skip ]; then
+ echo "[SKIP]"
+ exitcode=$ksft_skip
+else
+ echo "[FAIL]"
+ exitcode=1
+fi
+
exit $exitcode
diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/vm/test_hmm.sh
new file mode 100755
index 000000000000..0647b525a625
--- /dev/null
+++ b/tools/testing/selftests/vm/test_hmm.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2018 Uladzislau Rezki (Sony) <urezki@gmail.com>
+#
+# This is a test script for the kernel test driver to analyse vmalloc
+# allocator. Therefore it is just a kernel module loader. You can specify
+# and pass different parameters in order to:
+# a) analyse performance of vmalloc allocations;
+# b) stressing and stability check of vmalloc subsystem.
+
+TEST_NAME="test_hmm"
+DRIVER="test_hmm"
+
+# 1 if fails
+exitcode=1
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+check_test_requirements()
+{
+ uid=$(id -u)
+ if [ $uid -ne 0 ]; then
+ echo "$0: Must be run as root"
+ exit $ksft_skip
+ fi
+
+ if ! which modprobe > /dev/null 2>&1; then
+ echo "$0: You need modprobe installed"
+ exit $ksft_skip
+ fi
+
+ if ! modinfo $DRIVER > /dev/null 2>&1; then
+ echo "$0: You must have the following enabled in your kernel:"
+ echo "CONFIG_TEST_HMM=m"
+ exit $ksft_skip
+ fi
+}
+
+load_driver()
+{
+ modprobe $DRIVER > /dev/null 2>&1
+ if [ $? == 0 ]; then
+ major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
+ mknod /dev/hmm_dmirror0 c $major 0
+ mknod /dev/hmm_dmirror1 c $major 1
+ fi
+}
+
+unload_driver()
+{
+ modprobe -r $DRIVER > /dev/null 2>&1
+ rm -f /dev/hmm_dmirror?
+}
+
+run_smoke()
+{
+ echo "Running smoke test. Note, this test provides basic coverage."
+
+ load_driver
+ $(dirname "${BASH_SOURCE[0]}")/hmm-tests
+ unload_driver
+}
+
+usage()
+{
+ echo -n "Usage: $0"
+ echo
+ echo "Example usage:"
+ echo
+ echo "# Shows help message"
+ echo "./${TEST_NAME}.sh"
+ echo
+ echo "# Smoke testing"
+ echo "./${TEST_NAME}.sh smoke"
+ echo
+ exit 0
+}
+
+function run_test()
+{
+ if [ $# -eq 0 ]; then
+ usage
+ else
+ if [ "$1" = "smoke" ]; then
+ run_smoke
+ else
+ usage
+ fi
+ fi
+}
+
+check_test_requirements
+run_test $@
+
+exit 0
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index d3362777a425..61e5cfeb1350 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -54,6 +54,7 @@
#include <linux/userfaultfd.h>
#include <setjmp.h>
#include <stdbool.h>
+#include <assert.h>
#include "../kselftest.h"
@@ -76,6 +77,8 @@ static int test_type;
#define ALARM_INTERVAL_SECS 10
static volatile bool test_uffdio_copy_eexist = true;
static volatile bool test_uffdio_zeropage_eexist = true;
+/* Whether to test uffd write-protection */
+static bool test_uffdio_wp = false;
static bool map_shared;
static int huge_fd;
@@ -86,6 +89,13 @@ static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
static char *zeropage;
pthread_attr_t attr;
+/* Userfaultfd test statistics */
+struct uffd_stats {
+ int cpu;
+ unsigned long missing_faults;
+ unsigned long wp_faults;
+};
+
/* pthread_mutex_t starts at page offset 0 */
#define area_mutex(___area, ___nr) \
((pthread_mutex_t *) ((___area) + (___nr)*page_size))
@@ -125,6 +135,37 @@ static void usage(void)
exit(1);
}
+static void uffd_stats_reset(struct uffd_stats *uffd_stats,
+ unsigned long n_cpus)
+{
+ int i;
+
+ for (i = 0; i < n_cpus; i++) {
+ uffd_stats[i].cpu = i;
+ uffd_stats[i].missing_faults = 0;
+ uffd_stats[i].wp_faults = 0;
+ }
+}
+
+static void uffd_stats_report(struct uffd_stats *stats, int n_cpus)
+{
+ int i;
+ unsigned long long miss_total = 0, wp_total = 0;
+
+ for (i = 0; i < n_cpus; i++) {
+ miss_total += stats[i].missing_faults;
+ wp_total += stats[i].wp_faults;
+ }
+
+ printf("userfaults: %llu missing (", miss_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", stats[i].missing_faults);
+ printf("\b), %llu wp (", wp_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", stats[i].wp_faults);
+ printf("\b)\n");
+}
+
static int anon_release_pages(char *rel_area)
{
int ret = 0;
@@ -245,10 +286,15 @@ struct uffd_test_ops {
void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
};
-#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
+#define SHMEM_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
(1 << _UFFDIO_COPY) | \
(1 << _UFFDIO_ZEROPAGE))
+#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
+ (1 << _UFFDIO_COPY) | \
+ (1 << _UFFDIO_ZEROPAGE) | \
+ (1 << _UFFDIO_WRITEPROTECT))
+
static struct uffd_test_ops anon_uffd_test_ops = {
.expected_ioctls = ANON_EXPECTED_IOCTLS,
.allocate_area = anon_allocate_area,
@@ -257,7 +303,7 @@ static struct uffd_test_ops anon_uffd_test_ops = {
};
static struct uffd_test_ops shmem_uffd_test_ops = {
- .expected_ioctls = ANON_EXPECTED_IOCTLS,
+ .expected_ioctls = SHMEM_EXPECTED_IOCTLS,
.allocate_area = shmem_allocate_area,
.release_pages = shmem_release_pages,
.alias_mapping = noop_alias_mapping,
@@ -281,6 +327,21 @@ static int my_bcmp(char *str1, char *str2, size_t n)
return 0;
}
+static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
+{
+ struct uffdio_writeprotect prms = { 0 };
+
+ /* Write protection page faults */
+ prms.range.start = start;
+ prms.range.len = len;
+ /* Undo write-protect, do wakeup after that */
+ prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
+
+ if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
+ fprintf(stderr, "clear WP failed for address 0x%Lx\n",
+ start), exit(1);
+}
+
static void *locking_thread(void *arg)
{
unsigned long cpu = (unsigned long) arg;
@@ -419,7 +480,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
uffdio_copy.dst = (unsigned long) area_dst + offset;
uffdio_copy.src = (unsigned long) area_src + offset;
uffdio_copy.len = page_size;
- uffdio_copy.mode = 0;
+ if (test_uffdio_wp)
+ uffdio_copy.mode = UFFDIO_COPY_MODE_WP;
+ else
+ uffdio_copy.mode = 0;
uffdio_copy.copy = 0;
if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
/* real retval in ufdio_copy.copy */
@@ -467,8 +531,8 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
return 0;
}
-/* Return 1 if page fault handled by us; otherwise 0 */
-static int uffd_handle_page_fault(struct uffd_msg *msg)
+static void uffd_handle_page_fault(struct uffd_msg *msg,
+ struct uffd_stats *stats)
{
unsigned long offset;
@@ -476,25 +540,32 @@ static int uffd_handle_page_fault(struct uffd_msg *msg)
fprintf(stderr, "unexpected msg event %u\n",
msg->event), exit(1);
- if (bounces & BOUNCE_VERIFY &&
- msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
- fprintf(stderr, "unexpected write fault\n"), exit(1);
+ if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
+ wp_range(uffd, msg->arg.pagefault.address, page_size, false);
+ stats->wp_faults++;
+ } else {
+ /* Missing page faults */
+ if (bounces & BOUNCE_VERIFY &&
+ msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ fprintf(stderr, "unexpected write fault\n"), exit(1);
- offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
- offset &= ~(page_size-1);
+ offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
+ offset &= ~(page_size-1);
- return copy_page(uffd, offset);
+ if (copy_page(uffd, offset))
+ stats->missing_faults++;
+ }
}
static void *uffd_poll_thread(void *arg)
{
- unsigned long cpu = (unsigned long) arg;
+ struct uffd_stats *stats = (struct uffd_stats *)arg;
+ unsigned long cpu = stats->cpu;
struct pollfd pollfd[2];
struct uffd_msg msg;
struct uffdio_register uffd_reg;
int ret;
char tmp_chr;
- unsigned long userfaults = 0;
pollfd[0].fd = uffd;
pollfd[0].events = POLLIN;
@@ -524,7 +595,7 @@ static void *uffd_poll_thread(void *arg)
msg.event), exit(1);
break;
case UFFD_EVENT_PAGEFAULT:
- userfaults += uffd_handle_page_fault(&msg);
+ uffd_handle_page_fault(&msg, stats);
break;
case UFFD_EVENT_FORK:
close(uffd);
@@ -543,50 +614,67 @@ static void *uffd_poll_thread(void *arg)
break;
}
}
- return (void *)userfaults;
+
+ return NULL;
}
pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
static void *uffd_read_thread(void *arg)
{
- unsigned long *this_cpu_userfaults;
+ struct uffd_stats *stats = (struct uffd_stats *)arg;
struct uffd_msg msg;
- this_cpu_userfaults = (unsigned long *) arg;
- *this_cpu_userfaults = 0;
-
pthread_mutex_unlock(&uffd_read_mutex);
/* from here cancellation is ok */
for (;;) {
if (uffd_read_msg(uffd, &msg))
continue;
- (*this_cpu_userfaults) += uffd_handle_page_fault(&msg);
+ uffd_handle_page_fault(&msg, stats);
}
- return (void *)NULL;
+
+ return NULL;
}
static void *background_thread(void *arg)
{
unsigned long cpu = (unsigned long) arg;
- unsigned long page_nr;
+ unsigned long page_nr, start_nr, mid_nr, end_nr;
+
+ start_nr = cpu * nr_pages_per_cpu;
+ end_nr = (cpu+1) * nr_pages_per_cpu;
+ mid_nr = (start_nr + end_nr) / 2;
- for (page_nr = cpu * nr_pages_per_cpu;
- page_nr < (cpu+1) * nr_pages_per_cpu;
- page_nr++)
+ /* Copy the first half of the pages */
+ for (page_nr = start_nr; page_nr < mid_nr; page_nr++)
+ copy_page_retry(uffd, page_nr * page_size);
+
+ /*
+ * If we need to test uffd-wp, set it up now. Then we'll have
+ * at least the first half of the pages mapped already which
+ * can be write-protected for testing
+ */
+ if (test_uffdio_wp)
+ wp_range(uffd, (unsigned long)area_dst + start_nr * page_size,
+ nr_pages_per_cpu * page_size, true);
+
+ /*
+ * Continue the 2nd half of the page copying, handling write
+ * protection faults if any
+ */
+ for (page_nr = mid_nr; page_nr < end_nr; page_nr++)
copy_page_retry(uffd, page_nr * page_size);
return NULL;
}
-static int stress(unsigned long *userfaults)
+static int stress(struct uffd_stats *uffd_stats)
{
unsigned long cpu;
pthread_t locking_threads[nr_cpus];
pthread_t uffd_threads[nr_cpus];
pthread_t background_threads[nr_cpus];
- void **_userfaults = (void **) userfaults;
finished = 0;
for (cpu = 0; cpu < nr_cpus; cpu++) {
@@ -595,12 +683,13 @@ static int stress(unsigned long *userfaults)
return 1;
if (bounces & BOUNCE_POLL) {
if (pthread_create(&uffd_threads[cpu], &attr,
- uffd_poll_thread, (void *)cpu))
+ uffd_poll_thread,
+ (void *)&uffd_stats[cpu]))
return 1;
} else {
if (pthread_create(&uffd_threads[cpu], &attr,
uffd_read_thread,
- &_userfaults[cpu]))
+ (void *)&uffd_stats[cpu]))
return 1;
pthread_mutex_lock(&uffd_read_mutex);
}
@@ -637,7 +726,8 @@ static int stress(unsigned long *userfaults)
fprintf(stderr, "pipefd write error\n");
return 1;
}
- if (pthread_join(uffd_threads[cpu], &_userfaults[cpu]))
+ if (pthread_join(uffd_threads[cpu],
+ (void *)&uffd_stats[cpu]))
return 1;
} else {
if (pthread_cancel(uffd_threads[cpu]))
@@ -735,17 +825,31 @@ static int faulting_process(int signal_test)
}
for (nr = 0; nr < split_nr_pages; nr++) {
+ int steps = 1;
+ unsigned long offset = nr * page_size;
+
if (signal_test) {
if (sigsetjmp(*sigbuf, 1) != 0) {
- if (nr == lastnr) {
+ if (steps == 1 && nr == lastnr) {
fprintf(stderr, "Signal repeated\n");
return 1;
}
lastnr = nr;
if (signal_test == 1) {
- if (copy_page(uffd, nr * page_size))
- signalled++;
+ if (steps == 1) {
+ /* This is a MISSING request */
+ steps++;
+ if (copy_page(uffd, offset))
+ signalled++;
+ } else {
+ /* This is a WP request */
+ assert(steps == 2);
+ wp_range(uffd,
+ (__u64)area_dst +
+ offset,
+ page_size, false);
+ }
} else {
signalled++;
continue;
@@ -758,8 +862,13 @@ static int faulting_process(int signal_test)
fprintf(stderr,
"nr %lu memory corruption %Lu %Lu\n",
nr, count,
- count_verify[nr]), exit(1);
- }
+ count_verify[nr]);
+ }
+ /*
+ * Trigger write protection if there is by writting
+ * the same value back.
+ */
+ *area_count(area_dst, nr) = count;
}
if (signal_test)
@@ -781,6 +890,11 @@ static int faulting_process(int signal_test)
nr, count,
count_verify[nr]), exit(1);
}
+ /*
+ * Trigger write protection if there is by writting
+ * the same value back.
+ */
+ *area_count(area_dst, nr) = count;
}
if (uffd_test_ops->release_pages(area_dst))
@@ -884,6 +998,8 @@ static int userfaultfd_zeropage_test(void)
uffdio_register.range.start = (unsigned long) area_dst;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (test_uffdio_wp)
+ uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
fprintf(stderr, "register failure\n"), exit(1);
@@ -908,11 +1024,11 @@ static int userfaultfd_events_test(void)
{
struct uffdio_register uffdio_register;
unsigned long expected_ioctls;
- unsigned long userfaults;
pthread_t uffd_mon;
int err, features;
pid_t pid;
char c;
+ struct uffd_stats stats = { 0 };
printf("testing events (fork, remap, remove): ");
fflush(stdout);
@@ -929,6 +1045,8 @@ static int userfaultfd_events_test(void)
uffdio_register.range.start = (unsigned long) area_dst;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (test_uffdio_wp)
+ uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
fprintf(stderr, "register failure\n"), exit(1);
@@ -939,7 +1057,7 @@ static int userfaultfd_events_test(void)
"unexpected missing ioctl for anon memory\n"),
exit(1);
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
perror("uffd_poll_thread create"), exit(1);
pid = fork();
@@ -955,13 +1073,14 @@ static int userfaultfd_events_test(void)
if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
perror("pipe write"), exit(1);
- if (pthread_join(uffd_mon, (void **)&userfaults))
+ if (pthread_join(uffd_mon, NULL))
return 1;
close(uffd);
- printf("userfaults: %ld\n", userfaults);
- return userfaults != nr_pages;
+ uffd_stats_report(&stats, 1);
+
+ return stats.missing_faults != nr_pages;
}
static int userfaultfd_sig_test(void)
@@ -973,6 +1092,7 @@ static int userfaultfd_sig_test(void)
int err, features;
pid_t pid;
char c;
+ struct uffd_stats stats = { 0 };
printf("testing signal delivery: ");
fflush(stdout);
@@ -988,6 +1108,8 @@ static int userfaultfd_sig_test(void)
uffdio_register.range.start = (unsigned long) area_dst;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (test_uffdio_wp)
+ uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
fprintf(stderr, "register failure\n"), exit(1);
@@ -1004,7 +1126,7 @@ static int userfaultfd_sig_test(void)
if (uffd_test_ops->release_pages(area_dst))
return 1;
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, NULL))
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
perror("uffd_poll_thread create"), exit(1);
pid = fork();
@@ -1030,6 +1152,7 @@ static int userfaultfd_sig_test(void)
close(uffd);
return userfaults != 0;
}
+
static int userfaultfd_stress(void)
{
void *area;
@@ -1038,7 +1161,7 @@ static int userfaultfd_stress(void)
struct uffdio_register uffdio_register;
unsigned long cpu;
int err;
- unsigned long userfaults[nr_cpus];
+ struct uffd_stats uffd_stats[nr_cpus];
uffd_test_ops->allocate_area((void **)&area_src);
if (!area_src)
@@ -1119,6 +1242,8 @@ static int userfaultfd_stress(void)
uffdio_register.range.start = (unsigned long) area_dst;
uffdio_register.range.len = nr_pages * page_size;
uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (test_uffdio_wp)
+ uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
fprintf(stderr, "register failure\n");
return 1;
@@ -1167,10 +1292,17 @@ static int userfaultfd_stress(void)
if (uffd_test_ops->release_pages(area_dst))
return 1;
+ uffd_stats_reset(uffd_stats, nr_cpus);
+
/* bounce pass */
- if (stress(userfaults))
+ if (stress(uffd_stats))
return 1;
+ /* Clear all the write protections if there is any */
+ if (test_uffdio_wp)
+ wp_range(uffd, (unsigned long)area_dst,
+ nr_pages * page_size, false);
+
/* unregister */
if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
fprintf(stderr, "unregister failure\n");
@@ -1209,10 +1341,7 @@ static int userfaultfd_stress(void)
area_src_alias = area_dst_alias;
area_dst_alias = tmp_area;
- printf("userfaults:");
- for (cpu = 0; cpu < nr_cpus; cpu++)
- printf(" %lu", userfaults[cpu]);
- printf("\n");
+ uffd_stats_report(uffd_stats, nr_cpus);
}
if (err)
@@ -1252,6 +1381,8 @@ static void set_test_type(const char *type)
if (!strcmp(type, "anon")) {
test_type = TEST_ANON;
uffd_test_ops = &anon_uffd_test_ops;
+ /* Only enable write-protect test for anonymous test */
+ test_uffdio_wp = true;
} else if (!strcmp(type, "hugetlb")) {
test_type = TEST_HUGETLB;
uffd_test_ops = &hugetlb_uffd_test_ops;
diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/vm/write_hugetlb_memory.sh
new file mode 100644
index 000000000000..d3d0d108924d
--- /dev/null
+++ b/tools/testing/selftests/vm/write_hugetlb_memory.sh
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+
+size=$1
+populate=$2
+write=$3
+cgroup=$4
+path=$5
+method=$6
+private=$7
+want_sleep=$8
+reserve=$9
+
+echo "Putting task in cgroup '$cgroup'"
+echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs
+
+echo "Method is $method"
+
+set +e
+./write_to_hugetlbfs -p "$path" -s "$size" "$write" "$populate" -m "$method" \
+ "$private" "$want_sleep" "$reserve"
diff --git a/tools/testing/selftests/vm/write_to_hugetlbfs.c b/tools/testing/selftests/vm/write_to_hugetlbfs.c
new file mode 100644
index 000000000000..6a2caba19ee1
--- /dev/null
+++ b/tools/testing/selftests/vm/write_to_hugetlbfs.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program reserves and uses hugetlb memory, supporting a bunch of
+ * scenarios needed by the charged_reserved_hugetlb.sh test.
+ */
+
+#include <err.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/shm.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+
+/* Global definitions. */
+enum method {
+ HUGETLBFS,
+ MMAP_MAP_HUGETLB,
+ SHM,
+ MAX_METHOD
+};
+
+
+/* Global variables. */
+static const char *self;
+static char *shmaddr;
+static int shmid;
+
+/*
+ * Show usage and exit.
+ */
+static void exit_usage(void)
+{
+ printf("Usage: %s -p <path to hugetlbfs file> -s <size to map> "
+ "[-m <0=hugetlbfs | 1=mmap(MAP_HUGETLB)>] [-l] [-r] "
+ "[-o] [-w] [-n]\n",
+ self);
+ exit(EXIT_FAILURE);
+}
+
+void sig_handler(int signo)
+{
+ printf("Received %d.\n", signo);
+ if (signo == SIGINT) {
+ printf("Deleting the memory\n");
+ if (shmdt((const void *)shmaddr) != 0) {
+ perror("Detach failure");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(4);
+ }
+
+ shmctl(shmid, IPC_RMID, NULL);
+ printf("Done deleting the memory\n");
+ }
+ exit(2);
+}
+
+int main(int argc, char **argv)
+{
+ int fd = 0;
+ int key = 0;
+ int *ptr = NULL;
+ int c = 0;
+ int size = 0;
+ char path[256] = "";
+ enum method method = MAX_METHOD;
+ int want_sleep = 0, private = 0;
+ int populate = 0;
+ int write = 0;
+ int reserve = 1;
+
+ if (signal(SIGINT, sig_handler) == SIG_ERR)
+ err(1, "\ncan't catch SIGINT\n");
+
+ /* Parse command-line arguments. */
+ setvbuf(stdout, NULL, _IONBF, 0);
+ self = argv[0];
+
+ while ((c = getopt(argc, argv, "s:p:m:owlrn")) != -1) {
+ switch (c) {
+ case 's':
+ size = atoi(optarg);
+ break;
+ case 'p':
+ strncpy(path, optarg, sizeof(path));
+ break;
+ case 'm':
+ if (atoi(optarg) >= MAX_METHOD) {
+ errno = EINVAL;
+ perror("Invalid -m.");
+ exit_usage();
+ }
+ method = atoi(optarg);
+ break;
+ case 'o':
+ populate = 1;
+ break;
+ case 'w':
+ write = 1;
+ break;
+ case 'l':
+ want_sleep = 1;
+ break;
+ case 'r':
+ private
+ = 1;
+ break;
+ case 'n':
+ reserve = 0;
+ break;
+ default:
+ errno = EINVAL;
+ perror("Invalid arg");
+ exit_usage();
+ }
+ }
+
+ if (strncmp(path, "", sizeof(path)) != 0) {
+ printf("Writing to this path: %s\n", path);
+ } else {
+ errno = EINVAL;
+ perror("path not found");
+ exit_usage();
+ }
+
+ if (size != 0) {
+ printf("Writing this size: %d\n", size);
+ } else {
+ errno = EINVAL;
+ perror("size not found");
+ exit_usage();
+ }
+
+ if (!populate)
+ printf("Not populating.\n");
+ else
+ printf("Populating.\n");
+
+ if (!write)
+ printf("Not writing to memory.\n");
+
+ if (method == MAX_METHOD) {
+ errno = EINVAL;
+ perror("-m Invalid");
+ exit_usage();
+ } else
+ printf("Using method=%d\n", method);
+
+ if (!private)
+ printf("Shared mapping.\n");
+ else
+ printf("Private mapping.\n");
+
+ if (!reserve)
+ printf("NO_RESERVE mapping.\n");
+ else
+ printf("RESERVE mapping.\n");
+
+ switch (method) {
+ case HUGETLBFS:
+ printf("Allocating using HUGETLBFS.\n");
+ fd = open(path, O_CREAT | O_RDWR, 0777);
+ if (fd == -1)
+ err(1, "Failed to open file.");
+
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ (private ? MAP_PRIVATE : MAP_SHARED) |
+ (populate ? MAP_POPULATE : 0) |
+ (reserve ? 0 : MAP_NORESERVE),
+ fd, 0);
+
+ if (ptr == MAP_FAILED) {
+ close(fd);
+ err(1, "Error mapping the file");
+ }
+ break;
+ case MMAP_MAP_HUGETLB:
+ printf("Allocating using MAP_HUGETLB.\n");
+ ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ (private ? (MAP_PRIVATE | MAP_ANONYMOUS) :
+ MAP_SHARED) |
+ MAP_HUGETLB | (populate ? MAP_POPULATE : 0) |
+ (reserve ? 0 : MAP_NORESERVE),
+ -1, 0);
+
+ if (ptr == MAP_FAILED)
+ err(1, "mmap");
+
+ printf("Returned address is %p\n", ptr);
+ break;
+ case SHM:
+ printf("Allocating using SHM.\n");
+ shmid = shmget(key, size,
+ SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+ if (shmid < 0) {
+ shmid = shmget(++key, size,
+ SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+ if (shmid < 0)
+ err(1, "shmget");
+ }
+ printf("shmid: 0x%x, shmget key:%d\n", shmid, key);
+
+ ptr = shmat(shmid, NULL, 0);
+ if (ptr == (int *)-1) {
+ perror("Shared memory attach failure");
+ shmctl(shmid, IPC_RMID, NULL);
+ exit(2);
+ }
+ printf("shmaddr: %p\n", ptr);
+
+ break;
+ default:
+ errno = EINVAL;
+ err(1, "Invalid method.");
+ }
+
+ if (write) {
+ printf("Writing to memory.\n");
+ memset(ptr, 1, size);
+ }
+
+ if (want_sleep) {
+ /* Signal to caller that we're done. */
+ printf("DONE\n");
+
+ /* Hold memory until external kill signal is delivered. */
+ while (1)
+ sleep(100);
+ }
+
+ if (method == HUGETLBFS)
+ close(fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/watchdog/.gitignore b/tools/testing/selftests/watchdog/.gitignore
index 5aac51575c7e..61d7b89cdbca 100644
--- a/tools/testing/selftests/watchdog/.gitignore
+++ b/tools/testing/selftests/watchdog/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
watchdog-test
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index 936e1ca9410e..17a1f53ceba0 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -48,8 +48,11 @@ cleanup() {
exec 2>/dev/null
printf "$orig_message_cost" > /proc/sys/net/core/message_cost
ip0 link del dev wg0
+ ip0 link del dev wg1
ip1 link del dev wg0
+ ip1 link del dev wg1
ip2 link del dev wg0
+ ip2 link del dev wg1
local to_kill="$(ip netns pids $netns0) $(ip netns pids $netns1) $(ip netns pids $netns2)"
[[ -n $to_kill ]] && kill $to_kill
pp ip netns del $netns1
@@ -77,18 +80,20 @@ ip0 link set wg0 netns $netns2
key1="$(pp wg genkey)"
key2="$(pp wg genkey)"
key3="$(pp wg genkey)"
+key4="$(pp wg genkey)"
pub1="$(pp wg pubkey <<<"$key1")"
pub2="$(pp wg pubkey <<<"$key2")"
pub3="$(pp wg pubkey <<<"$key3")"
+pub4="$(pp wg pubkey <<<"$key4")"
psk="$(pp wg genpsk)"
[[ -n $key1 && -n $key2 && -n $psk ]]
configure_peers() {
ip1 addr add 192.168.241.1/24 dev wg0
- ip1 addr add fd00::1/24 dev wg0
+ ip1 addr add fd00::1/112 dev wg0
ip2 addr add 192.168.241.2/24 dev wg0
- ip2 addr add fd00::2/24 dev wg0
+ ip2 addr add fd00::2/112 dev wg0
n1 wg set wg0 \
private-key <(echo "$key1") \
@@ -230,9 +235,38 @@ n1 ping -W 1 -c 1 192.168.241.2
n1 wg set wg0 private-key <(echo "$key3")
n2 wg set wg0 peer "$pub3" preshared-key <(echo "$psk") allowed-ips 192.168.241.1/32 peer "$pub1" remove
n1 ping -W 1 -c 1 192.168.241.2
+n2 wg set wg0 peer "$pub3" remove
+
+# Test that we can route wg through wg
+ip1 addr flush dev wg0
+ip2 addr flush dev wg0
+ip1 addr add fd00::5:1/112 dev wg0
+ip2 addr add fd00::5:2/112 dev wg0
+n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" preshared-key <(echo "$psk") allowed-ips fd00::5:2/128 endpoint 127.0.0.1:2
+n2 wg set wg0 private-key <(echo "$key2") listen-port 2 peer "$pub1" preshared-key <(echo "$psk") allowed-ips fd00::5:1/128 endpoint 127.212.121.99:9998
+ip1 link add wg1 type wireguard
+ip2 link add wg1 type wireguard
+ip1 addr add 192.168.241.1/24 dev wg1
+ip1 addr add fd00::1/112 dev wg1
+ip2 addr add 192.168.241.2/24 dev wg1
+ip2 addr add fd00::2/112 dev wg1
+ip1 link set mtu 1340 up dev wg1
+ip2 link set mtu 1340 up dev wg1
+n1 wg set wg1 listen-port 5 private-key <(echo "$key3") peer "$pub4" allowed-ips 192.168.241.2/32,fd00::2/128 endpoint [fd00::5:2]:5
+n2 wg set wg1 listen-port 5 private-key <(echo "$key4") peer "$pub3" allowed-ips 192.168.241.1/32,fd00::1/128 endpoint [fd00::5:1]:5
+tests
+# Try to set up a routing loop between the two namespaces
+ip1 link set netns $netns0 dev wg1
+ip0 addr add 192.168.241.1/24 dev wg1
+ip0 link set up dev wg1
+n0 ping -W 1 -c 1 192.168.241.2
+n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7
+ip2 link del wg0
+ip2 link del wg1
+! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel
+ip0 link del wg1
ip1 link del wg0
-ip2 link del wg0
# Test using NAT. We now change the topology to this:
# ┌────────────────────────────────────────┐ ┌────────────────────────────────────────────────┐ ┌────────────────────────────────────────┐
@@ -282,6 +316,20 @@ pp sleep 3
n2 ping -W 1 -c 1 192.168.241.1
n1 wg set wg0 peer "$pub2" persistent-keepalive 0
+# Test that onion routing works, even when it loops
+n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5
+ip1 addr add 192.168.242.1/24 dev wg0
+ip2 link add wg1 type wireguard
+ip2 addr add 192.168.242.2/24 dev wg1
+n2 wg set wg1 private-key <(echo "$key3") listen-port 5 peer "$pub1" allowed-ips 192.168.242.1/32
+ip2 link set wg1 up
+n1 ping -W 1 -c 1 192.168.242.2
+ip2 link del wg1
+n1 wg set wg0 peer "$pub3" endpoint 192.168.242.2:5
+! n1 ping -W 1 -c 1 192.168.242.2 || false # Should not crash kernel
+n1 wg set wg0 peer "$pub3" remove
+ip1 addr del 192.168.242.1/24 dev wg0
+
# Do a wg-quick(8)-style policy routing for the default route, making sure vethc has a v6 address to tease out bugs.
ip1 -6 addr add fc00::9/96 dev vethc
ip1 -6 route add default via fc00::1
diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore
index 415b542a9d59..bfa15e6feb2f 100644
--- a/tools/testing/selftests/wireguard/qemu/.gitignore
+++ b/tools/testing/selftests/wireguard/qemu/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
build/
distfiles/
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 90598a425c18..4bdd6c1a19d3 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -44,7 +44,7 @@ endef
$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8))
$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
-$(eval $(call tar_download,IPROUTE2,iproute2,5.4.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,fe97aa60a0d4c5ac830be18937e18dc3400ca713a33a89ad896ff1e3d46086ae))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692))
$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
index 990c510a9cfa..f52f1e2bc7f6 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
@@ -10,3 +10,4 @@ CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
CONFIG_FRAME_WARN=1280
+CONFIG_THREAD_SHIFT=14
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index 5909e7ef2a5c..b50c2085c1ac 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -25,7 +25,6 @@ CONFIG_KASAN=y
CONFIG_KASAN_INLINE=y
CONFIG_UBSAN=y
CONFIG_UBSAN_SANITIZE_ALL=y
-CONFIG_UBSAN_NO_ALIGNMENT=y
CONFIG_UBSAN_NULL=y
CONFIG_DEBUG_KMEMLEAK=y
CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192
@@ -58,7 +57,6 @@ CONFIG_RCU_EQS_DEBUG=y
CONFIG_USER_STACKTRACE_SUPPORT=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
-CONFIG_DOUBLEFAULT=y
CONFIG_X86_DEBUG_FPU=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_DEBUG_PAGEALLOC=y
diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore
index 7757f73ff9a3..1aaef5bf119a 100644
--- a/tools/testing/selftests/x86/.gitignore
+++ b/tools/testing/selftests/x86/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
*_32
*_64
single_step_syscall
@@ -11,5 +12,4 @@ ldt_gdt
iopl
mpx-mini-test
ioperm
-protection_keys
test_vdso
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5d49bfec1e9a..5f16821c7f63 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
- protection_keys test_vdso test_vsyscall mov_ss_trap \
+ test_vdso test_vsyscall mov_ss_trap \
syscall_arg_fault
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h
deleted file mode 100644
index 254e5436bdd9..000000000000
--- a/tools/testing/selftests/x86/pkey-helpers.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _PKEYS_HELPER_H
-#define _PKEYS_HELPER_H
-#define _GNU_SOURCE
-#include <string.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <stdbool.h>
-#include <signal.h>
-#include <assert.h>
-#include <stdlib.h>
-#include <ucontext.h>
-#include <sys/mman.h>
-
-#define NR_PKEYS 16
-#define PKRU_BITS_PER_PKEY 2
-
-#ifndef DEBUG_LEVEL
-#define DEBUG_LEVEL 0
-#endif
-#define DPRINT_IN_SIGNAL_BUF_SIZE 4096
-extern int dprint_in_signal;
-extern char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
-static inline void sigsafe_printf(const char *format, ...)
-{
- va_list ap;
-
- if (!dprint_in_signal) {
- va_start(ap, format);
- vprintf(format, ap);
- va_end(ap);
- } else {
- int ret;
- /*
- * No printf() functions are signal-safe.
- * They deadlock easily. Write the format
- * string to get some output, even if
- * incomplete.
- */
- ret = write(1, format, strlen(format));
- if (ret < 0)
- exit(1);
- }
-}
-#define dprintf_level(level, args...) do { \
- if (level <= DEBUG_LEVEL) \
- sigsafe_printf(args); \
-} while (0)
-#define dprintf0(args...) dprintf_level(0, args)
-#define dprintf1(args...) dprintf_level(1, args)
-#define dprintf2(args...) dprintf_level(2, args)
-#define dprintf3(args...) dprintf_level(3, args)
-#define dprintf4(args...) dprintf_level(4, args)
-
-extern unsigned int shadow_pkru;
-static inline unsigned int __rdpkru(void)
-{
- unsigned int eax, edx;
- unsigned int ecx = 0;
- unsigned int pkru;
-
- asm volatile(".byte 0x0f,0x01,0xee\n\t"
- : "=a" (eax), "=d" (edx)
- : "c" (ecx));
- pkru = eax;
- return pkru;
-}
-
-static inline unsigned int _rdpkru(int line)
-{
- unsigned int pkru = __rdpkru();
-
- dprintf4("rdpkru(line=%d) pkru: %x shadow: %x\n",
- line, pkru, shadow_pkru);
- assert(pkru == shadow_pkru);
-
- return pkru;
-}
-
-#define rdpkru() _rdpkru(__LINE__)
-
-static inline void __wrpkru(unsigned int pkru)
-{
- unsigned int eax = pkru;
- unsigned int ecx = 0;
- unsigned int edx = 0;
-
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- asm volatile(".byte 0x0f,0x01,0xef\n\t"
- : : "a" (eax), "c" (ecx), "d" (edx));
- assert(pkru == __rdpkru());
-}
-
-static inline void wrpkru(unsigned int pkru)
-{
- dprintf4("%s() changing %08x to %08x\n", __func__, __rdpkru(), pkru);
- /* will do the shadow check for us: */
- rdpkru();
- __wrpkru(pkru);
- shadow_pkru = pkru;
- dprintf4("%s(%08x) pkru: %08x\n", __func__, pkru, __rdpkru());
-}
-
-/*
- * These are technically racy. since something could
- * change PKRU between the read and the write.
- */
-static inline void __pkey_access_allow(int pkey, int do_allow)
-{
- unsigned int pkru = rdpkru();
- int bit = pkey * 2;
-
- if (do_allow)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- dprintf4("pkru now: %08x\n", rdpkru());
- wrpkru(pkru);
-}
-
-static inline void __pkey_write_allow(int pkey, int do_allow_write)
-{
- long pkru = rdpkru();
- int bit = pkey * 2 + 1;
-
- if (do_allow_write)
- pkru &= (1<<bit);
- else
- pkru |= (1<<bit);
-
- wrpkru(pkru);
- dprintf4("pkru now: %08x\n", rdpkru());
-}
-
-#define PROT_PKEY0 0x10 /* protection key value (bit 0) */
-#define PROT_PKEY1 0x20 /* protection key value (bit 1) */
-#define PROT_PKEY2 0x40 /* protection key value (bit 2) */
-#define PROT_PKEY3 0x80 /* protection key value (bit 3) */
-
-#define PAGE_SIZE 4096
-#define MB (1<<20)
-
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
-#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
-#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
-
-static inline int cpu_has_pku(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
-
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (!(ecx & X86_FEATURE_PKU)) {
- dprintf2("cpu does not have PKU\n");
- return 0;
- }
- if (!(ecx & X86_FEATURE_OSPKE)) {
- dprintf2("cpu does not have OSPKE\n");
- return 0;
- }
- return 1;
-}
-
-#define XSTATE_PKRU_BIT (9)
-#define XSTATE_PKRU 0x200
-
-int pkru_xstate_offset(void)
-{
- unsigned int eax;
- unsigned int ebx;
- unsigned int ecx;
- unsigned int edx;
- int xstate_offset;
- int xstate_size;
- unsigned long XSTATE_CPUID = 0xd;
- int leaf;
-
- /* assume that XSTATE_PKRU is set in XCR0 */
- leaf = XSTATE_PKRU_BIT;
- {
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
-
- if (leaf == XSTATE_PKRU_BIT) {
- xstate_offset = ebx;
- xstate_size = eax;
- }
- }
-
- if (xstate_size == 0) {
- printf("could not find size/offset of PKRU in xsave state\n");
- return 0;
- }
-
- return xstate_offset;
-}
-
-#endif /* _PKEYS_HELPER_H */
diff --git a/tools/testing/selftests/x86/ptrace_syscall.c b/tools/testing/selftests/x86/ptrace_syscall.c
index 6f22238f3217..12aaa063196e 100644
--- a/tools/testing/selftests/x86/ptrace_syscall.c
+++ b/tools/testing/selftests/x86/ptrace_syscall.c
@@ -414,8 +414,12 @@ int main()
#if defined(__i386__) && (!defined(__GLIBC__) || __GLIBC__ > 2 || __GLIBC_MINOR__ >= 16)
vsyscall32 = (void *)getauxval(AT_SYSINFO);
- printf("[RUN]\tCheck AT_SYSINFO return regs\n");
- test_sys32_regs(do_full_vsyscall32);
+ if (vsyscall32) {
+ printf("[RUN]\tCheck AT_SYSINFO return regs\n");
+ test_sys32_regs(do_full_vsyscall32);
+ } else {
+ printf("[SKIP]\tAT_SYSINFO is not available\n");
+ }
#endif
test_ptrace_syscall_restart();
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/x86/test_vdso.c
index 35edd61d1663..42052db0f870 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/x86/test_vdso.c
@@ -259,6 +259,11 @@ static void test_one_clock_gettime(int clock, const char *name)
static void test_clock_gettime(void)
{
+ if (!vdso_clock_gettime) {
+ printf("[SKIP]\tNo vDSO, so skipping clock_gettime() tests\n");
+ return;
+ }
+
for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
clock++) {
test_one_clock_gettime(clock, clocknames[clock]);
diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c
index 29a5c94c4b50..fe99f2434155 100644
--- a/tools/testing/selftests/x86/vdso_restorer.c
+++ b/tools/testing/selftests/x86/vdso_restorer.c
@@ -15,6 +15,7 @@
#include <err.h>
#include <stdio.h>
+#include <dlfcn.h>
#include <string.h>
#include <signal.h>
#include <unistd.h>
@@ -46,11 +47,23 @@ int main()
int nerrs = 0;
struct real_sigaction sa;
+ void *vdso = dlopen("linux-vdso.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-gate.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso) {
+ printf("[SKIP]\tFailed to find vDSO. Tests are not expected to work.\n");
+ return 0;
+ }
+
memset(&sa, 0, sizeof(sa));
sa.handler = handler_with_siginfo;
sa.flags = SA_SIGINFO;
sa.restorer = NULL; /* request kernel-provided restorer */
+ printf("[RUN]\tRaise a signal, SA_SIGINFO, sa.restorer == NULL\n");
+
if (syscall(SYS_rt_sigaction, SIGUSR1, &sa, NULL, 8) != 0)
err(1, "raw rt_sigaction syscall");
@@ -63,6 +76,8 @@ int main()
nerrs++;
}
+ printf("[RUN]\tRaise a signal, !SA_SIGINFO, sa.restorer == NULL\n");
+
sa.flags = 0;
sa.handler = handler_without_siginfo;
if (syscall(SYS_sigaction, SIGUSR1, &sa, 0) != 0)
diff --git a/tools/testing/vsock/.gitignore b/tools/testing/vsock/.gitignore
index 7f7a2ccc30c4..87ca2731cff9 100644
--- a/tools/testing/vsock/.gitignore
+++ b/tools/testing/vsock/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
*.d
vsock_test
vsock_diag_test
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 1d8b93f1af31..5a4fb80fa832 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -55,6 +55,78 @@ static void test_stream_connection_reset(const struct test_opts *opts)
close(fd);
}
+static void test_stream_bind_only_client(const struct test_opts *opts)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = opts->peer_cid,
+ },
+ };
+ int ret;
+ int fd;
+
+ /* Wait for the server to be ready */
+ control_expectln("BIND");
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ timeout_begin(TIMEOUT);
+ do {
+ ret = connect(fd, &addr.sa, sizeof(addr.svm));
+ timeout_check("connect");
+ } while (ret < 0 && errno == EINTR);
+ timeout_end();
+
+ if (ret != -1) {
+ fprintf(stderr, "expected connect(2) failure, got %d\n", ret);
+ exit(EXIT_FAILURE);
+ }
+ if (errno != ECONNRESET) {
+ fprintf(stderr, "unexpected connect(2) errno %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Notify the server that the client has finished */
+ control_writeln("DONE");
+
+ close(fd);
+}
+
+static void test_stream_bind_only_server(const struct test_opts *opts)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_vm svm;
+ } addr = {
+ .svm = {
+ .svm_family = AF_VSOCK,
+ .svm_port = 1234,
+ .svm_cid = VMADDR_CID_ANY,
+ },
+ };
+ int fd;
+
+ fd = socket(AF_VSOCK, SOCK_STREAM, 0);
+
+ if (bind(fd, &addr.sa, sizeof(addr.svm)) < 0) {
+ perror("bind");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Notify the client that the server is ready */
+ control_writeln("BIND");
+
+ /* Wait for the client to finish */
+ control_expectln("DONE");
+
+ close(fd);
+}
+
static void test_stream_client_close_client(const struct test_opts *opts)
{
int fd;
@@ -213,6 +285,11 @@ static struct test_case test_cases[] = {
.run_client = test_stream_connection_reset,
},
{
+ .name = "SOCK_STREAM bind only",
+ .run_client = test_stream_bind_only_client,
+ .run_server = test_stream_bind_only_server,
+ },
+ {
.name = "SOCK_STREAM client close",
.run_client = test_stream_client_close_client,
.run_server = test_stream_client_close_server,
diff --git a/tools/thermal/tmon/.gitignore b/tools/thermal/tmon/.gitignore
index 06e96be65276..d9e97a0308f5 100644
--- a/tools/thermal/tmon/.gitignore
+++ b/tools/thermal/tmon/.gitignore
@@ -1 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
/tmon
diff --git a/tools/thermal/tmon/tmon.c b/tools/thermal/tmon/tmon.c
index 83ec6e482f12..7eb3216a27f4 100644
--- a/tools/thermal/tmon/tmon.c
+++ b/tools/thermal/tmon/tmon.c
@@ -46,7 +46,7 @@ static void start_daemon_mode(void);
pthread_t event_tid;
pthread_mutex_t input_lock;
-void usage()
+void usage(void)
{
printf("Usage: tmon [OPTION...]\n");
printf(" -c, --control cooling device in control\n");
@@ -62,7 +62,7 @@ void usage()
exit(0);
}
-void version()
+void version(void)
{
printf("TMON version %s\n", VERSION);
exit(EXIT_SUCCESS);
@@ -70,7 +70,6 @@ void version()
static void tmon_cleanup(void)
{
-
syslog(LOG_INFO, "TMON exit cleanup\n");
fflush(stdout);
refresh();
@@ -96,7 +95,6 @@ static void tmon_cleanup(void)
exit(1);
}
-
static void tmon_sig_handler(int sig)
{
syslog(LOG_INFO, "TMON caught signal %d\n", sig);
@@ -120,7 +118,6 @@ static void tmon_sig_handler(int sig)
tmon_exit = true;
}
-
static void start_syslog(void)
{
if (debug_on)
@@ -167,7 +164,6 @@ static void prepare_logging(void)
return;
}
-
fprintf(tmon_log, "#----------- THERMAL SYSTEM CONFIG -------------\n");
for (i = 0; i < ptdata.nr_tz_sensor; i++) {
char binding_str[33]; /* size of long + 1 */
@@ -175,7 +171,7 @@ static void prepare_logging(void)
memset(binding_str, 0, sizeof(binding_str));
for (j = 0; j < 32; j++)
- binding_str[j] = (ptdata.tzi[i].cdev_binding & 1<<j) ?
+ binding_str[j] = (ptdata.tzi[i].cdev_binding & (1 << j)) ?
'1' : '0';
fprintf(tmon_log, "#thermal zone %s%02d cdevs binding: %32s\n",
@@ -187,7 +183,6 @@ static void prepare_logging(void)
trip_type_name[ptdata.tzi[i].tp[j].type],
ptdata.tzi[i].tp[j].temp);
}
-
}
for (i = 0; i < ptdata.nr_cooling_dev; i++)
@@ -219,7 +214,6 @@ static struct option opts[] = {
{ 0, 0, NULL, 0 }
};
-
int main(int argc, char **argv)
{
int err = 0;
@@ -283,7 +277,7 @@ int main(int argc, char **argv)
if (signal(SIGINT, tmon_sig_handler) == SIG_ERR)
syslog(LOG_DEBUG, "Cannot handle SIGINT\n");
if (signal(SIGTERM, tmon_sig_handler) == SIG_ERR)
- syslog(LOG_DEBUG, "Cannot handle SIGINT\n");
+ syslog(LOG_DEBUG, "Cannot handle SIGTERM\n");
if (probe_thermal_sysfs()) {
pthread_mutex_destroy(&input_lock);
@@ -328,8 +322,7 @@ int main(int argc, char **argv)
show_cooling_device();
}
time_elapsed += ticktime;
- controller_handler(trec[0].temp[target_tz_index] / 1000,
- &yk);
+ controller_handler(trec[0].temp[target_tz_index] / 1000, &yk);
trec[0].pid_out_pct = yk;
if (!dialogue_on)
show_control_w();
@@ -340,14 +333,15 @@ int main(int argc, char **argv)
return 0;
}
-static void start_daemon_mode()
+static void start_daemon_mode(void)
{
daemon_mode = 1;
/* fork */
pid_t sid, pid = fork();
- if (pid < 0) {
+
+ if (pid < 0)
exit(EXIT_FAILURE);
- } else if (pid > 0)
+ else if (pid > 0)
/* kill parent */
exit(EXIT_SUCCESS);
@@ -366,11 +360,9 @@ static void start_daemon_mode()
if ((chdir("/")) < 0)
exit(EXIT_FAILURE);
-
sleep(10);
close(STDIN_FILENO);
close(STDOUT_FILENO);
close(STDERR_FILENO);
-
}
diff --git a/tools/usb/.gitignore b/tools/usb/.gitignore
index 1b7448981435..fce1ef5a9267 100644
--- a/tools/usb/.gitignore
+++ b/tools/usb/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
ffs-test
testusb
diff --git a/tools/usb/usbip/.gitignore b/tools/usb/usbip/.gitignore
index 03b892c8bd8c..597361a96dbb 100644
--- a/tools/usb/usbip/.gitignore
+++ b/tools/usb/usbip/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
Makefile
Makefile.in
aclocal.m4
diff --git a/tools/virtio/.gitignore b/tools/virtio/.gitignore
index 1cfbb0157a46..075588c4da08 100644
--- a/tools/virtio/.gitignore
+++ b/tools/virtio/.gitignore
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
*.d
virtio_test
vringh_test
diff --git a/tools/virtio/Makefile b/tools/virtio/Makefile
index 8e2a908115c2..b587b9a7a124 100644
--- a/tools/virtio/Makefile
+++ b/tools/virtio/Makefile
@@ -4,11 +4,37 @@ test: virtio_test vringh_test
virtio_test: virtio_ring.o virtio_test.o
vringh_test: vringh_test.o vringh.o virtio_ring.o
-CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
+CFLAGS += -g -O2 -Werror -Wall -I. -I../include/ -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE -include ../../include/linux/kconfig.h
vpath %.c ../../drivers/virtio ../../drivers/vhost
mod:
${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test V=${V}
-.PHONY: all test mod clean
+
+#oot: build vhost as an out of tree module for a distro kernel
+#no effort is taken to make it actually build or work, but tends to mostly work
+#if the distro kernel is very close to upstream
+#unsupported! this is a development tool only, don't use the
+#resulting modules in production!
+OOT_KSRC=/lib/modules/$$(uname -r)/build
+OOT_VHOST=`pwd`/../../drivers/vhost
+#Everyone depends on vhost
+#Tweak the below to enable more modules
+OOT_CONFIGS=\
+ CONFIG_VHOST=m \
+ CONFIG_VHOST_NET=n \
+ CONFIG_VHOST_SCSI=n \
+ CONFIG_VHOST_VSOCK=n \
+ CONFIG_VHOST_RING=n
+OOT_BUILD=KCFLAGS="-I "${OOT_VHOST} ${MAKE} -C ${OOT_KSRC} V=${V}
+oot-build:
+ echo "UNSUPPORTED! Don't use the resulting modules in production!"
+ ${OOT_BUILD} M=`pwd`/vhost_test
+ ${OOT_BUILD} M=${OOT_VHOST} ${OOT_CONFIGS}
+
+oot-clean: oot-build
+oot: oot-build
+oot-clean: OOT_BUILD+=clean
+
+.PHONY: all test mod clean vhost oot oot-clean oot-build
clean:
${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
vhost_test/Module.symvers vhost_test/modules.order *.d
diff --git a/tools/virtio/asm/barrier.h b/tools/virtio/asm/barrier.h
index d0351f83aebe..04d563fc9b95 100644
--- a/tools/virtio/asm/barrier.h
+++ b/tools/virtio/asm/barrier.h
@@ -1,4 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
#if defined(__i386__) || defined(__x86_64__)
#define barrier() asm volatile("" ::: "memory")
#define virt_mb() __sync_synchronize()
diff --git a/tools/virtio/generated/autoconf.h b/tools/virtio/generated/autoconf.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/virtio/generated/autoconf.h
diff --git a/tools/virtio/linux/compiler.h b/tools/virtio/linux/compiler.h
index 903dc9c4bd11..2c51bccb97bb 100644
--- a/tools/virtio/linux/compiler.h
+++ b/tools/virtio/linux/compiler.h
@@ -7,4 +7,5 @@
#define READ_ONCE(var) (*((volatile typeof(var) *)(&(var))))
+#define __aligned(x) __attribute((__aligned__(x)))
#endif
diff --git a/tools/vm/.gitignore b/tools/vm/.gitignore
index 44f095fa2604..79bb92ae1bb3 100644
--- a/tools/vm/.gitignore
+++ b/tools/vm/.gitignore
@@ -1,2 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
slabinfo
page-types
diff --git a/tools/vm/Makefile b/tools/vm/Makefile
index 20f6cf04377f..9860622cbb15 100644
--- a/tools/vm/Makefile
+++ b/tools/vm/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for vm tools
#
+include ../scripts/Makefile.include
+
TARGETS=page-types slabinfo page_owner_sort
LIB_DIR = ../lib/api
diff --git a/tools/vm/page_owner_sort.c b/tools/vm/page_owner_sort.c
index d3a8755c039c..85eb65ea16d3 100644
--- a/tools/vm/page_owner_sort.c
+++ b/tools/vm/page_owner_sort.c
@@ -4,8 +4,7 @@
*
* Example use:
* cat /sys/kernel/debug/page_owner > page_owner_full.txt
- * grep -v ^PFN page_owner_full.txt > page_owner.txt
- * ./page_owner_sort page_owner.txt sorted_page_owner.txt
+ * ./page_owner_sort page_owner_full.txt sorted_page_owner.txt
*
* See Documentation/vm/page_owner.rst
*/
@@ -38,6 +37,8 @@ int read_block(char *buf, int buf_size, FILE *fin)
while (buf_end - curr > 1 && fgets(curr, buf_end - curr, fin)) {
if (*curr == '\n') /* empty line */
return curr - buf;
+ if (!strncmp(curr, "PFN", 3))
+ continue;
curr += strlen(curr);
}