diff options
Diffstat (limited to 'tools')
131 files changed, 3246 insertions, 451 deletions
diff --git a/tools/arch/arm64/include/asm/cputype.h b/tools/arch/arm64/include/asm/cputype.h index 5fd7caea4419..5a7dfeb8e8eb 100644 --- a/tools/arch/arm64/include/asm/cputype.h +++ b/tools/arch/arm64/include/asm/cputype.h @@ -143,6 +143,7 @@ #define APPLE_CPU_PART_M2_AVALANCHE_MAX 0x039 #define AMPERE_CPU_PART_AMPERE1 0xAC3 +#define AMPERE_CPU_PART_AMPERE1A 0xAC4 #define MICROSOFT_CPU_PART_AZURE_COBALT_100 0xD49 /* Based on r0p0 of ARM Neoverse N2 */ @@ -212,6 +213,7 @@ #define MIDR_APPLE_M2_BLIZZARD_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_BLIZZARD_MAX) #define MIDR_APPLE_M2_AVALANCHE_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M2_AVALANCHE_MAX) #define MIDR_AMPERE1 MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1) +#define MIDR_AMPERE1A MIDR_CPU_MODEL(ARM_CPU_IMP_AMPERE, AMPERE_CPU_PART_AMPERE1A) #define MIDR_MICROSOFT_AZURE_COBALT_100 MIDR_CPU_MODEL(ARM_CPU_IMP_MICROSOFT, MICROSOFT_CPU_PART_AZURE_COBALT_100) /* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */ diff --git a/tools/arch/arm64/vdso b/tools/arch/arm64/vdso deleted file mode 120000 index 233c7a26f6e5..000000000000 --- a/tools/arch/arm64/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/arm64/kernel/vdso
\ No newline at end of file diff --git a/tools/arch/loongarch/vdso b/tools/arch/loongarch/vdso deleted file mode 120000 index ebda43a82db7..000000000000 --- a/tools/arch/loongarch/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/loongarch/vdso
\ No newline at end of file diff --git a/tools/arch/powerpc/vdso b/tools/arch/powerpc/vdso deleted file mode 120000 index 4e676d1d1cb4..000000000000 --- a/tools/arch/powerpc/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/powerpc/kernel/vdso
\ No newline at end of file diff --git a/tools/arch/s390/vdso b/tools/arch/s390/vdso deleted file mode 120000 index 6cf4c1cebdcd..000000000000 --- a/tools/arch/s390/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/s390/kernel/vdso64
\ No newline at end of file diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index 82c6a4d350e0..a7c06a46fb76 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -247,6 +247,8 @@ #define MSR_INTEGRITY_CAPS_ARRAY_BIST BIT(MSR_INTEGRITY_CAPS_ARRAY_BIST_BIT) #define MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT 4 #define MSR_INTEGRITY_CAPS_PERIODIC_BIST BIT(MSR_INTEGRITY_CAPS_PERIODIC_BIST_BIT) +#define MSR_INTEGRITY_CAPS_SBAF_BIT 8 +#define MSR_INTEGRITY_CAPS_SBAF BIT(MSR_INTEGRITY_CAPS_SBAF_BIT) #define MSR_INTEGRITY_CAPS_SAF_GEN_MASK GENMASK_ULL(10, 9) #define MSR_LBR_NHM_FROM 0x00000680 diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index a43b37346a22..ab5cdc3337da 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -13,7 +13,7 @@ #endif #include "../include/asm/inat.h" /* __ignore_sync_check__ */ #include "../include/asm/insn.h" /* __ignore_sync_check__ */ -#include "../include/asm-generic/unaligned.h" /* __ignore_sync_check__ */ +#include "../include/linux/unaligned.h" /* __ignore_sync_check__ */ #include <linux/errno.h> #include <linux/kconfig.h> diff --git a/tools/arch/x86/vdso b/tools/arch/x86/vdso deleted file mode 120000 index 7eb962fd3454..000000000000 --- a/tools/arch/x86/vdso +++ /dev/null @@ -1 +0,0 @@ -../../../arch/x86/entry/vdso/
\ No newline at end of file diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 5938cf799dc6..1658596188bf 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -172,7 +172,7 @@ DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd - LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw).0.0 LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) @@ -181,6 +181,9 @@ ifeq ($(findstring -static,${LDFLAGS}),-static) ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) DWARFLIBS += -lebl endif + + # Must put -ldl after -lebl for dependency + DWARFLIBS += -ldl endif $(OUTPUT)test-dwarf.bin: diff --git a/tools/iio/Makefile b/tools/iio/Makefile index fa720f062229..3bcce0b7d10f 100644 --- a/tools/iio/Makefile +++ b/tools/iio/Makefile @@ -58,7 +58,7 @@ $(OUTPUT)iio_generic_buffer: $(IIO_GENERIC_BUFFER_IN) clean: rm -f $(ALL_PROGRAMS) rm -rf $(OUTPUT)include/linux/iio - find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete + find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete -o -name '\.*.cmd' -delete install: $(ALL_PROGRAMS) install -d -m 755 $(DESTDIR)$(bindir); \ diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c index 0d0a7a19d6f9..9ef5ee087eda 100644 --- a/tools/iio/iio_generic_buffer.c +++ b/tools/iio/iio_generic_buffer.c @@ -498,6 +498,10 @@ int main(int argc, char **argv) return -ENOMEM; } trigger_name = malloc(IIO_MAX_NAME_LENGTH); + if (!trigger_name) { + ret = -ENOMEM; + goto error; + } ret = read_sysfs_string("name", trig_dev_name, trigger_name); free(trig_dev_name); if (ret < 0) { diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 4366da278033..9c05a59f0184 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -128,10 +128,6 @@ # define unlikely(x) __builtin_expect(!!(x), 0) #endif -#ifndef __init -# define __init -#endif - #include <linux/types.h> /* diff --git a/tools/testing/memblock/linux/init.h b/tools/include/linux/init.h index 828e0ee0bc6c..51b5cde28639 100644 --- a/tools/testing/memblock/linux/init.h +++ b/tools/include/linux/init.h @@ -1,10 +1,16 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_INIT_H -#define _LINUX_INIT_H +#ifndef _TOOLS_LINUX_INIT_H_ +#define _TOOLS_LINUX_INIT_H_ #include <linux/compiler.h> -#include <asm/export.h> -#include <linux/memory_hotplug.h> + +#ifndef __init +# define __init +#endif + +#ifndef __exit +# define __exit +#endif #define __section(section) __attribute__((__section__(section))) @@ -28,7 +34,10 @@ struct obs_kernel_param { __aligned(__alignof__(struct obs_kernel_param)) = \ { __setup_str_##unique_id, fn, early } +#define __setup(str, fn) \ + __setup_param(str, fn, fn, 0) + #define early_param(str, fn) \ __setup_param(str, fn, fn, 1) -#endif +#endif /* _TOOLS_LINUX_INIT_H_ */ diff --git a/tools/include/linux/linkage.h b/tools/include/linux/linkage.h index a48ff086899c..7baaa5898ca2 100644 --- a/tools/include/linux/linkage.h +++ b/tools/include/linux/linkage.h @@ -1,8 +1,12 @@ #ifndef _TOOLS_INCLUDE_LINUX_LINKAGE_H #define _TOOLS_INCLUDE_LINUX_LINKAGE_H -#define SYM_FUNC_START(x) .globl x; x: +#include <linux/export.h> +#define SYM_FUNC_START(x) .globl x; x: #define SYM_FUNC_END(x) +#define SYM_DATA_START(x) .globl x; x: +#define SYM_DATA_START_LOCAL(x) x: +#define SYM_DATA_END(x) #endif /* _TOOLS_INCLUDE_LINUX_LINKAGE_H */ diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index cad4f2927983..677c37e4a18c 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -25,6 +25,12 @@ static inline void *phys_to_virt(unsigned long address) return __va(address); } +#define virt_to_phys virt_to_phys +static inline phys_addr_t virt_to_phys(volatile void *address) +{ + return (phys_addr_t)address; +} + void reserve_bootmem_region(phys_addr_t start, phys_addr_t end, int nid); static inline void totalram_pages_inc(void) diff --git a/tools/include/linux/pfn.h b/tools/include/linux/pfn.h index 7512a58189eb..f77a30d70152 100644 --- a/tools/include/linux/pfn.h +++ b/tools/include/linux/pfn.h @@ -7,4 +7,5 @@ #define PFN_UP(x) (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT) #define PFN_DOWN(x) ((x) >> PAGE_SHIFT) #define PFN_PHYS(x) ((phys_addr_t)(x) << PAGE_SHIFT) +#define PHYS_PFN(x) ((unsigned long)((x) >> PAGE_SHIFT)) #endif diff --git a/tools/include/linux/string.h b/tools/include/linux/string.h index 0acb1fc14e19..8499f509f03e 100644 --- a/tools/include/linux/string.h +++ b/tools/include/linux/string.h @@ -12,6 +12,8 @@ void argv_free(char **argv); int strtobool(const char *s, bool *res); +#define strscpy strcpy + /* * glibc based builds needs the extern while uClibc doesn't. * However uClibc headers also define __GLIBC__ hence the hack below @@ -49,4 +51,5 @@ extern char *strim(char *); extern void remove_spaces(char *s); extern void *memchr_inv(const void *start, int c, size_t bytes); +extern unsigned long long memparse(const char *ptr, char **retptr); #endif /* _TOOLS_LINUX_STRING_H_ */ diff --git a/tools/include/asm-generic/unaligned.h b/tools/include/linux/unaligned.h index cdd2fd078027..bc0633bc4650 100644 --- a/tools/include/asm-generic/unaligned.h +++ b/tools/include/linux/unaligned.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_GENERIC_UNALIGNED_H -#define __ASM_GENERIC_UNALIGNED_H +#ifndef __LINUX_UNALIGNED_H +#define __LINUX_UNALIGNED_H /* * This is the most generic implementation of unaligned accesses @@ -154,4 +154,4 @@ static inline u64 get_unaligned_be48(const void *p) } #pragma GCC diagnostic pop -#endif /* __ASM_GENERIC_UNALIGNED_H */ +#endif /* __LINUX_UNALIGNED_H */ diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index d358add1611c..5d32d53508d9 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -141,7 +141,7 @@ struct in_addr { */ #define IP_PMTUDISC_INTERFACE 4 /* weaker version of IP_PMTUDISC_INTERFACE, which allows packets to get - * fragmented if they exeed the interface mtu + * fragmented if they exceed the interface mtu */ #define IP_PMTUDISC_OMIT 5 diff --git a/tools/lib/cmdline.c b/tools/lib/cmdline.c new file mode 100644 index 000000000000..c85f00f43c5e --- /dev/null +++ b/tools/lib/cmdline.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * From lib/cmdline.c + */ +#include <stdlib.h> + +#if __has_attribute(__fallthrough__) +# define fallthrough __attribute__((__fallthrough__)) +#else +# define fallthrough do {} while (0) /* fallthrough */ +#endif + +unsigned long long memparse(const char *ptr, char **retptr) +{ + char *endptr; /* local pointer to end of parsed string */ + + unsigned long long ret = strtoll(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + fallthrough; + case 'P': + case 'p': + ret <<= 10; + fallthrough; + case 'T': + case 't': + ret <<= 10; + fallthrough; + case 'G': + case 'g': + ret <<= 10; + fallthrough; + case 'M': + case 'm': + ret <<= 10; + fallthrough; + case 'K': + case 'k': + ret <<= 10; + endptr++; + fallthrough; + default: + break; + } + + if (retptr) + *retptr = endptr; + + return ret; +} diff --git a/tools/objtool/arch/loongarch/decode.c b/tools/objtool/arch/loongarch/decode.c index aee479d2191c..69b66994f2a1 100644 --- a/tools/objtool/arch/loongarch/decode.c +++ b/tools/objtool/arch/loongarch/decode.c @@ -122,7 +122,7 @@ static bool decode_insn_reg2i12_fomat(union loongarch_instruction inst, switch (inst.reg2i12_format.opcode) { case addid_op: if ((inst.reg2i12_format.rd == CFI_SP) || (inst.reg2i12_format.rj == CFI_SP)) { - /* addi.d sp,sp,si12 or addi.d fp,sp,si12 */ + /* addi.d sp,sp,si12 or addi.d fp,sp,si12 or addi.d sp,fp,si12 */ insn->immediate = sign_extend64(inst.reg2i12_format.immediate, 11); ADD_OP(op) { op->src.type = OP_SRC_ADD; @@ -132,6 +132,15 @@ static bool decode_insn_reg2i12_fomat(union loongarch_instruction inst, op->dest.reg = inst.reg2i12_format.rd; } } + if ((inst.reg2i12_format.rd == CFI_SP) && (inst.reg2i12_format.rj == CFI_FP)) { + /* addi.d sp,fp,si12 */ + struct symbol *func = find_func_containing(insn->sec, insn->offset); + + if (!func) + return false; + + func->frame_pointer = true; + } break; case ldd_op: if (inst.reg2i12_format.rj == CFI_SP) { diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 01237d167223..6604f5d038aa 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -178,6 +178,52 @@ static bool is_sibling_call(struct instruction *insn) } /* + * Checks if a string ends with another. + */ +static bool str_ends_with(const char *s, const char *sub) +{ + const int slen = strlen(s); + const int sublen = strlen(sub); + + if (sublen > slen) + return 0; + + return !memcmp(s + slen - sublen, sub, sublen); +} + +/* + * Checks if a function is a Rust "noreturn" one. + */ +static bool is_rust_noreturn(const struct symbol *func) +{ + /* + * If it does not start with "_R", then it is not a Rust symbol. + */ + if (strncmp(func->name, "_R", 2)) + return false; + + /* + * These are just heuristics -- we do not control the precise symbol + * name, due to the crate disambiguators (which depend on the compiler) + * as well as changes to the source code itself between versions (since + * these come from the Rust standard library). + */ + return str_ends_with(func->name, "_4core5sliceSp15copy_from_slice17len_mismatch_fail") || + str_ends_with(func->name, "_4core6option13unwrap_failed") || + str_ends_with(func->name, "_4core6result13unwrap_failed") || + str_ends_with(func->name, "_4core9panicking5panic") || + str_ends_with(func->name, "_4core9panicking9panic_fmt") || + str_ends_with(func->name, "_4core9panicking14panic_explicit") || + str_ends_with(func->name, "_4core9panicking14panic_nounwind") || + str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || + str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || + str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || + strstr(func->name, "_4core9panicking11panic_const24panic_const_") || + (strstr(func->name, "_4core5slice5index24slice_") && + str_ends_with(func->name, "_fail")); +} + +/* * This checks to see if the given function is a "noreturn" function. * * For global functions which are outside the scope of this object file, we @@ -202,10 +248,14 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func, if (!func) return false; - if (func->bind == STB_GLOBAL || func->bind == STB_WEAK) + if (func->bind == STB_GLOBAL || func->bind == STB_WEAK) { + if (is_rust_noreturn(func)) + return true; + for (i = 0; i < ARRAY_SIZE(global_noreturns); i++) if (!strcmp(func->name, global_noreturns[i])) return true; + } if (func->bind == STB_WEAK) return false; @@ -2993,10 +3043,27 @@ static int update_cfi_state(struct instruction *insn, break; } - if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) { + if (op->dest.reg == CFI_BP && op->src.reg == CFI_SP && + insn->sym->frame_pointer) { + /* addi.d fp,sp,imm on LoongArch */ + if (cfa->base == CFI_SP && cfa->offset == op->src.offset) { + cfa->base = CFI_BP; + cfa->offset = 0; + } + break; + } - /* lea disp(%rbp), %rsp */ - cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset); + if (op->dest.reg == CFI_SP && op->src.reg == CFI_BP) { + /* addi.d sp,fp,imm on LoongArch */ + if (cfa->base == CFI_BP && cfa->offset == 0) { + if (insn->sym->frame_pointer) { + cfa->base = CFI_SP; + cfa->offset = -op->src.offset; + } + } else { + /* lea disp(%rbp), %rsp */ + cfi->stack_size = -(op->src.offset + regs[CFI_BP].offset); + } break; } diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index 2b8a69de4db8..d7e815c2fd15 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -68,6 +68,7 @@ struct symbol { u8 warned : 1; u8 embedded_insn : 1; u8 local_label : 1; + u8 frame_pointer : 1; struct list_head pv_target; struct reloc *relocs; }; diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index 1e8141ef1b15..e7da92489167 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -39,6 +39,8 @@ NORETURN(panic) NORETURN(panic_smp_self_stop) NORETURN(rest_init) NORETURN(rewind_stack_and_make_dead) +NORETURN(rust_begin_unwind) +NORETURN(rust_helper_BUG) NORETURN(sev_es_terminate) NORETURN(snp_abort) NORETURN(start_kernel) diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 4dcf7a0fd235..4ddb27a48eed 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -147,9 +147,9 @@ ifdef LIBDW_DIR endif DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) - DWARFLIBS += -lelf -ldl -lz -llzma -lbz2 -lzstd + DWARFLIBS += -lelf -lz -llzma -lbz2 -lzstd - LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw) + LIBDW_VERSION := $(shell $(PKG_CONFIG) --modversion libdw).0.0 LIBDW_VERSION_1 := $(word 1, $(subst ., ,$(LIBDW_VERSION))) LIBDW_VERSION_2 := $(word 2, $(subst ., ,$(LIBDW_VERSION))) @@ -158,6 +158,9 @@ ifeq ($(findstring -static,${LDFLAGS}),-static) ifeq ($(shell test $(LIBDW_VERSION_2) -lt 177; echo $$?),0) DWARFLIBS += -lebl endif + + # Must put -ldl after -lebl for dependency + DWARFLIBS += -ldl endif FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS) FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) $(DWARFLIBS) diff --git a/tools/perf/check-header_ignore_hunks/lib/list_sort.c b/tools/perf/check-header_ignore_hunks/lib/list_sort.c new file mode 100644 index 000000000000..32d98cb34f80 --- /dev/null +++ b/tools/perf/check-header_ignore_hunks/lib/list_sort.c @@ -0,0 +1,31 @@ +@@ -1,5 +1,6 @@ + // SPDX-License-Identifier: GPL-2.0 + #include <linux/kernel.h> ++#include <linux/bug.h> + #include <linux/compiler.h> + #include <linux/export.h> + #include <linux/string.h> +@@ -52,6 +53,7 @@ + struct list_head *a, struct list_head *b) + { + struct list_head *tail = head; ++ u8 count = 0; + + for (;;) { + /* if equal, take 'a' -- important for sort stability */ +@@ -77,6 +79,15 @@ + /* Finish linking remainder of list b on to tail */ + tail->next = b; + do { ++ /* ++ * If the merge is highly unbalanced (e.g. the input is ++ * already sorted), this loop may run many iterations. ++ * Continue callbacks to the client even though no ++ * element comparison is needed, so the client's cmp() ++ * routine can invoke cond_resched() periodically. ++ */ ++ if (unlikely(!++count)) ++ cmp(priv, b, b); + b->prev = tail; + tail = b; + b = b->next; diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 714c78e5da07..29adbb423327 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -136,6 +136,30 @@ beauty_check () { check_2 "tools/perf/trace/beauty/$file" "$file" "$@" } +check_ignore_some_hunks () { + orig_file="$1" + tools_file="tools/$orig_file" + hunks_to_ignore="tools/perf/check-header_ignore_hunks/$orig_file" + + if [ ! -f "$hunks_to_ignore" ]; then + echo "$hunks_to_ignore not found. Skipping $orig_file check." + FAILURES+=( + "$tools_file $orig_file" + ) + return + fi + + cmd="diff -u \"$tools_file\" \"$orig_file\" | grep -vf \"$hunks_to_ignore\" | wc -l | grep -qw 0" + + if [ -f "$orig_file" ] && ! eval "$cmd" + then + FAILURES+=( + "$tools_file $orig_file" + ) + fi +} + + # Check if we have the kernel headers (tools/perf/../../include), else # we're probably on a detached tarball, so no point in trying to check # differences. @@ -163,13 +187,12 @@ check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/ex check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' -check include/asm-generic/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"' +check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"' check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"' check include/linux/ctype.h '-I "isdigit("' check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B' -check lib/list_sort.c '-I "^#include <linux/bug.h>"' # diff non-symmetric files check_2 tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl @@ -187,6 +210,10 @@ done check_2 tools/perf/util/hashmap.h tools/lib/bpf/hashmap.h check_2 tools/perf/util/hashmap.c tools/lib/bpf/hashmap.c +# Files with larger differences + +check_ignore_some_hunks lib/list_sort.c + cd tools/perf || exit if [ ${#FAILURES[@]} -gt 0 ] diff --git a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h index 13aea8fc3d45..47051871b436 100644 --- a/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h +++ b/tools/perf/trace/beauty/arch/x86/include/asm/irq_vectors.h @@ -18,8 +18,8 @@ * Vectors 0 ... 31 : system traps and exceptions - hardcoded events * Vectors 32 ... 127 : device interrupts * Vector 128 : legacy int80 syscall interface - * Vectors 129 ... LOCAL_TIMER_VECTOR-1 - * Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts + * Vectors 129 ... FIRST_SYSTEM_VECTOR-1 : device interrupts + * Vectors FIRST_SYSTEM_VECTOR ... 255 : special interrupts * * 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table. * diff --git a/tools/perf/trace/beauty/fs_at_flags.sh b/tools/perf/trace/beauty/fs_at_flags.sh index 456f59addf74..e3f13f96a27c 100755 --- a/tools/perf/trace/beauty/fs_at_flags.sh +++ b/tools/perf/trace/beauty/fs_at_flags.sh @@ -13,9 +13,14 @@ printf "static const char *fs_at_flags[] = {\n" regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+AT_([^_]+[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' # AT_EACCESS is only meaningful to faccessat, so we will special case it there... # AT_STATX_SYNC_TYPE is not a bit, its a mask of AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC and AT_STATX_DONT_SYNC +# AT_HANDLE_FID and AT_HANDLE_MNT_ID_UNIQUE are reusing values and are valid only for name_to_handle_at() +# AT_RENAME_NOREPLACE reuses 0x1 and is valid only for renameat2() grep -E $regex ${linux_fcntl} | \ grep -v AT_EACCESS | \ grep -v AT_STATX_SYNC_TYPE | \ + grep -v AT_HANDLE_FID | \ + grep -v AT_HANDLE_MNT_ID_UNIQUE | \ + grep -v AT_RENAME_NOREPLACE | \ sed -r "s/$regex/\2 \1/g" | \ xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n" printf "};\n" diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index df9cdb8bbfb8..d18cc47e89bd 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -327,6 +327,7 @@ struct ucred { * plain text and require encryption */ +#define MSG_SOCK_DEVMEM 0x2000000 /* Receive devmem skbs as cmsg */ #define MSG_ZEROCOPY 0x4000000 /* Use user data in kernel path */ #define MSG_SPLICE_PAGES 0x8000000 /* Splice the pages from the iterator in sendmsg() */ #define MSG_FASTOPEN 0x20000000 /* Send data in TCP SYN */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index c0bcc185fa48..87e2dec79fea 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -16,6 +16,9 @@ #define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3) +/* Was the file just created? */ +#define F_CREATED_QUERY (F_LINUX_SPECIFIC_BASE + 4) + /* * Cancel a blocking posix lock; internal use only until we expose an * asynchronous lock api to userspace: @@ -87,37 +90,70 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +#define AT_FDCWD -100 /* Special value for dirfd used to + indicate openat should use the + current working directory. */ + + +/* Generic flags for the *at(2) family of syscalls. */ + +/* Reserved for per-syscall flags 0xff. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic + links. */ +/* Reserved for per-syscall flags 0x200 */ +#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ +#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount + traversal. */ +#define AT_EMPTY_PATH 0x1000 /* Allow empty relative + pathname to operate on dirfd + directly. */ +/* + * These flags are currently statx(2)-specific, but they could be made generic + * in the future and so they should not be used for other per-syscall flags. + */ +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ + /* - * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is - * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to - * unlinkat. The two functions do completely different things and therefore, - * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to - * faccessat would be undefined behavior and thus treating it equivalent to - * AT_EACCESS is valid undefined behavior. + * Per-syscall flags for the *at(2) family of syscalls. + * + * These are flags that are so syscall-specific that a user passing these flags + * to the wrong syscall is so "clearly wrong" that we can safely call such + * usage "undefined behaviour". + * + * For example, the constants AT_REMOVEDIR and AT_EACCESS have the same value. + * AT_EACCESS is meaningful only to faccessat, while AT_REMOVEDIR is meaningful + * only to unlinkat. The two functions do completely different things and + * therefore, the flags can be allowed to overlap. For example, passing + * AT_REMOVEDIR to faccessat would be undefined behavior and thus treating it + * equivalent to AT_EACCESS is valid undefined behavior. + * + * Note for implementers: When picking a new per-syscall AT_* flag, try to + * reuse already existing flags first. This leaves us with as many unused bits + * as possible, so we can use them for generic bits in the future if necessary. */ -#define AT_FDCWD -100 /* Special value used to indicate - openat should use the current - working directory. */ -#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ + +/* Flags for renameat2(2) (must match legacy RENAME_* flags). */ +#define AT_RENAME_NOREPLACE 0x0001 +#define AT_RENAME_EXCHANGE 0x0002 +#define AT_RENAME_WHITEOUT 0x0004 + +/* Flag for faccessat(2). */ #define AT_EACCESS 0x200 /* Test access permitted for effective IDs, not real IDs. */ +/* Flag for unlinkat(2). */ #define AT_REMOVEDIR 0x200 /* Remove directory instead of unlinking file. */ -#define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ -#define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ -#define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ - -#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ -#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ -#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ -#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ - -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +/* Flags for name_to_handle_at(2). */ +#define AT_HANDLE_FID 0x200 /* File handle is needed to compare + object identity and may not be + usable with open_by_handle_at(2). */ +#define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ -/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */ -#define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to - compare object identity and may not - be usable to open_by_handle_at(2) */ #if defined(__KERNEL__) #define AT_GETATTR_NOSEC 0x80000000 #endif diff --git a/tools/perf/trace/beauty/include/uapi/linux/sched.h b/tools/perf/trace/beauty/include/uapi/linux/sched.h index 3bac0a8ceab2..359a14cc76a4 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/sched.h +++ b/tools/perf/trace/beauty/include/uapi/linux/sched.h @@ -118,6 +118,7 @@ struct clone_args { /* SCHED_ISO: reserved but not implemented yet */ #define SCHED_IDLE 5 #define SCHED_DEADLINE 6 +#define SCHED_EXT 7 /* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */ #define SCHED_RESET_ON_FORK 0x40000000 diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 8bf7e8a0eb6f..4cd513215bcd 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -869,7 +869,7 @@ struct snd_ump_block_info { * Timer section - /dev/snd/timer */ -#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 7) +#define SNDRV_TIMER_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 8) enum { SNDRV_TIMER_CLASS_NONE = -1, @@ -894,6 +894,7 @@ enum { #define SNDRV_TIMER_GLOBAL_RTC 1 /* unused */ #define SNDRV_TIMER_GLOBAL_HPET 2 #define SNDRV_TIMER_GLOBAL_HRTIMER 3 +#define SNDRV_TIMER_GLOBAL_UDRIVEN 4 /* info flags */ #define SNDRV_TIMER_FLG_SLAVE (1<<0) /* cannot be controlled */ @@ -974,6 +975,18 @@ struct snd_timer_status { }; #endif +/* + * This structure describes the userspace-driven timer. Such timers are purely virtual, + * and can only be triggered from software (for instance, by userspace application). + */ +struct snd_timer_uinfo { + /* To pretend being a normal timer, we need to know the resolution in ns. */ + __u64 resolution; + int fd; + unsigned int id; + unsigned char reserved[16]; +}; + #define SNDRV_TIMER_IOCTL_PVERSION _IOR('T', 0x00, int) #define SNDRV_TIMER_IOCTL_NEXT_DEVICE _IOWR('T', 0x01, struct snd_timer_id) #define SNDRV_TIMER_IOCTL_TREAD_OLD _IOW('T', 0x02, int) @@ -990,6 +1003,8 @@ struct snd_timer_status { #define SNDRV_TIMER_IOCTL_CONTINUE _IO('T', 0xa2) #define SNDRV_TIMER_IOCTL_PAUSE _IO('T', 0xa3) #define SNDRV_TIMER_IOCTL_TREAD64 _IOW('T', 0xa4, int) +#define SNDRV_TIMER_IOCTL_CREATE _IOWR('T', 0xa5, struct snd_timer_uinfo) +#define SNDRV_TIMER_IOCTL_TRIGGER _IO('T', 0xa6) #if __BITS_PER_LONG == 64 #define SNDRV_TIMER_IOCTL_TREAD SNDRV_TIMER_IOCTL_TREAD_OLD diff --git a/tools/perf/trace/beauty/msg_flags.c b/tools/perf/trace/beauty/msg_flags.c index ed3ff969b546..2da581ff0c80 100644 --- a/tools/perf/trace/beauty/msg_flags.c +++ b/tools/perf/trace/beauty/msg_flags.c @@ -11,6 +11,9 @@ #ifndef MSG_BATCH #define MSG_BATCH 0x40000 #endif +#ifndef MSG_SOCK_DEVMEM +#define MSG_SOCK_DEVMEM 0x2000000 +#endif #ifndef MSG_ZEROCOPY #define MSG_ZEROCOPY 0x4000000 #endif @@ -57,6 +60,7 @@ static size_t syscall_arg__scnprintf_msg_flags(char *bf, size_t size, P_MSG_FLAG(MORE); P_MSG_FLAG(WAITFORONE); P_MSG_FLAG(BATCH); + P_MSG_FLAG(SOCK_DEVMEM); P_MSG_FLAG(ZEROCOPY); P_MSG_FLAG(SPLICE_PAGES); P_MSG_FLAG(FASTOPEN); diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c index 7bf607d0f6d8..4cef10a83962 100644 --- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c +++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c @@ -11,7 +11,7 @@ #include <linux/bitops.h> #include <stdarg.h> #include <linux/kernel.h> -#include <asm-generic/unaligned.h> +#include <linux/unaligned.h> #include "arm-spe-pkt-decoder.h" diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 90f32f327b9b..40f047baef81 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -3323,7 +3323,7 @@ static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm) * Don't create decoders for empty queues, mainly because * etmq->format is unknown for empty queues. */ - assert(empty == (etmq->format == UNSET)); + assert(empty || etmq->format != UNSET); if (empty) continue; diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 336a3a183a78..bd7505812569 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -9,6 +9,7 @@ #include <elfutils/libdw.h> #include <elfutils/libdwfl.h> #include <elfutils/version.h> +#include <errno.h> struct strbuf; diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c index bccb988a7a44..94fb16cf9e0c 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-pkt-decoder.c @@ -10,7 +10,7 @@ #include <byteswap.h> #include <linux/kernel.h> #include <linux/compiler.h> -#include <asm-generic/unaligned.h> +#include <linux/unaligned.h> #include "intel-pt-pkt-decoder.h" diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a18927d792af..3bbf173ad822 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -1931,6 +1931,9 @@ int dso__load(struct dso *dso, struct map *map) if (next_slot) { ss_pos++; + if (dso__binary_type(dso) == DSO_BINARY_TYPE__NOT_FOUND) + dso__set_binary_type(dso, symtab_type); + if (syms_ss && runtime_ss) break; } else { diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 1b6f8f6db7aa..c12f5d8c4bf6 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -308,8 +308,10 @@ static struct dso *machine__find_vdso(struct machine *machine, if (!dso) { dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true); - if (dso && dso_type != dso__type(dso, machine)) + if (dso && dso_type != dso__type(dso, machine)) { + dso__put(dso); dso = NULL; + } } break; case DSO__TYPE_X32BIT: diff --git a/tools/testing/cxl/Kbuild b/tools/testing/cxl/Kbuild index 3d1ca9e38b1f..b1256fee3567 100644 --- a/tools/testing/cxl/Kbuild +++ b/tools/testing/cxl/Kbuild @@ -14,7 +14,7 @@ ldflags-y += --wrap=cxl_dvsec_rr_decode ldflags-y += --wrap=devm_cxl_add_rch_dport ldflags-y += --wrap=cxl_rcd_component_reg_phys ldflags-y += --wrap=cxl_endpoint_parse_cdat -ldflags-y += --wrap=cxl_setup_parent_dport +ldflags-y += --wrap=cxl_dport_init_ras_reporting DRIVERS := ../../../drivers CXL_SRC := $(DRIVERS)/cxl diff --git a/tools/testing/cxl/mock_acpi.c b/tools/testing/cxl/mock_acpi.c index 55813de26d46..8da94378ccec 100644 --- a/tools/testing/cxl/mock_acpi.c +++ b/tools/testing/cxl/mock_acpi.c @@ -18,7 +18,7 @@ struct acpi_device *to_cxl_host_bridge(struct device *host, struct device *dev) goto out; } - if (dev->bus == &platform_bus_type) + if (dev_is_platform(dev)) goto out; adev = to_acpi_device(dev); diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 129f179b0ac5..ad5c4c18c5c6 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -8,7 +8,8 @@ #include <linux/delay.h> #include <linux/sizes.h> #include <linux/bits.h> -#include <asm/unaligned.h> +#include <cxl/mailbox.h> +#include <linux/unaligned.h> #include <crypto/sha2.h> #include <cxlmem.h> @@ -534,6 +535,7 @@ static int mock_gsl(struct cxl_mbox_cmd *cmd) static int mock_get_log(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *cmd) { + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; struct cxl_mbox_get_log *gl = cmd->payload_in; u32 offset = le32_to_cpu(gl->offset); u32 length = le32_to_cpu(gl->length); @@ -542,7 +544,7 @@ static int mock_get_log(struct cxl_memdev_state *mds, struct cxl_mbox_cmd *cmd) if (cmd->size_in < sizeof(*gl)) return -EINVAL; - if (length > mds->payload_size) + if (length > cxl_mbox->payload_size) return -EINVAL; if (offset + length > sizeof(mock_cel)) return -EINVAL; @@ -617,12 +619,13 @@ void cxl_mockmem_sanitize_work(struct work_struct *work) { struct cxl_memdev_state *mds = container_of(work, typeof(*mds), security.poll_dwork.work); + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; - mutex_lock(&mds->mbox_mutex); + mutex_lock(&cxl_mbox->mbox_mutex); if (mds->security.sanitize_node) sysfs_notify_dirent(mds->security.sanitize_node); mds->security.sanitize_active = false; - mutex_unlock(&mds->mbox_mutex); + mutex_unlock(&cxl_mbox->mbox_mutex); dev_dbg(mds->cxlds.dev, "sanitize complete\n"); } @@ -631,6 +634,7 @@ static int mock_sanitize(struct cxl_mockmem_data *mdata, struct cxl_mbox_cmd *cmd) { struct cxl_memdev_state *mds = mdata->mds; + struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox; int rc = 0; if (cmd->size_in != 0) @@ -648,14 +652,14 @@ static int mock_sanitize(struct cxl_mockmem_data *mdata, return -ENXIO; } - mutex_lock(&mds->mbox_mutex); + mutex_lock(&cxl_mbox->mbox_mutex); if (schedule_delayed_work(&mds->security.poll_dwork, msecs_to_jiffies(mdata->sanitize_timeout))) { mds->security.sanitize_active = true; dev_dbg(mds->cxlds.dev, "sanitize issued\n"); } else rc = -EBUSY; - mutex_unlock(&mds->mbox_mutex); + mutex_unlock(&cxl_mbox->mbox_mutex); return rc; } @@ -1333,12 +1337,13 @@ static int mock_activate_fw(struct cxl_mockmem_data *mdata, return -EINVAL; } -static int cxl_mock_mbox_send(struct cxl_memdev_state *mds, +static int cxl_mock_mbox_send(struct cxl_mailbox *cxl_mbox, struct cxl_mbox_cmd *cmd) { - struct cxl_dev_state *cxlds = &mds->cxlds; - struct device *dev = cxlds->dev; + struct device *dev = cxl_mbox->host; struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); + struct cxl_memdev_state *mds = mdata->mds; + struct cxl_dev_state *cxlds = &mds->cxlds; int rc = -EIO; switch (cmd->opcode) { @@ -1453,6 +1458,17 @@ static ssize_t event_trigger_store(struct device *dev, } static DEVICE_ATTR_WO(event_trigger); +static int cxl_mock_mailbox_create(struct cxl_dev_state *cxlds) +{ + int rc; + + rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev); + if (rc) + return rc; + + return 0; +} + static int cxl_mock_mem_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; @@ -1460,6 +1476,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) struct cxl_memdev_state *mds; struct cxl_dev_state *cxlds; struct cxl_mockmem_data *mdata; + struct cxl_mailbox *cxl_mbox; int rc; mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL); @@ -1487,13 +1504,18 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (IS_ERR(mds)) return PTR_ERR(mds); + cxlds = &mds->cxlds; + rc = cxl_mock_mailbox_create(cxlds); + if (rc) + return rc; + + cxl_mbox = &mds->cxlds.cxl_mbox; mdata->mds = mds; - mds->mbox_send = cxl_mock_mbox_send; - mds->payload_size = SZ_4K; + cxl_mbox->mbox_send = cxl_mock_mbox_send; + cxl_mbox->payload_size = SZ_4K; mds->event.buf = (struct cxl_get_event_payload *) mdata->event_buf; INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mockmem_sanitize_work); - cxlds = &mds->cxlds; cxlds->serial = pdev->id; if (is_rcd(pdev)) cxlds->rcd = true; diff --git a/tools/testing/cxl/test/mock.c b/tools/testing/cxl/test/mock.c index d619672faa49..f4ce96cc11d4 100644 --- a/tools/testing/cxl/test/mock.c +++ b/tools/testing/cxl/test/mock.c @@ -228,7 +228,7 @@ int __wrap_cxl_hdm_decode_init(struct cxl_dev_state *cxlds, } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_hdm_decode_init, CXL); -int __wrap_cxl_dvsec_rr_decode(struct device *dev, int dvsec, +int __wrap_cxl_dvsec_rr_decode(struct device *dev, struct cxl_port *port, struct cxl_endpoint_dvsec_info *info) { int rc = 0, index; @@ -237,7 +237,7 @@ int __wrap_cxl_dvsec_rr_decode(struct device *dev, int dvsec, if (ops && ops->is_mock_dev(dev)) rc = 0; else - rc = cxl_dvsec_rr_decode(dev, dvsec, info); + rc = cxl_dvsec_rr_decode(dev, port, info); put_cxl_mock_ops(index); return rc; @@ -299,17 +299,17 @@ void __wrap_cxl_endpoint_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(__wrap_cxl_endpoint_parse_cdat, CXL); -void __wrap_cxl_setup_parent_dport(struct device *host, struct cxl_dport *dport) +void __wrap_cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host) { int index; struct cxl_mock_ops *ops = get_cxl_mock_ops(&index); if (!ops || !ops->is_mock_port(dport->dport_dev)) - cxl_setup_parent_dport(host, dport); + cxl_dport_init_ras_reporting(dport, host); put_cxl_mock_ops(index); } -EXPORT_SYMBOL_NS_GPL(__wrap_cxl_setup_parent_dport, CXL); +EXPORT_SYMBOL_NS_GPL(__wrap_cxl_dport_init_ras_reporting, CXL); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(ACPI); diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile index 7a1ca694a982..d80982ccdc20 100644 --- a/tools/testing/memblock/Makefile +++ b/tools/testing/memblock/Makefile @@ -8,7 +8,7 @@ LDFLAGS += -fsanitize=address -fsanitize=undefined TARGETS = main TEST_OFILES = tests/alloc_nid_api.o tests/alloc_helpers_api.o tests/alloc_api.o \ tests/basic_api.o tests/common.o tests/alloc_exact_nid_api.o -DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o +DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o cmdline.o OFILES = main.o $(DEP_OFILES) $(TEST_OFILES) EXTR_SRC = ../../../mm/memblock.c diff --git a/tools/testing/memblock/linux/kernel.h b/tools/testing/memblock/linux/kernel.h index d2f148bd8902..4d1012d5be6e 100644 --- a/tools/testing/memblock/linux/kernel.h +++ b/tools/testing/memblock/linux/kernel.h @@ -8,5 +8,7 @@ #include <linux/printk.h> #include <linux/linkage.h> #include <linux/kconfig.h> +#include <linux/string.h> +#include <linux/ctype.h> #endif diff --git a/tools/testing/memblock/linux/mmzone.h b/tools/testing/memblock/linux/mmzone.h index 71546e15bdd3..bb682659a12d 100644 --- a/tools/testing/memblock/linux/mmzone.h +++ b/tools/testing/memblock/linux/mmzone.h @@ -3,6 +3,7 @@ #define _TOOLS_MMZONE_H #include <linux/atomic.h> +#include <linux/memory_hotplug.h> struct pglist_data *first_online_pgdat(void); struct pglist_data *next_online_pgdat(struct pglist_data *pgdat); diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c index c5b00aca9def..1873ddbe16cc 100644 --- a/tools/testing/radix-tree/maple.c +++ b/tools/testing/radix-tree/maple.c @@ -14,7 +14,7 @@ #include "test.h" #include <stdlib.h> #include <time.h> -#include "linux/init.h" +#include <linux/init.h> #define module_init(x) #define module_exit(x) diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile index 25be68025290..944279160fed 100644 --- a/tools/testing/selftests/alsa/Makefile +++ b/tools/testing/selftests/alsa/Makefile @@ -1,5 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 # +ifneq ($(shell pkg-config --exists alsa && echo 0 || echo 1),0) +$(error Package alsa not found, please install alsa development package or \ + add directory containing `alsa.pc` in PKG_CONFIG_PATH) +endif CFLAGS += $(shell pkg-config --cflags alsa) $(KHDR_INCLUDES) LDLIBS += $(shell pkg-config --libs alsa) diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h index f8b1b7e68d2e..34024de6337e 100644 --- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h @@ -22,7 +22,7 @@ __builtin_memcpy(b, __tmp, sizeof(a)); \ } while (0) -/* asm-generic/unaligned.h */ +/* linux/unaligned.h */ #define __get_unaligned_t(type, ptr) ({ \ const struct { type x; } __packed * __pptr = (typeof(__pptr))(ptr); \ __pptr->x; \ diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index dfec31fb9b30..8d275f03e977 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -152,7 +152,10 @@ void suspend(void) if (err < 0) ksft_exit_fail_msg("timerfd_settime() failed\n"); - if (write(power_state_fd, "mem", strlen("mem")) != strlen("mem")) + system("(echo mem > /sys/power/state) 2> /dev/null"); + + timerfd_gettime(timerfd, &spec); + if (spec.it_value.tv_sec != 0 || spec.it_value.tv_nsec != 0) ksft_exit_fail_msg("Failed to enter Suspend state\n"); close(timerfd); diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 31b56d625655..3c196fa86c99 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -27,8 +27,6 @@ #include "../kselftest_harness.h" #include "clone3_selftests.h" -#define MAX_PID_NS_LEVEL 32 - static void child_exit(int ret) { fflush(stdout); diff --git a/tools/testing/selftests/core/.gitignore b/tools/testing/selftests/core/.gitignore index 6e6712ce5817..7999361992aa 100644 --- a/tools/testing/selftests/core/.gitignore +++ b/tools/testing/selftests/core/.gitignore @@ -1 +1,2 @@ close_range_test +unshare_test diff --git a/tools/testing/selftests/devices/probe/test_discoverable_devices.py b/tools/testing/selftests/devices/probe/test_discoverable_devices.py index d94a74b8a054..d7a2bb91c807 100755 --- a/tools/testing/selftests/devices/probe/test_discoverable_devices.py +++ b/tools/testing/selftests/devices/probe/test_discoverable_devices.py @@ -45,7 +45,7 @@ def find_pci_controller_dirs(): def find_usb_controller_dirs(): - usb_controller_sysfs_dir = "usb[\d]+" + usb_controller_sysfs_dir = r"usb[\d]+" dir_regex = re.compile(usb_controller_sysfs_dir) for d in os.scandir(sysfs_usb_devices): @@ -91,7 +91,7 @@ def get_acpi_uid(sysfs_dev_dir): def get_usb_version(sysfs_dev_dir): - re_usb_version = re.compile("PRODUCT=.*/(\d)/.*") + re_usb_version = re.compile(r"PRODUCT=.*/(\d)/.*") with open(os.path.join(sysfs_dev_dir, "uevent")) as f: return int(re_usb_version.search(f.read()).group(1)) diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index 90c238ba6a4b..a0dc5d4bf733 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -9,7 +9,8 @@ execveat.ephemeral execveat.denatured non-regular null-argv -/load_address_* +/load_address.* +!load_address.c /recursion-depth xxxxxxxx* pipe diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 5f362c0fd890..319567f0fae1 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -65,6 +65,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) static const char * const binder_features[] = { "oneway_spam_detection", "extended_error", + "freeze_notification", }; change_mountns(_metadata); diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config index 048a312abf40..544de0db5f58 100644 --- a/tools/testing/selftests/ftrace/config +++ b/tools/testing/selftests/ftrace/config @@ -20,6 +20,7 @@ CONFIG_PREEMPT_TRACER=y CONFIG_PROBE_EVENTS_BTF_ARGS=y CONFIG_SAMPLES=y CONFIG_SAMPLE_FTRACE_DIRECT=m +CONFIG_SAMPLE_TRACE_EVENTS=m CONFIG_SAMPLE_TRACE_PRINTK=m CONFIG_SCHED_TRACER=y CONFIG_STACK_TRACER=y diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc new file mode 100644 index 000000000000..d319d5ed4226 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe_module.tc @@ -0,0 +1,61 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Generic dynamic event - add/remove tracepoint probe events on module +# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README + +rmmod trace-events-sample ||: +if ! modprobe trace-events-sample ; then + echo "No trace-events sample module - please make CONFIG_SAMPLE_TRACE_EVENTS=m" + exit_unresolved; +fi +trap "rmmod trace-events-sample" EXIT + +echo 0 > events/enable +echo > dynamic_events + +TRACEPOINT1=foo_bar +TRACEPOINT2=foo_bar_with_cond + +echo "t:myevent1 $TRACEPOINT1" >> dynamic_events +echo "t:myevent2 $TRACEPOINT2" >> dynamic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +test -d events/tracepoints/myevent1 +test -d events/tracepoints/myevent2 + +echo "-:myevent2" >> dynamic_events + +grep -q myevent1 dynamic_events +! grep -q myevent2 dynamic_events + +echo > dynamic_events + +clear_trace + +:;: "Try to put a probe on a tracepoint in non-loaded module" ;: +rmmod trace-events-sample + +echo "t:myevent1 $TRACEPOINT1" >> dynamic_events +echo "t:myevent2 $TRACEPOINT2" >> dynamic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +test -d events/tracepoints/myevent1 +test -d events/tracepoints/myevent2 + +echo 1 > events/tracepoints/enable + +modprobe trace-events-sample + +sleep 2 + +grep -q "myevent1" trace +grep -q "myevent2" trace + +rmmod trace-events-sample +trap "" EXIT + +echo 0 > events/tracepoints/enable +echo > dynamic_events +clear_trace diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc index da117b8f1d12..ffe8ffef4027 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc @@ -9,7 +9,6 @@ check_error() { # command-with-error-pos-by-^ check_error 't^100 kfree' # BAD_MAXACT_TYPE -check_error 't ^non_exist_tracepoint' # NO_TRACEPOINT check_error 't:^/bar kfree' # NO_GROUP_NAME check_error 't:^12345678901234567890123456789012345678901234567890123456789012345/bar kfree' # GROUP_TOO_LONG diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc new file mode 100644 index 000000000000..ffff8646733c --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-profiler.tc @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function profiler with function graph tracing +# requires: function_profile_enabled set_ftrace_filter function_graph:tracer + +# The function graph tracer can now be run along side of the function +# profiler. But there was a bug that caused the combination of the two +# to crash. It also required the function graph tracer to be started +# first. +# +# This test triggers that bug +# +# We need both function_graph and profiling to run this test + +fail() { # mesg + echo $1 + exit_fail +} + +echo "Enabling function graph tracer:" +echo function_graph > current_tracer +echo "enable profiler" + +# Older kernels do not allow function_profile to be enabled with +# function graph tracer. If the below fails, mark it as unsupported +echo 1 > function_profile_enabled || exit_unsupported + +# Let it run for a bit to make sure nothing explodes +sleep 1 + +exit 0 diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile index 72be55ac4bdf..38ae31bb07b5 100644 --- a/tools/testing/selftests/hid/Makefile +++ b/tools/testing/selftests/hid/Makefile @@ -17,6 +17,8 @@ TEST_PROGS += hid-tablet.sh TEST_PROGS += hid-usb_crash.sh TEST_PROGS += hid-wacom.sh +TEST_FILES := run-hid-tools-tests.sh + CXX ?= $(CROSS_COMPILE)g++ HOSTPKG_CONFIG := pkg-config diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore index 6d9381d60172..7f57abf936e7 100644 --- a/tools/testing/selftests/kvm/.gitignore +++ b/tools/testing/selftests/kvm/.gitignore @@ -5,3 +5,7 @@ !*.h !*.S !*.sh +!.gitignore +!config +!settings +!Makefile diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 0c4b254ab56b..960cf6a77198 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -130,6 +130,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test TEST_GEN_PROGS_x86_64 += access_tracking_perf_test +TEST_GEN_PROGS_x86_64 += coalesced_io_test TEST_GEN_PROGS_x86_64 += demand_paging_test TEST_GEN_PROGS_x86_64 += dirty_log_test TEST_GEN_PROGS_x86_64 += dirty_log_perf_test @@ -167,6 +168,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access TEST_GEN_PROGS_aarch64 += aarch64/no-vgic-v3 TEST_GEN_PROGS_aarch64 += access_tracking_perf_test TEST_GEN_PROGS_aarch64 += arch_timer +TEST_GEN_PROGS_aarch64 += coalesced_io_test TEST_GEN_PROGS_aarch64 += demand_paging_test TEST_GEN_PROGS_aarch64 += dirty_log_test TEST_GEN_PROGS_aarch64 += dirty_log_perf_test @@ -188,6 +190,7 @@ TEST_GEN_PROGS_s390x += s390x/tprot TEST_GEN_PROGS_s390x += s390x/cmma_test TEST_GEN_PROGS_s390x += s390x/debug_test TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test +TEST_GEN_PROGS_s390x += s390x/ucontrol_test TEST_GEN_PROGS_s390x += demand_paging_test TEST_GEN_PROGS_s390x += dirty_log_test TEST_GEN_PROGS_s390x += guest_print_test @@ -200,6 +203,7 @@ TEST_GEN_PROGS_s390x += kvm_binary_stats_test TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test TEST_GEN_PROGS_riscv += arch_timer +TEST_GEN_PROGS_riscv += coalesced_io_test TEST_GEN_PROGS_riscv += demand_paging_test TEST_GEN_PROGS_riscv += dirty_log_test TEST_GEN_PROGS_riscv += get-reg-list diff --git a/tools/testing/selftests/kvm/coalesced_io_test.c b/tools/testing/selftests/kvm/coalesced_io_test.c new file mode 100644 index 000000000000..60cb25454899 --- /dev/null +++ b/tools/testing/selftests/kvm/coalesced_io_test.c @@ -0,0 +1,236 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> + +#include <linux/sizes.h> + +#include <kvm_util.h> +#include <processor.h> + +#include "ucall_common.h" + +struct kvm_coalesced_io { + struct kvm_coalesced_mmio_ring *ring; + uint32_t ring_size; + uint64_t mmio_gpa; + uint64_t *mmio; + + /* + * x86-only, but define pio_port for all architectures to minimize the + * amount of #ifdeffery and complexity, without having to sacrifice + * verbose error messages. + */ + uint8_t pio_port; +}; + +static struct kvm_coalesced_io kvm_builtin_io_ring; + +#ifdef __x86_64__ +static const int has_pio = 1; +#else +static const int has_pio = 0; +#endif + +static void guest_code(struct kvm_coalesced_io *io) +{ + int i, j; + + for (;;) { + for (j = 0; j < 1 + has_pio; j++) { + /* + * KVM always leaves one free entry, i.e. exits to + * userspace before the last entry is filled. + */ + for (i = 0; i < io->ring_size - 1; i++) { +#ifdef __x86_64__ + if (i & 1) + outl(io->pio_port, io->pio_port + i); + else +#endif + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); + } +#ifdef __x86_64__ + if (j & 1) + outl(io->pio_port, io->pio_port + i); + else +#endif + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); + } + GUEST_SYNC(0); + + WRITE_ONCE(*io->mmio, io->mmio_gpa + i); +#ifdef __x86_64__ + outl(io->pio_port, io->pio_port + i); +#endif + } +} + +static void vcpu_run_and_verify_io_exit(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, + uint32_t ring_start, + uint32_t expected_exit) +{ + const bool want_pio = expected_exit == KVM_EXIT_IO; + struct kvm_coalesced_mmio_ring *ring = io->ring; + struct kvm_run *run = vcpu->run; + uint32_t pio_value; + + WRITE_ONCE(ring->first, ring_start); + WRITE_ONCE(ring->last, ring_start); + + vcpu_run(vcpu); + + /* + * Annoyingly, reading PIO data is safe only for PIO exits, otherwise + * data_offset is garbage, e.g. an MMIO gpa. + */ + if (run->exit_reason == KVM_EXIT_IO) + pio_value = *(uint32_t *)((void *)run + run->io.data_offset); + else + pio_value = 0; + + TEST_ASSERT((!want_pio && (run->exit_reason == KVM_EXIT_MMIO && run->mmio.is_write && + run->mmio.phys_addr == io->mmio_gpa && run->mmio.len == 8 && + *(uint64_t *)run->mmio.data == io->mmio_gpa + io->ring_size - 1)) || + (want_pio && (run->exit_reason == KVM_EXIT_IO && run->io.port == io->pio_port && + run->io.direction == KVM_EXIT_IO_OUT && run->io.count == 1 && + pio_value == io->pio_port + io->ring_size - 1)), + "For start = %u, expected exit on %u-byte %s write 0x%llx = %lx, got exit_reason = %u (%s)\n " + "(MMIO addr = 0x%llx, write = %u, len = %u, data = %lx)\n " + "(PIO port = 0x%x, write = %u, len = %u, count = %u, data = %x", + ring_start, want_pio ? 4 : 8, want_pio ? "PIO" : "MMIO", + want_pio ? (unsigned long long)io->pio_port : io->mmio_gpa, + (want_pio ? io->pio_port : io->mmio_gpa) + io->ring_size - 1, run->exit_reason, + run->exit_reason == KVM_EXIT_MMIO ? "MMIO" : run->exit_reason == KVM_EXIT_IO ? "PIO" : "other", + run->mmio.phys_addr, run->mmio.is_write, run->mmio.len, *(uint64_t *)run->mmio.data, + run->io.port, run->io.direction, run->io.size, run->io.count, pio_value); +} + +static void vcpu_run_and_verify_coalesced_io(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, + uint32_t ring_start, + uint32_t expected_exit) +{ + struct kvm_coalesced_mmio_ring *ring = io->ring; + int i; + + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, expected_exit); + + TEST_ASSERT((ring->last + 1) % io->ring_size == ring->first, + "Expected ring to be full (minus 1), first = %u, last = %u, max = %u, start = %u", + ring->first, ring->last, io->ring_size, ring_start); + + for (i = 0; i < io->ring_size - 1; i++) { + uint32_t idx = (ring->first + i) % io->ring_size; + struct kvm_coalesced_mmio *entry = &ring->coalesced_mmio[idx]; + +#ifdef __x86_64__ + if (i & 1) + TEST_ASSERT(entry->phys_addr == io->pio_port && + entry->len == 4 && entry->pio && + *(uint32_t *)entry->data == io->pio_port + i, + "Wanted 4-byte port I/O 0x%x = 0x%x in entry %u, got %u-byte %s 0x%llx = 0x%x", + io->pio_port, io->pio_port + i, i, + entry->len, entry->pio ? "PIO" : "MMIO", + entry->phys_addr, *(uint32_t *)entry->data); + else +#endif + TEST_ASSERT(entry->phys_addr == io->mmio_gpa && + entry->len == 8 && !entry->pio, + "Wanted 8-byte MMIO to 0x%lx = %lx in entry %u, got %u-byte %s 0x%llx = 0x%lx", + io->mmio_gpa, io->mmio_gpa + i, i, + entry->len, entry->pio ? "PIO" : "MMIO", + entry->phys_addr, *(uint64_t *)entry->data); + } +} + +static void test_coalesced_io(struct kvm_vcpu *vcpu, + struct kvm_coalesced_io *io, uint32_t ring_start) +{ + struct kvm_coalesced_mmio_ring *ring = io->ring; + + kvm_vm_register_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */); +#ifdef __x86_64__ + kvm_vm_register_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */); +#endif + + vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_MMIO); +#ifdef __x86_64__ + vcpu_run_and_verify_coalesced_io(vcpu, io, ring_start, KVM_EXIT_IO); +#endif + + /* + * Verify ucall, which may use non-coalesced MMIO or PIO, generates an + * immediate exit. + */ + WRITE_ONCE(ring->first, ring_start); + WRITE_ONCE(ring->last, ring_start); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC); + TEST_ASSERT_EQ(ring->first, ring_start); + TEST_ASSERT_EQ(ring->last, ring_start); + + /* Verify that non-coalesced MMIO/PIO generates an exit to userspace. */ + kvm_vm_unregister_coalesced_io(vcpu->vm, io->mmio_gpa, 8, false /* pio */); + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_MMIO); + +#ifdef __x86_64__ + kvm_vm_unregister_coalesced_io(vcpu->vm, io->pio_port, 8, true /* pio */); + vcpu_run_and_verify_io_exit(vcpu, io, ring_start, KVM_EXIT_IO); +#endif +} + +int main(int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int i; + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_MMIO)); + +#ifdef __x86_64__ + TEST_REQUIRE(kvm_has_cap(KVM_CAP_COALESCED_PIO)); +#endif + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + kvm_builtin_io_ring = (struct kvm_coalesced_io) { + /* + * The I/O ring is a kernel-allocated page whose address is + * relative to each vCPU's run page, with the page offset + * provided by KVM in the return of KVM_CAP_COALESCED_MMIO. + */ + .ring = (void *)vcpu->run + + (kvm_check_cap(KVM_CAP_COALESCED_MMIO) * getpagesize()), + + /* + * The size of the I/O ring is fixed, but KVM defines the sized + * based on the kernel's PAGE_SIZE. Thus, userspace must query + * the host's page size at runtime to compute the ring size. + */ + .ring_size = (getpagesize() - sizeof(struct kvm_coalesced_mmio_ring)) / + sizeof(struct kvm_coalesced_mmio), + + /* + * Arbitrary address+port (MMIO mustn't overlap memslots), with + * the MMIO GPA identity mapped in the guest. + */ + .mmio_gpa = 4ull * SZ_1G, + .mmio = (uint64_t *)(4ull * SZ_1G), + .pio_port = 0x80, + }; + + virt_map(vm, (uint64_t)kvm_builtin_io_ring.mmio, kvm_builtin_io_ring.mmio_gpa, 1); + + sync_global_to_guest(vm, kvm_builtin_io_ring); + vcpu_args_set(vcpu, 1, &kvm_builtin_io_ring); + + for (i = 0; i < kvm_builtin_io_ring.ring_size; i++) + test_coalesced_io(vcpu, &kvm_builtin_io_ring, i); + + kvm_vm_free(vm); + return 0; +} diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c index 8092c2d0f5d6..bcf582852db9 100644 --- a/tools/testing/selftests/kvm/guest_print_test.c +++ b/tools/testing/selftests/kvm/guest_print_test.c @@ -107,6 +107,21 @@ static void ucall_abort(const char *assert_msg, const char *expected_assert_msg) expected_assert_msg, &assert_msg[offset]); } +/* + * Open code vcpu_run(), sans the UCALL_ABORT handling, so that intentional + * guest asserts guest can be verified instead of being reported as failures. + */ +static void do_vcpu_run(struct kvm_vcpu *vcpu) +{ + int r; + + do { + r = __vcpu_run(vcpu); + } while (r == -1 && errno == EINTR); + + TEST_ASSERT(!r, KVM_IOCTL_ERROR(KVM_RUN, r)); +} + static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, const char *expected_assert) { @@ -114,7 +129,7 @@ static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf, struct ucall uc; while (1) { - vcpu_run(vcpu); + do_vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, "Unexpected exit reason: %u (%s),", @@ -159,7 +174,7 @@ static void test_limits(void) vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits); run = vcpu->run; - vcpu_run(vcpu); + do_vcpu_run(vcpu); TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON, "Unexpected exit reason: %u (%s),", diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 63c2aaae51f3..bc7c242480d6 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -428,8 +428,6 @@ const char *vm_guest_mode_string(uint32_t i); void kvm_vm_free(struct kvm_vm *vmp); void kvm_vm_restart(struct kvm_vm *vmp); void kvm_vm_release(struct kvm_vm *vmp); -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva, - size_t len); void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename); int kvm_memfd_alloc(size_t size, bool hugepages); @@ -460,6 +458,32 @@ static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm) return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL); } +static inline void kvm_vm_register_coalesced_io(struct kvm_vm *vm, + uint64_t address, + uint64_t size, bool pio) +{ + struct kvm_coalesced_mmio_zone zone = { + .addr = address, + .size = size, + .pio = pio, + }; + + vm_ioctl(vm, KVM_REGISTER_COALESCED_MMIO, &zone); +} + +static inline void kvm_vm_unregister_coalesced_io(struct kvm_vm *vm, + uint64_t address, + uint64_t size, bool pio) +{ + struct kvm_coalesced_mmio_zone zone = { + .addr = address, + .size = size, + .pio = pio, + }; + + vm_ioctl(vm, KVM_UNREGISTER_COALESCED_MMIO, &zone); +} + static inline int vm_get_stats_fd(struct kvm_vm *vm) { int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL); diff --git a/tools/testing/selftests/kvm/include/s390x/debug_print.h b/tools/testing/selftests/kvm/include/s390x/debug_print.h new file mode 100644 index 000000000000..1bf275631cc6 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/debug_print.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Definition for kernel virtual machines on s390x + * + * Copyright IBM Corp. 2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + */ + +#ifndef SELFTEST_KVM_DEBUG_PRINT_H +#define SELFTEST_KVM_DEBUG_PRINT_H + +#include "asm/ptrace.h" +#include "kvm_util.h" +#include "sie.h" + +static inline void print_hex_bytes(const char *name, u64 addr, size_t len) +{ + u64 pos; + + pr_debug("%s (%p)\n", name, (void *)addr); + pr_debug(" 0/0x00---------|"); + if (len > 8) + pr_debug(" 8/0x08---------|"); + if (len > 16) + pr_debug(" 16/0x10--------|"); + if (len > 24) + pr_debug(" 24/0x18--------|"); + for (pos = 0; pos < len; pos += 8) { + if ((pos % 32) == 0) + pr_debug("\n %3lu 0x%.3lx ", pos, pos); + pr_debug(" %16lx", *((u64 *)(addr + pos))); + } + pr_debug("\n"); +} + +static inline void print_hex(const char *name, u64 addr) +{ + print_hex_bytes(name, addr, 512); +} + +static inline void print_psw(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + pr_debug("flags:0x%x psw:0x%.16llx:0x%.16llx exit:%u %s\n", + run->flags, + run->psw_mask, run->psw_addr, + run->exit_reason, exit_reason_str(run->exit_reason)); + pr_debug("sie_block psw:0x%.16llx:0x%.16llx\n", + sie_block->psw_mask, sie_block->psw_addr); +} + +static inline void print_run(struct kvm_run *run, struct kvm_s390_sie_block *sie_block) +{ + print_hex_bytes("run", (u64)run, 0x150); + print_hex("sie_block", (u64)sie_block); + print_psw(run, sie_block); +} + +static inline void print_regs(struct kvm_run *run) +{ + struct kvm_sync_regs *sync_regs = &run->s.regs; + + print_hex_bytes("GPRS", (u64)sync_regs->gprs, 8 * NUM_GPRS); + print_hex_bytes("ACRS", (u64)sync_regs->acrs, 4 * NUM_ACRS); + print_hex_bytes("CRS", (u64)sync_regs->crs, 8 * NUM_CRS); +} + +#endif /* SELFTEST_KVM_DEBUG_PRINT_H */ diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h index 255c9b990f4c..481bd2fd6a32 100644 --- a/tools/testing/selftests/kvm/include/s390x/processor.h +++ b/tools/testing/selftests/kvm/include/s390x/processor.h @@ -21,6 +21,11 @@ #define PAGE_PROTECT 0x200 /* HW read-only bit */ #define PAGE_NOEXEC 0x100 /* HW no-execute bit */ +/* Page size definitions */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE BIT_ULL(PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + /* Is there a portable way to do this? */ static inline void cpu_relax(void) { diff --git a/tools/testing/selftests/kvm/include/s390x/sie.h b/tools/testing/selftests/kvm/include/s390x/sie.h new file mode 100644 index 000000000000..160acd4a1db9 --- /dev/null +++ b/tools/testing/selftests/kvm/include/s390x/sie.h @@ -0,0 +1,240 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Definition for kernel virtual machines on s390. + * + * Adapted copy of struct definition kvm_s390_sie_block from + * arch/s390/include/asm/kvm_host.h for use in userspace selftest programs. + * + * Copyright IBM Corp. 2008, 2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + * Carsten Otte <cotte@de.ibm.com> + */ + +#ifndef SELFTEST_KVM_SIE_H +#define SELFTEST_KVM_SIE_H + +#include <linux/types.h> + +struct kvm_s390_sie_block { +#define CPUSTAT_STOPPED 0x80000000 +#define CPUSTAT_WAIT 0x10000000 +#define CPUSTAT_ECALL_PEND 0x08000000 +#define CPUSTAT_STOP_INT 0x04000000 +#define CPUSTAT_IO_INT 0x02000000 +#define CPUSTAT_EXT_INT 0x01000000 +#define CPUSTAT_RUNNING 0x00800000 +#define CPUSTAT_RETAINED 0x00400000 +#define CPUSTAT_TIMING_SUB 0x00020000 +#define CPUSTAT_SIE_SUB 0x00010000 +#define CPUSTAT_RRF 0x00008000 +#define CPUSTAT_SLSV 0x00004000 +#define CPUSTAT_SLSR 0x00002000 +#define CPUSTAT_ZARCH 0x00000800 +#define CPUSTAT_MCDS 0x00000100 +#define CPUSTAT_KSS 0x00000200 +#define CPUSTAT_SM 0x00000080 +#define CPUSTAT_IBS 0x00000040 +#define CPUSTAT_GED2 0x00000010 +#define CPUSTAT_G 0x00000008 +#define CPUSTAT_GED 0x00000004 +#define CPUSTAT_J 0x00000002 +#define CPUSTAT_P 0x00000001 + __u32 cpuflags; /* 0x0000 */ + __u32: 1; /* 0x0004 */ + __u32 prefix : 18; + __u32: 1; + __u32 ibc : 12; + __u8 reserved08[4]; /* 0x0008 */ +#define PROG_IN_SIE BIT(0) + __u32 prog0c; /* 0x000c */ + union { + __u8 reserved10[16]; /* 0x0010 */ + struct { + __u64 pv_handle_cpu; + __u64 pv_handle_config; + }; + }; +#define PROG_BLOCK_SIE BIT(0) +#define PROG_REQUEST BIT(1) + __u32 prog20; /* 0x0020 */ + __u8 reserved24[4]; /* 0x0024 */ + __u64 cputm; /* 0x0028 */ + __u64 ckc; /* 0x0030 */ + __u64 epoch; /* 0x0038 */ + __u32 svcc; /* 0x0040 */ +#define LCTL_CR0 0x8000 +#define LCTL_CR6 0x0200 +#define LCTL_CR9 0x0040 +#define LCTL_CR10 0x0020 +#define LCTL_CR11 0x0010 +#define LCTL_CR14 0x0002 + __u16 lctl; /* 0x0044 */ + __s16 icpua; /* 0x0046 */ +#define ICTL_OPEREXC 0x80000000 +#define ICTL_PINT 0x20000000 +#define ICTL_LPSW 0x00400000 +#define ICTL_STCTL 0x00040000 +#define ICTL_ISKE 0x00004000 +#define ICTL_SSKE 0x00002000 +#define ICTL_RRBE 0x00001000 +#define ICTL_TPROT 0x00000200 + __u32 ictl; /* 0x0048 */ +#define ECA_CEI 0x80000000 +#define ECA_IB 0x40000000 +#define ECA_SIGPI 0x10000000 +#define ECA_MVPGI 0x01000000 +#define ECA_AIV 0x00200000 +#define ECA_VX 0x00020000 +#define ECA_PROTEXCI 0x00002000 +#define ECA_APIE 0x00000008 +#define ECA_SII 0x00000001 + __u32 eca; /* 0x004c */ +#define ICPT_INST 0x04 +#define ICPT_PROGI 0x08 +#define ICPT_INSTPROGI 0x0C +#define ICPT_EXTREQ 0x10 +#define ICPT_EXTINT 0x14 +#define ICPT_IOREQ 0x18 +#define ICPT_WAIT 0x1c +#define ICPT_VALIDITY 0x20 +#define ICPT_STOP 0x28 +#define ICPT_OPEREXC 0x2C +#define ICPT_PARTEXEC 0x38 +#define ICPT_IOINST 0x40 +#define ICPT_KSS 0x5c +#define ICPT_MCHKREQ 0x60 +#define ICPT_INT_ENABLE 0x64 +#define ICPT_PV_INSTR 0x68 +#define ICPT_PV_NOTIFY 0x6c +#define ICPT_PV_PREF 0x70 + __u8 icptcode; /* 0x0050 */ + __u8 icptstatus; /* 0x0051 */ + __u16 ihcpu; /* 0x0052 */ + __u8 reserved54; /* 0x0054 */ +#define IICTL_CODE_NONE 0x00 +#define IICTL_CODE_MCHK 0x01 +#define IICTL_CODE_EXT 0x02 +#define IICTL_CODE_IO 0x03 +#define IICTL_CODE_RESTART 0x04 +#define IICTL_CODE_SPECIFICATION 0x10 +#define IICTL_CODE_OPERAND 0x11 + __u8 iictl; /* 0x0055 */ + __u16 ipa; /* 0x0056 */ + __u32 ipb; /* 0x0058 */ + __u32 scaoh; /* 0x005c */ +#define FPF_BPBC 0x20 + __u8 fpf; /* 0x0060 */ +#define ECB_GS 0x40 +#define ECB_TE 0x10 +#define ECB_SPECI 0x08 +#define ECB_SRSI 0x04 +#define ECB_HOSTPROTINT 0x02 +#define ECB_PTF 0x01 + __u8 ecb; /* 0x0061 */ +#define ECB2_CMMA 0x80 +#define ECB2_IEP 0x20 +#define ECB2_PFMFI 0x08 +#define ECB2_ESCA 0x04 +#define ECB2_ZPCI_LSI 0x02 + __u8 ecb2; /* 0x0062 */ +#define ECB3_AISI 0x20 +#define ECB3_AISII 0x10 +#define ECB3_DEA 0x08 +#define ECB3_AES 0x04 +#define ECB3_RI 0x01 + __u8 ecb3; /* 0x0063 */ +#define ESCA_SCAOL_MASK ~0x3fU + __u32 scaol; /* 0x0064 */ + __u8 sdf; /* 0x0068 */ + __u8 epdx; /* 0x0069 */ + __u8 cpnc; /* 0x006a */ + __u8 reserved6b; /* 0x006b */ + __u32 todpr; /* 0x006c */ +#define GISA_FORMAT1 0x00000001 + __u32 gd; /* 0x0070 */ + __u8 reserved74[12]; /* 0x0074 */ + __u64 mso; /* 0x0080 */ + __u64 msl; /* 0x0088 */ + __u64 psw_mask; /* 0x0090 */ + __u64 psw_addr; /* 0x0098 */ + __u64 gg14; /* 0x00a0 */ + __u64 gg15; /* 0x00a8 */ + __u8 reservedb0[8]; /* 0x00b0 */ +#define HPID_KVM 0x4 +#define HPID_VSIE 0x5 + __u8 hpid; /* 0x00b8 */ + __u8 reservedb9[7]; /* 0x00b9 */ + union { + struct { + __u32 eiparams; /* 0x00c0 */ + __u16 extcpuaddr; /* 0x00c4 */ + __u16 eic; /* 0x00c6 */ + }; + __u64 mcic; /* 0x00c0 */ + } __packed; + __u32 reservedc8; /* 0x00c8 */ + union { + struct { + __u16 pgmilc; /* 0x00cc */ + __u16 iprcc; /* 0x00ce */ + }; + __u32 edc; /* 0x00cc */ + } __packed; + union { + struct { + __u32 dxc; /* 0x00d0 */ + __u16 mcn; /* 0x00d4 */ + __u8 perc; /* 0x00d6 */ + __u8 peratmid; /* 0x00d7 */ + }; + __u64 faddr; /* 0x00d0 */ + } __packed; + __u64 peraddr; /* 0x00d8 */ + __u8 eai; /* 0x00e0 */ + __u8 peraid; /* 0x00e1 */ + __u8 oai; /* 0x00e2 */ + __u8 armid; /* 0x00e3 */ + __u8 reservede4[4]; /* 0x00e4 */ + union { + __u64 tecmc; /* 0x00e8 */ + struct { + __u16 subchannel_id; /* 0x00e8 */ + __u16 subchannel_nr; /* 0x00ea */ + __u32 io_int_parm; /* 0x00ec */ + __u32 io_int_word; /* 0x00f0 */ + }; + } __packed; + __u8 reservedf4[8]; /* 0x00f4 */ +#define CRYCB_FORMAT_MASK 0x00000003 +#define CRYCB_FORMAT0 0x00000000 +#define CRYCB_FORMAT1 0x00000001 +#define CRYCB_FORMAT2 0x00000003 + __u32 crycbd; /* 0x00fc */ + __u64 gcr[16]; /* 0x0100 */ + union { + __u64 gbea; /* 0x0180 */ + __u64 sidad; + }; + __u8 reserved188[8]; /* 0x0188 */ + __u64 sdnxo; /* 0x0190 */ + __u8 reserved198[8]; /* 0x0198 */ + __u32 fac; /* 0x01a0 */ + __u8 reserved1a4[20]; /* 0x01a4 */ + __u64 cbrlo; /* 0x01b8 */ + __u8 reserved1c0[8]; /* 0x01c0 */ +#define ECD_HOSTREGMGMT 0x20000000 +#define ECD_MEF 0x08000000 +#define ECD_ETOKENF 0x02000000 +#define ECD_ECC 0x00200000 + __u32 ecd; /* 0x01c8 */ + __u8 reserved1cc[18]; /* 0x01cc */ + __u64 pp; /* 0x01de */ + __u8 reserved1e6[2]; /* 0x01e6 */ + __u64 itdba; /* 0x01e8 */ + __u64 riccbd; /* 0x01f0 */ + __u64 gvrd; /* 0x01f8 */ +} __packed __aligned(512); + +#endif /* SELFTEST_KVM_SIE_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h index 0f268b55fa06..51990094effd 100644 --- a/tools/testing/selftests/kvm/include/x86_64/apic.h +++ b/tools/testing/selftests/kvm/include/x86_64/apic.h @@ -11,6 +11,7 @@ #include <stdint.h> #include "processor.h" +#include "ucall_common.h" #define APIC_DEFAULT_GPA 0xfee00000ULL @@ -93,9 +94,27 @@ static inline uint64_t x2apic_read_reg(unsigned int reg) return rdmsr(APIC_BASE_MSR + (reg >> 4)); } +static inline uint8_t x2apic_write_reg_safe(unsigned int reg, uint64_t value) +{ + return wrmsr_safe(APIC_BASE_MSR + (reg >> 4), value); +} + static inline void x2apic_write_reg(unsigned int reg, uint64_t value) { - wrmsr(APIC_BASE_MSR + (reg >> 4), value); + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(!fault, "Unexpected fault 0x%x on WRMSR(%x) = %lx\n", + fault, APIC_BASE_MSR + (reg >> 4), value); } +static inline void x2apic_write_reg_fault(unsigned int reg, uint64_t value) +{ + uint8_t fault = x2apic_write_reg_safe(reg, value); + + __GUEST_ASSERT(fault == GP_VECTOR, + "Wanted #GP on WRMSR(%x) = %lx, got 0x%x\n", + APIC_BASE_MSR + (reg >> 4), value, fault); +} + + #endif /* SELFTEST_KVM_APIC_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h index fa65b908b13e..6849e2552f1b 100644 --- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h +++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h @@ -186,6 +186,18 @@ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14) +/* HYPERV_CPUID_NESTED_FEATURES.EAX */ +#define HV_X64_NESTED_DIRECT_FLUSH \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 17) +#define HV_X64_NESTED_GUEST_MAPPING_FLUSH \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 18) +#define HV_X64_NESTED_MSR_BITMAP \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EAX, 19) + +/* HYPERV_CPUID_NESTED_FEATURES.EBX */ +#define HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL \ + KVM_X86_CPU_FEATURE(HYPERV_CPUID_NESTED_FEATURES, 0, EBX, 0) + /* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */ #define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \ KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1) @@ -343,4 +355,10 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, /* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */ #define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0) +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); + +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature); + #endif /* !SELFTEST_KVM_HYPERV_H */ diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index a0c1440017bb..e247f99e0473 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -25,6 +25,10 @@ extern bool host_cpu_is_intel; extern bool host_cpu_is_amd; extern uint64_t guest_tsc_khz; +#ifndef MAX_NR_CPUID_ENTRIES +#define MAX_NR_CPUID_ENTRIES 100 +#endif + /* Forced emulation prefix, used to invoke the emulator unconditionally. */ #define KVM_FEP "ud2; .byte 'k', 'v', 'm';" @@ -908,8 +912,6 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs) const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid, uint32_t function, uint32_t index); const struct kvm_cpuid2 *kvm_get_supported_cpuid(void); -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void); -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu); static inline uint32_t kvm_cpu_fms(void) { @@ -1009,7 +1011,6 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries) } void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid); -void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu); static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function, diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 56b170b725b3..a2b7df5f1d39 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -712,16 +712,13 @@ void kvm_vm_release(struct kvm_vm *vmp) } static void __vm_mem_region_delete(struct kvm_vm *vm, - struct userspace_mem_region *region, - bool unlink) + struct userspace_mem_region *region) { int ret; - if (unlink) { - rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); - rb_erase(®ion->hva_node, &vm->regions.hva_tree); - hash_del(®ion->slot_node); - } + rb_erase(®ion->gpa_node, &vm->regions.gpa_tree); + rb_erase(®ion->hva_node, &vm->regions.hva_tree); + hash_del(®ion->slot_node); region->region.memory_size = 0; vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, ®ion->region); @@ -762,7 +759,7 @@ void kvm_vm_free(struct kvm_vm *vmp) /* Free userspace_mem_regions. */ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node) - __vm_mem_region_delete(vmp, region, false); + __vm_mem_region_delete(vmp, region); /* Free sparsebit arrays. */ sparsebit_free(&vmp->vpages_valid); @@ -794,76 +791,6 @@ int kvm_memfd_alloc(size_t size, bool hugepages) return fd; } -/* - * Memory Compare, host virtual to guest virtual - * - * Input Args: - * hva - Starting host virtual address - * vm - Virtual Machine - * gva - Starting guest virtual address - * len - number of bytes to compare - * - * Output Args: None - * - * Input/Output Args: None - * - * Return: - * Returns 0 if the bytes starting at hva for a length of len - * are equal the guest virtual bytes starting at gva. Returns - * a value < 0, if bytes at hva are less than those at gva. - * Otherwise a value > 0 is returned. - * - * Compares the bytes starting at the host virtual address hva, for - * a length of len, to the guest bytes starting at the guest virtual - * address given by gva. - */ -int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len) -{ - size_t amt; - - /* - * Compare a batch of bytes until either a match is found - * or all the bytes have been compared. - */ - for (uintptr_t offset = 0; offset < len; offset += amt) { - uintptr_t ptr1 = (uintptr_t)hva + offset; - - /* - * Determine host address for guest virtual address - * at offset. - */ - uintptr_t ptr2 = (uintptr_t)addr_gva2hva(vm, gva + offset); - - /* - * Determine amount to compare on this pass. - * Don't allow the comparsion to cross a page boundary. - */ - amt = len - offset; - if ((ptr1 >> vm->page_shift) != ((ptr1 + amt) >> vm->page_shift)) - amt = vm->page_size - (ptr1 % vm->page_size); - if ((ptr2 >> vm->page_shift) != ((ptr2 + amt) >> vm->page_shift)) - amt = vm->page_size - (ptr2 % vm->page_size); - - assert((ptr1 >> vm->page_shift) == ((ptr1 + amt - 1) >> vm->page_shift)); - assert((ptr2 >> vm->page_shift) == ((ptr2 + amt - 1) >> vm->page_shift)); - - /* - * Perform the comparison. If there is a difference - * return that result to the caller, otherwise need - * to continue on looking for a mismatch. - */ - int ret = memcmp((void *)ptr1, (void *)ptr2, amt); - if (ret != 0) - return ret; - } - - /* - * No mismatch found. Let the caller know the two memory - * areas are equal. - */ - return 0; -} - static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree, struct userspace_mem_region *region) { @@ -1270,7 +1197,7 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa) */ void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot) { - __vm_mem_region_delete(vm, memslot2region(vm, slot), true); + __vm_mem_region_delete(vm, memslot2region(vm, slot)); } void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c index 4ad4492eea1d..20cfe970e3e3 100644 --- a/tools/testing/selftests/kvm/lib/s390x/processor.c +++ b/tools/testing/selftests/kvm/lib/s390x/processor.c @@ -14,7 +14,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) { vm_paddr_t paddr; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); if (vm->pgd_created) @@ -79,7 +79,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa) } /* Fill in page table entry */ - idx = (gva >> 12) & 0x0ffu; /* page index */ + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ if (!(entry[idx] & PAGE_INVALID)) fprintf(stderr, "WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa); @@ -91,7 +91,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) int ri, idx; uint64_t *entry; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); entry = addr_gpa2hva(vm, vm->pgd); @@ -103,7 +103,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN); } - idx = (gva >> 12) & 0x0ffu; /* page index */ + idx = (gva >> PAGE_SHIFT) & 0x0ffu; /* page index */ TEST_ASSERT(!(entry[idx] & PAGE_INVALID), "No page mapping for vm virtual address 0x%lx", gva); @@ -168,7 +168,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) struct kvm_sregs sregs; struct kvm_vcpu *vcpu; - TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x", + TEST_ASSERT(vm->page_size == PAGE_SIZE, "Unsupported page size: 0x%x", vm->page_size); stack_vaddr = __vm_vaddr_alloc(vm, stack_size, diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c index efb7e7a1354d..15bc8cd583aa 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c +++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c @@ -8,6 +8,73 @@ #include "processor.h" #include "hyperv.h" +const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) +{ + static struct kvm_cpuid2 *cpuid; + int kvm_fd; + + if (cpuid) + return cpuid; + + cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + kvm_fd = open_kvm_dev_path_or_exit(); + + kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + close(kvm_fd); + return cpuid; +} + +void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) +{ + static struct kvm_cpuid2 *cpuid_full; + const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; + int i, nent = 0; + + if (!cpuid_full) { + cpuid_sys = kvm_get_supported_cpuid(); + cpuid_hv = kvm_get_supported_hv_cpuid(); + + cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); + if (!cpuid_full) { + perror("malloc"); + abort(); + } + + /* Need to skip KVM CPUID leaves 0x400000xx */ + for (i = 0; i < cpuid_sys->nent; i++) { + if (cpuid_sys->entries[i].function >= 0x40000000 && + cpuid_sys->entries[i].function < 0x40000100) + continue; + cpuid_full->entries[nent] = cpuid_sys->entries[i]; + nent++; + } + + memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, + cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); + cpuid_full->nent = nent + cpuid_hv->nent; + } + + vcpu_init_cpuid(vcpu, cpuid_full); +} + +const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); + + vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + + return cpuid; +} + +bool kvm_hv_cpu_has(struct kvm_x86_cpu_feature feature) +{ + if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) + return false; + + return kvm_cpuid_has(kvm_get_supported_hv_cpuid(), feature); +} + struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm, vm_vaddr_t *p_hv_pages_gva) { diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c index 153739f2e201..974bcd2df6d7 100644 --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c @@ -19,8 +19,6 @@ #define KERNEL_DS 0x10 #define KERNEL_TSS 0x18 -#define MAX_NR_CPUID_ENTRIES 100 - vm_vaddr_t exception_handlers; bool host_cpu_is_amd; bool host_cpu_is_intel; @@ -566,10 +564,8 @@ void route_exception(struct ex_regs *regs) if (kvm_fixup_exception(regs)) return; - ucall_assert(UCALL_UNHANDLED, - "Unhandled exception in guest", __FILE__, __LINE__, - "Unhandled exception '0x%lx' at guest RIP '0x%lx'", - regs->vector, regs->rip); + GUEST_FAIL("Unhandled exception '0x%lx' at guest RIP '0x%lx'", + regs->vector, regs->rip); } static void vm_init_descriptor_tables(struct kvm_vm *vm) @@ -611,7 +607,7 @@ void assert_on_unhandled_exception(struct kvm_vcpu *vcpu) { struct ucall uc; - if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) + if (get_ucall(vcpu, &uc) == UCALL_ABORT) REPORT_GUEST_ASSERT(uc); } @@ -1195,65 +1191,6 @@ void xen_hypercall(uint64_t nr, uint64_t a0, void *a1) GUEST_ASSERT(!__xen_hypercall(nr, a0, a1)); } -const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void) -{ - static struct kvm_cpuid2 *cpuid; - int kvm_fd; - - if (cpuid) - return cpuid; - - cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - kvm_fd = open_kvm_dev_path_or_exit(); - - kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - close(kvm_fd); - return cpuid; -} - -void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu) -{ - static struct kvm_cpuid2 *cpuid_full; - const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv; - int i, nent = 0; - - if (!cpuid_full) { - cpuid_sys = kvm_get_supported_cpuid(); - cpuid_hv = kvm_get_supported_hv_cpuid(); - - cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent); - if (!cpuid_full) { - perror("malloc"); - abort(); - } - - /* Need to skip KVM CPUID leaves 0x400000xx */ - for (i = 0; i < cpuid_sys->nent; i++) { - if (cpuid_sys->entries[i].function >= 0x40000000 && - cpuid_sys->entries[i].function < 0x40000100) - continue; - cpuid_full->entries[nent] = cpuid_sys->entries[i]; - nent++; - } - - memcpy(&cpuid_full->entries[nent], cpuid_hv->entries, - cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2)); - cpuid_full->nent = nent + cpuid_hv->nent; - } - - vcpu_init_cpuid(vcpu, cpuid_full); -} - -const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu) -{ - struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES); - - vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid); - - return cpuid; -} - unsigned long vm_compute_max_gfn(struct kvm_vm *vm) { const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */ diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c index 49f162573126..c81a84990eab 100644 --- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c +++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c @@ -79,6 +79,7 @@ struct test_params { useconds_t delay; uint64_t nr_iterations; bool partition_vcpu_memory_access; + bool disable_slot_zap_quirk; }; static void run_test(enum vm_guest_mode mode, void *arg) @@ -89,6 +90,13 @@ static void run_test(enum vm_guest_mode mode, void *arg) vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1, VM_MEM_SRC_ANONYMOUS, p->partition_vcpu_memory_access); +#ifdef __x86_64__ + if (p->disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + + pr_info("Memslot zap quirk %s\n", p->disable_slot_zap_quirk ? + "disabled" : "enabled"); +#endif pr_info("Finished creating vCPUs\n"); @@ -107,11 +115,12 @@ static void run_test(enum vm_guest_mode mode, void *arg) static void help(char *name) { puts(""); - printf("usage: %s [-h] [-m mode] [-d delay_usec]\n" + printf("usage: %s [-h] [-m mode] [-d delay_usec] [-q]\n" " [-b memory] [-v vcpus] [-o] [-i iterations]\n", name); guest_modes_help(); printf(" -d: add a delay between each iteration of adding and\n" " deleting a memslot in usec.\n"); + printf(" -q: Disable memslot zap quirk.\n"); printf(" -b: specify the size of the memory region which should be\n" " accessed by each vCPU. e.g. 10M or 3G.\n" " Default: 1G\n"); @@ -137,7 +146,7 @@ int main(int argc, char *argv[]) guest_modes_append_default(); - while ((opt = getopt(argc, argv, "hm:d:b:v:oi:")) != -1) { + while ((opt = getopt(argc, argv, "hm:d:qb:v:oi:")) != -1) { switch (opt) { case 'm': guest_modes_cmdline(optarg); @@ -160,6 +169,14 @@ int main(int argc, char *argv[]) case 'i': p.nr_iterations = atoi_positive("Number of iterations", optarg); break; +#ifdef __x86_64__ + case 'q': + p.disable_slot_zap_quirk = true; + + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & + KVM_X86_QUIRK_SLOT_ZAP_ALL); + break; +#endif case 'h': default: help(argv[0]); diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c index 579a64f97333..989ffe0d047f 100644 --- a/tools/testing/selftests/kvm/memslot_perf_test.c +++ b/tools/testing/selftests/kvm/memslot_perf_test.c @@ -113,6 +113,9 @@ static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless"); static sem_t vcpu_ready; static bool map_unmap_verify; +#ifdef __x86_64__ +static bool disable_slot_zap_quirk; +#endif static bool verbose; #define pr_info_v(...) \ @@ -578,6 +581,11 @@ static bool test_memslot_move_prepare(struct vm_data *data, uint32_t guest_page_size = data->vm->page_size; uint64_t movesrcgpa, movetestgpa; +#ifdef __x86_64__ + if (disable_slot_zap_quirk) + vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); +#endif + movesrcgpa = vm_slot2gpa(data, data->nslots - 1); if (isactive) { @@ -896,6 +904,7 @@ static void help(char *name, struct test_args *targs) pr_info(" -h: print this help screen.\n"); pr_info(" -v: enable verbose mode (not for benchmarking).\n"); pr_info(" -d: enable extra debug checks.\n"); + pr_info(" -q: Disable memslot zap quirk during memslot move.\n"); pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n", targs->nslots); pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n", @@ -954,7 +963,7 @@ static bool parse_args(int argc, char *argv[], uint32_t max_mem_slots; int opt; - while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) { + while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) { switch (opt) { case 'h': default: @@ -966,6 +975,13 @@ static bool parse_args(int argc, char *argv[], case 'd': map_unmap_verify = true; break; +#ifdef __x86_64__ + case 'q': + disable_slot_zap_quirk = true; + TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & + KVM_X86_QUIRK_SLOT_ZAP_ALL); + break; +#endif case 's': targs->nslots = atoi_paranoid(optarg); if (targs->nslots <= 1 && targs->nslots != -1) { diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c index b39033844756..e32dd59703a0 100644 --- a/tools/testing/selftests/kvm/s390x/cmma_test.c +++ b/tools/testing/selftests/kvm/s390x/cmma_test.c @@ -17,16 +17,17 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" #define MAIN_PAGE_COUNT 512 #define TEST_DATA_PAGE_COUNT 512 #define TEST_DATA_MEMSLOT 1 -#define TEST_DATA_START_GFN 4096 +#define TEST_DATA_START_GFN PAGE_SIZE #define TEST_DATA_TWO_PAGE_COUNT 256 #define TEST_DATA_TWO_MEMSLOT 2 -#define TEST_DATA_TWO_START_GFN 8192 +#define TEST_DATA_TWO_START_GFN (2 * PAGE_SIZE) static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT]; @@ -66,7 +67,7 @@ static void guest_dirty_test_data(void) " lghi 5,%[page_count]\n" /* r5 += r1 */ "2: agfr 5,1\n" - /* r2 = r1 << 12 */ + /* r2 = r1 << PAGE_SHIFT */ "1: sllg 2,1,12(0)\n" /* essa(r4, r2, SET_STABLE) */ " .insn rrf,0xb9ab0000,4,2,1,0\n" diff --git a/tools/testing/selftests/kvm/s390x/config b/tools/testing/selftests/kvm/s390x/config new file mode 100644 index 000000000000..23270f2d679f --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/config @@ -0,0 +1,2 @@ +CONFIG_KVM=y +CONFIG_KVM_S390_UCONTROL=y diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c index 84313fb27529..ad8095968601 100644 --- a/tools/testing/selftests/kvm/s390x/debug_test.c +++ b/tools/testing/selftests/kvm/s390x/debug_test.c @@ -2,12 +2,12 @@ /* Test KVM debugging features. */ #include "kvm_util.h" #include "test_util.h" +#include "sie.h" #include <linux/kvm.h> #define __LC_SVC_NEW_PSW 0x1c0 #define __LC_PGM_NEW_PSW 0x1d0 -#define ICPT_INSTRUCTION 0x04 #define IPA0_DIAG 0x8300 #define PGM_SPECIFICATION 0x06 @@ -85,7 +85,7 @@ static void test_step_pgm_diag(void) vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code, __LC_PGM_NEW_PSW, new_psw); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC); - TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION); + TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INST); TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG); vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq); vcpu_run(vcpu); diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c index f2df7416be84..4374b4cd2a80 100644 --- a/tools/testing/selftests/kvm/s390x/memop.c +++ b/tools/testing/selftests/kvm/s390x/memop.c @@ -16,6 +16,7 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" enum mop_target { LOGICAL, @@ -226,9 +227,6 @@ static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo, #define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); }) -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1ULL << PAGE_SHIFT) -#define PAGE_MASK (~(PAGE_SIZE - 1)) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c index 7a742a673b7c..12d5e1cb62e3 100644 --- a/tools/testing/selftests/kvm/s390x/tprot.c +++ b/tools/testing/selftests/kvm/s390x/tprot.c @@ -9,9 +9,8 @@ #include "kvm_util.h" #include "kselftest.h" #include "ucall_common.h" +#include "processor.h" -#define PAGE_SHIFT 12 -#define PAGE_SIZE (1 << PAGE_SHIFT) #define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38)) #define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39)) @@ -151,7 +150,7 @@ static enum stage perform_next_stage(int *i, bool mapped_0) * instead. * In order to skip these tests we detect this inside the guest */ - skip = tests[*i].addr < (void *)4096 && + skip = tests[*i].addr < (void *)PAGE_SIZE && tests[*i].expected != TRANSL_UNAVAIL && !mapped_0; if (!skip) { diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c new file mode 100644 index 000000000000..f257beec1430 --- /dev/null +++ b/tools/testing/selftests/kvm/s390x/ucontrol_test.c @@ -0,0 +1,332 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test code for the s390x kvm ucontrol interface + * + * Copyright IBM Corp. 2024 + * + * Authors: + * Christoph Schlameuss <schlameuss@linux.ibm.com> + */ +#include "debug_print.h" +#include "kselftest_harness.h" +#include "kvm_util.h" +#include "processor.h" +#include "sie.h" + +#include <linux/capability.h> +#include <linux/sizes.h> + +#define VM_MEM_SIZE (4 * SZ_1M) + +/* so directly declare capget to check caps without libcap */ +int capget(cap_user_header_t header, cap_user_data_t data); + +/** + * In order to create user controlled virtual machines on S390, + * check KVM_CAP_S390_UCONTROL and use the flag KVM_VM_S390_UCONTROL + * as privileged user (SYS_ADMIN). + */ +void require_ucontrol_admin(void) +{ + struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3]; + struct __user_cap_header_struct hdr = { + .version = _LINUX_CAPABILITY_VERSION_3, + }; + int rc; + + rc = capget(&hdr, data); + TEST_ASSERT_EQ(0, rc); + TEST_REQUIRE((data->effective & CAP_TO_MASK(CAP_SYS_ADMIN)) > 0); + + TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_UCONTROL)); +} + +/* Test program setting some registers and looping */ +extern char test_gprs_asm[]; +asm("test_gprs_asm:\n" + "xgr %r0, %r0\n" + "lgfi %r1,1\n" + "lgfi %r2,2\n" + "lgfi %r3,3\n" + "lgfi %r4,4\n" + "lgfi %r5,5\n" + "lgfi %r6,6\n" + "lgfi %r7,7\n" + "0:\n" + " diag 0,0,0x44\n" + " ahi %r0,1\n" + " j 0b\n" +); + +FIXTURE(uc_kvm) +{ + struct kvm_s390_sie_block *sie_block; + struct kvm_run *run; + uintptr_t base_gpa; + uintptr_t code_gpa; + uintptr_t base_hva; + uintptr_t code_hva; + int kvm_run_size; + void *vm_mem; + int vcpu_fd; + int kvm_fd; + int vm_fd; +}; + +/** + * create VM with single vcpu, map kvm_run and SIE control block for easy access + */ +FIXTURE_SETUP(uc_kvm) +{ + struct kvm_s390_vm_cpu_processor info; + int rc; + + require_ucontrol_admin(); + + self->kvm_fd = open_kvm_dev_path_or_exit(); + self->vm_fd = ioctl(self->kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(self->vm_fd, 0); + + kvm_device_attr_get(self->vm_fd, KVM_S390_VM_CPU_MODEL, + KVM_S390_VM_CPU_PROCESSOR, &info); + TH_LOG("create VM 0x%llx", info.cpuid); + + self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(self->vcpu_fd, 0); + + self->kvm_run_size = ioctl(self->kvm_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); + ASSERT_GE(self->kvm_run_size, sizeof(struct kvm_run)) + TH_LOG(KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, self->kvm_run_size)); + self->run = (struct kvm_run *)mmap(NULL, self->kvm_run_size, + PROT_READ | PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0); + ASSERT_NE(self->run, MAP_FAILED); + /** + * For virtual cpus that have been created with S390 user controlled + * virtual machines, the resulting vcpu fd can be memory mapped at page + * offset KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of + * the virtual cpu's hardware control block. + */ + self->sie_block = (struct kvm_s390_sie_block *)mmap(NULL, PAGE_SIZE, + PROT_READ | PROT_WRITE, MAP_SHARED, + self->vcpu_fd, KVM_S390_SIE_PAGE_OFFSET << PAGE_SHIFT); + ASSERT_NE(self->sie_block, MAP_FAILED); + + TH_LOG("VM created %p %p", self->run, self->sie_block); + + self->base_gpa = 0; + self->code_gpa = self->base_gpa + (3 * SZ_1M); + + self->vm_mem = aligned_alloc(SZ_1M, VM_MEM_SIZE); + ASSERT_NE(NULL, self->vm_mem) TH_LOG("malloc failed %u", errno); + self->base_hva = (uintptr_t)self->vm_mem; + self->code_hva = self->base_hva - self->base_gpa + self->code_gpa; + struct kvm_s390_ucas_mapping map = { + .user_addr = self->base_hva, + .vcpu_addr = self->base_gpa, + .length = VM_MEM_SIZE, + }; + TH_LOG("ucas map %p %p 0x%llx", + (void *)map.user_addr, (void *)map.vcpu_addr, map.length); + rc = ioctl(self->vcpu_fd, KVM_S390_UCAS_MAP, &map); + ASSERT_EQ(0, rc) TH_LOG("ucas map result %d not expected, %s", + rc, strerror(errno)); + + TH_LOG("page in %p", (void *)self->base_gpa); + rc = ioctl(self->vcpu_fd, KVM_S390_VCPU_FAULT, self->base_gpa); + ASSERT_EQ(0, rc) TH_LOG("vcpu fault (%p) result %d not expected, %s", + (void *)self->base_hva, rc, strerror(errno)); + + self->sie_block->cpuflags &= ~CPUSTAT_STOPPED; +} + +FIXTURE_TEARDOWN(uc_kvm) +{ + munmap(self->sie_block, PAGE_SIZE); + munmap(self->run, self->kvm_run_size); + close(self->vcpu_fd); + close(self->vm_fd); + close(self->kvm_fd); + free(self->vm_mem); +} + +TEST_F(uc_kvm, uc_sie_assertions) +{ + /* assert interception of Code 08 (Program Interruption) is set */ + EXPECT_EQ(0, self->sie_block->ecb & ECB_SPECI); +} + +TEST_F(uc_kvm, uc_attr_mem_limit) +{ + u64 limit; + struct kvm_device_attr attr = { + .group = KVM_S390_VM_MEM_CTRL, + .attr = KVM_S390_VM_MEM_LIMIT_SIZE, + .addr = (unsigned long)&limit, + }; + int rc; + + rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr); + EXPECT_EQ(0, rc); + EXPECT_EQ(~0UL, limit); + + /* assert set not supported */ + rc = ioctl(self->vm_fd, KVM_SET_DEVICE_ATTR, &attr); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +TEST_F(uc_kvm, uc_no_dirty_log) +{ + struct kvm_dirty_log dlog; + int rc; + + rc = ioctl(self->vm_fd, KVM_GET_DIRTY_LOG, &dlog); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); +} + +/** + * Assert HPAGE CAP cannot be enabled on UCONTROL VM + */ +TEST(uc_cap_hpage) +{ + int rc, kvm_fd, vm_fd, vcpu_fd; + struct kvm_enable_cap cap = { + .cap = KVM_CAP_S390_HPAGE_1M, + }; + + require_ucontrol_admin(); + + kvm_fd = open_kvm_dev_path_or_exit(); + vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_S390_UCONTROL); + ASSERT_GE(vm_fd, 0); + + /* assert hpages are not supported on ucontrol vm */ + rc = ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M); + EXPECT_EQ(0, rc); + + /* Test that KVM_CAP_S390_HPAGE_1M can't be enabled for a ucontrol vm */ + rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EINVAL, errno); + + /* assert HPAGE CAP is rejected after vCPU creation */ + vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0); + ASSERT_GE(vcpu_fd, 0); + rc = ioctl(vm_fd, KVM_ENABLE_CAP, cap); + EXPECT_EQ(-1, rc); + EXPECT_EQ(EBUSY, errno); + + close(vcpu_fd); + close(vm_fd); + close(kvm_fd); +} + +/* verify SIEIC exit + * * fail on codes not expected in the test cases + */ +static bool uc_handle_sieic(FIXTURE_DATA(uc_kvm) * self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + struct kvm_run *run = self->run; + + /* check SIE interception code */ + pr_info("sieic: 0x%.2x 0x%.4x 0x%.4x\n", + run->s390_sieic.icptcode, + run->s390_sieic.ipa, + run->s390_sieic.ipb); + switch (run->s390_sieic.icptcode) { + case ICPT_INST: + /* end execution in caller on intercepted instruction */ + pr_info("sie instruction interception\n"); + return false; + case ICPT_OPEREXC: + /* operation exception */ + TEST_FAIL("sie exception on %.4x%.8x", sie_block->ipa, sie_block->ipb); + default: + TEST_FAIL("UNEXPECTED SIEIC CODE %d", run->s390_sieic.icptcode); + } + return true; +} + +/* verify VM state on exit */ +static bool uc_handle_exit(FIXTURE_DATA(uc_kvm) * self) +{ + struct kvm_run *run = self->run; + + switch (run->exit_reason) { + case KVM_EXIT_S390_SIEIC: + return uc_handle_sieic(self); + default: + pr_info("exit_reason %2d not handled\n", run->exit_reason); + } + return true; +} + +/* run the VM until interrupted */ +static int uc_run_once(FIXTURE_DATA(uc_kvm) * self) +{ + int rc; + + rc = ioctl(self->vcpu_fd, KVM_RUN, NULL); + print_run(self->run, self->sie_block); + print_regs(self->run); + pr_debug("run %d / %d %s\n", rc, errno, strerror(errno)); + return rc; +} + +static void uc_assert_diag44(FIXTURE_DATA(uc_kvm) * self) +{ + struct kvm_s390_sie_block *sie_block = self->sie_block; + + /* assert vm was interrupted by diag 0x0044 */ + TEST_ASSERT_EQ(KVM_EXIT_S390_SIEIC, self->run->exit_reason); + TEST_ASSERT_EQ(ICPT_INST, sie_block->icptcode); + TEST_ASSERT_EQ(0x8300, sie_block->ipa); + TEST_ASSERT_EQ(0x440000, sie_block->ipb); +} + +TEST_F(uc_kvm, uc_gprs) +{ + struct kvm_sync_regs *sync_regs = &self->run->s.regs; + struct kvm_run *run = self->run; + struct kvm_regs regs = {}; + + /* Set registers to values that are different from the ones that we expect below */ + for (int i = 0; i < 8; i++) + sync_regs->gprs[i] = 8; + run->kvm_dirty_regs |= KVM_SYNC_GPRS; + + /* copy test_gprs_asm to code_hva / code_gpa */ + TH_LOG("copy code %p to vm mapped memory %p / %p", + &test_gprs_asm, (void *)self->code_hva, (void *)self->code_gpa); + memcpy((void *)self->code_hva, &test_gprs_asm, PAGE_SIZE); + + /* DAT disabled + 64 bit mode */ + run->psw_mask = 0x0000000180000000ULL; + run->psw_addr = self->code_gpa; + + /* run and expect interception of diag 44 */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* Retrieve and check guest register values */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s)); + for (int i = 0; i < 8; i++) { + ASSERT_EQ(i, regs.gprs[i]); + ASSERT_EQ(i, sync_regs->gprs[i]); + } + + /* run and expect interception of diag 44 again */ + ASSERT_EQ(0, uc_run_once(self)); + ASSERT_EQ(false, uc_handle_exit(self)); + uc_assert_diag44(self); + + /* check continued increment of register 0 value */ + ASSERT_EQ(0, ioctl(self->vcpu_fd, KVM_GET_REGS, ®s)); + ASSERT_EQ(1, regs.gprs[0]); + ASSERT_EQ(1, sync_regs->gprs[0]); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c index bb8002084f52..a8267628e9ed 100644 --- a/tools/testing/selftests/kvm/set_memory_region_test.c +++ b/tools/testing/selftests/kvm/set_memory_region_test.c @@ -175,7 +175,7 @@ static void guest_code_move_memory_region(void) GUEST_DONE(); } -static void test_move_memory_region(void) +static void test_move_memory_region(bool disable_slot_zap_quirk) { pthread_t vcpu_thread; struct kvm_vcpu *vcpu; @@ -184,6 +184,9 @@ static void test_move_memory_region(void) vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region); + if (disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + hva = addr_gpa2hva(vm, MEM_REGION_GPA); /* @@ -266,7 +269,7 @@ static void guest_code_delete_memory_region(void) GUEST_ASSERT(0); } -static void test_delete_memory_region(void) +static void test_delete_memory_region(bool disable_slot_zap_quirk) { pthread_t vcpu_thread; struct kvm_vcpu *vcpu; @@ -276,6 +279,9 @@ static void test_delete_memory_region(void) vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region); + if (disable_slot_zap_quirk) + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL); + /* Delete the memory region, the guest should not die. */ vm_mem_region_delete(vm, MEM_REGION_SLOT); wait_for_vcpu(); @@ -553,7 +559,10 @@ int main(int argc, char *argv[]) { #ifdef __x86_64__ int i, loops; + int j, disable_slot_zap_quirk = 0; + if (kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_SLOT_ZAP_ALL) + disable_slot_zap_quirk = 1; /* * FIXME: the zero-memslot test fails on aarch64 and s390x because * KVM_RUN fails with ENOEXEC or EFAULT. @@ -579,13 +588,17 @@ int main(int argc, char *argv[]) else loops = 10; - pr_info("Testing MOVE of in-use region, %d loops\n", loops); - for (i = 0; i < loops; i++) - test_move_memory_region(); + for (j = 0; j <= disable_slot_zap_quirk; j++) { + pr_info("Testing MOVE of in-use region, %d loops, slot zap quirk %s\n", + loops, j ? "disabled" : "enabled"); + for (i = 0; i < loops; i++) + test_move_memory_region(!!j); - pr_info("Testing DELETE of in-use region, %d loops\n", loops); - for (i = 0; i < loops; i++) - test_delete_memory_region(); + pr_info("Testing DELETE of in-use region, %d loops, slot zap quirk %s\n", + loops, j ? "disabled" : "enabled"); + for (i = 0; i < loops; i++) + test_delete_memory_region(!!j); + } #endif return 0; diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index f6b295e0b2d2..76cc2df9238a 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -47,15 +47,18 @@ static void guest_code(void) /* * Single step test, covers 2 basic instructions and 2 emulated * - * Enable interrupts during the single stepping to see that - * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ + * Enable interrupts during the single stepping to see that pending + * interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ. + * + * Write MSR_IA32_TSC_DEADLINE to verify that KVM's fastpath handler + * exits to userspace due to single-step being enabled. */ asm volatile("ss_start: " "sti\n\t" "xor %%eax,%%eax\n\t" "cpuid\n\t" - "movl $0x1a0,%%ecx\n\t" - "rdmsr\n\t" + "movl $" __stringify(MSR_IA32_TSC_DEADLINE) ", %%ecx\n\t" + "wrmsr\n\t" "cli\n\t" : : : "eax", "ebx", "ecx", "edx"); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c index e192720bfe14..74cf19661309 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c @@ -242,7 +242,7 @@ int main(int argc, char *argv[]) TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c index b987a3d79715..0ddb63229bcb 100644 --- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c +++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c @@ -157,7 +157,7 @@ int main(int argc, char *argv[]) int stage; TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM)); - TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH)); + TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); /* Create VM */ vm = vm_create_with_one_vcpu(&vcpu, guest_code); diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c index 7c70c0da4fb7..2e9197eb1652 100644 --- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c +++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c @@ -160,6 +160,36 @@ static void test_sev(void *guest_code, uint64_t policy) kvm_vm_free(vm); } +static void guest_shutdown_code(void) +{ + struct desc_ptr idt; + + /* Clobber the IDT so that #UD is guaranteed to trigger SHUTDOWN. */ + memset(&idt, 0, sizeof(idt)); + __asm__ __volatile__("lidt %0" :: "m"(idt)); + + __asm__ __volatile__("ud2"); +} + +static void test_sev_es_shutdown(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + + uint32_t type = KVM_X86_SEV_ES_VM; + + vm = vm_sev_create_with_one_vcpu(type, guest_shutdown_code, &vcpu); + + vm_sev_launch(vm, SEV_POLICY_ES, NULL); + + vcpu_run(vcpu); + TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SHUTDOWN, + "Wanted SHUTDOWN, got %s", + exit_reason_str(vcpu->run->exit_reason)); + + kvm_vm_free(vm); +} + int main(int argc, char *argv[]) { TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV)); @@ -171,6 +201,8 @@ int main(int argc, char *argv[]) test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG); test_sev(guest_sev_es_code, SEV_POLICY_ES); + test_sev_es_shutdown(); + if (kvm_has_cap(KVM_CAP_XCRS) && (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) { test_sync_vmsa(0); diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c index 618cd2442390..88bcca188799 100644 --- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c +++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c @@ -13,6 +13,7 @@ struct xapic_vcpu { struct kvm_vcpu *vcpu; bool is_x2apic; + bool has_xavic_errata; }; static void xapic_guest_code(void) @@ -31,6 +32,10 @@ static void xapic_guest_code(void) } } +#define X2APIC_RSVD_BITS_MASK (GENMASK_ULL(31, 20) | \ + GENMASK_ULL(17, 16) | \ + GENMASK_ULL(13, 13)) + static void x2apic_guest_code(void) { asm volatile("cli"); @@ -41,7 +46,12 @@ static void x2apic_guest_code(void) uint64_t val = x2apic_read_reg(APIC_IRR) | x2apic_read_reg(APIC_IRR + 0x10) << 32; - x2apic_write_reg(APIC_ICR, val); + if (val & X2APIC_RSVD_BITS_MASK) { + x2apic_write_reg_fault(APIC_ICR, val); + } else { + x2apic_write_reg(APIC_ICR, val); + GUEST_ASSERT_EQ(x2apic_read_reg(APIC_ICR), val); + } GUEST_SYNC(val); } while (1); } @@ -71,27 +81,28 @@ static void ____test_icr(struct xapic_vcpu *x, uint64_t val) icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) | (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32; if (!x->is_x2apic) { - val &= (-1u | (0xffull << (32 + 24))); - TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); - } else { - TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + if (!x->has_xavic_errata) + val &= (-1u | (0xffull << (32 + 24))); + } else if (val & X2APIC_RSVD_BITS_MASK) { + return; } -} -#define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \ - GENMASK_ULL(17,16) | \ - GENMASK_ULL(13,13)) + if (x->has_xavic_errata) + TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY); + else + TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY); +} static void __test_icr(struct xapic_vcpu *x, uint64_t val) { - if (x->is_x2apic) { - /* Hardware writing vICR register requires reserved bits 31:20, - * 17:16 and 13 kept as zero to avoid #GP exception. Data value - * written to vICR should mask out those bits above. - */ - val &= ~X2APIC_RSVED_BITS_MASK; - } - ____test_icr(x, val | APIC_ICR_BUSY); + /* + * The BUSY bit is reserved on both AMD and Intel, but only AMD treats + * it is as _must_ be zero. Intel simply ignores the bit. Don't test + * the BUSY bit for x2APIC, as there is no single correct behavior. + */ + if (!x->is_x2apic) + ____test_icr(x, val | APIC_ICR_BUSY); + ____test_icr(x, val & ~(u64)APIC_ICR_BUSY); } @@ -231,6 +242,15 @@ int main(int argc, char *argv[]) vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code); x.is_x2apic = false; + /* + * AMD's AVIC implementation is buggy (fails to clear the ICR BUSY bit), + * and also diverges from KVM with respect to ICR2[23:0] (KVM and Intel + * drops writes, AMD does not). Account for the errata when checking + * that KVM reads back what was written. + */ + x.has_xavic_errata = host_cpu_is_amd && + get_kvm_amd_param_bool("avic"); + vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC); virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA); diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c index e149d0574961..2585087cdf5c 100644 --- a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c +++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c @@ -10,6 +10,7 @@ #include "test_util.h" #include "kvm_util.h" #include "processor.h" +#include "hyperv.h" #define HCALL_REGION_GPA 0xc0000000ULL #define HCALL_REGION_SLOT 10 diff --git a/tools/testing/selftests/mm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c index d2cfc9b494a0..141bf63cbe05 100644 --- a/tools/testing/selftests/mm/hmm-tests.c +++ b/tools/testing/selftests/mm/hmm-tests.c @@ -1657,7 +1657,7 @@ TEST_F(hmm2, double_map) buffer->fd = -1; buffer->size = size; - buffer->mirror = malloc(npages); + buffer->mirror = malloc(size); ASSERT_NE(buffer->mirror, NULL); /* Reserve a range of addresses. */ diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c index fc90af2a97b8..bcc73b4e805c 100644 --- a/tools/testing/selftests/mm/pagemap_ioctl.c +++ b/tools/testing/selftests/mm/pagemap_ioctl.c @@ -15,7 +15,7 @@ #include <sys/ioctl.h> #include <sys/stat.h> #include <math.h> -#include <asm-generic/unistd.h> +#include <asm/unistd.h> #include <pthread.h> #include <sys/resource.h> #include <assert.h> diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 1c04c780db66..217d8b7a7365 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -16,6 +16,7 @@ ipsec ipv6_flowlabel ipv6_flowlabel_mgr log.txt +msg_oob msg_zerocopy ncdevmem nettest diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh index 9e677aa64a06..694ece9ba3a7 100755 --- a/tools/testing/selftests/net/forwarding/no_forwarding.sh +++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh @@ -202,7 +202,7 @@ one_bridge_two_pvids() ip link set $swp2 master br0 bridge vlan add dev $swp1 vid 1 pvid untagged - bridge vlan add dev $swp1 vid 2 pvid untagged + bridge vlan add dev $swp2 vid 2 pvid untagged run_test "Switch ports in VLAN-aware bridge with different PVIDs" diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index d13fb5ea3e89..542f7886a0bc 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -13,6 +13,7 @@ TEST_PROGS += conntrack_ipip_mtu.sh TEST_PROGS += conntrack_tcp_unreplied.sh TEST_PROGS += conntrack_sctp_collision.sh TEST_PROGS += conntrack_vrf.sh +TEST_PROGS += conntrack_reverse_clash.sh TEST_PROGS += ipvs.sh TEST_PROGS += nf_conntrack_packetdrill.sh TEST_PROGS += nf_nat_edemux.sh @@ -26,8 +27,11 @@ TEST_PROGS += nft_nat.sh TEST_PROGS += nft_nat_zones.sh TEST_PROGS += nft_queue.sh TEST_PROGS += nft_synproxy.sh +TEST_PROGS += nft_tproxy_tcp.sh +TEST_PROGS += nft_tproxy_udp.sh TEST_PROGS += nft_zones_many.sh TEST_PROGS += rpath.sh +TEST_PROGS += vxlan_mtu_frag.sh TEST_PROGS += xt_string.sh TEST_PROGS_EXTENDED = nft_concat_range_perf.sh @@ -36,6 +40,7 @@ TEST_GEN_PROGS = conntrack_dump_flush TEST_GEN_FILES = audit_logread TEST_GEN_FILES += connect_close nf_queue +TEST_GEN_FILES += conntrack_reverse_clash TEST_GEN_FILES += sctp_collision include ../../lib.mk diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index b2dd4db45215..43d8b500d391 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -7,6 +7,7 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_T_FILTER=m CONFIG_BRIDGE_NETFILTER=m CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m CONFIG_INET_ESP=m @@ -81,8 +82,10 @@ CONFIG_NFT_QUEUE=m CONFIG_NFT_QUOTA=m CONFIG_NFT_REDIR=m CONFIG_NFT_SYNPROXY=m +CONFIG_NFT_TPROXY=m CONFIG_VETH=m CONFIG_VLAN_8021Q=m +CONFIG_VXLAN=m CONFIG_XFRM_USER=m CONFIG_XFRM_STATISTICS=y CONFIG_NET_PKTGEN=m diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c index bd9317bf5ada..dc056fec993b 100644 --- a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c @@ -207,6 +207,7 @@ static int conntrack_data_generate_v6(struct mnl_socket *sock, static int count_entries(const struct nlmsghdr *nlh, void *data) { reply_counter++; + return MNL_CB_OK; } static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c new file mode 100644 index 000000000000..507930cee8cb --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Needs something like: + * + * iptables -t nat -A POSTROUTING -o nomatch -j MASQUERADE + * + * so NAT engine attaches a NAT null-binding to each connection. + * + * With unmodified kernels, child or parent will exit with + * "Port number changed" error, even though no port translation + * was requested. + */ + +#include <errno.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/wait.h> + +#define LEN 512 +#define PORT 56789 +#define TEST_TIME 5 + +static void die(const char *e) +{ + perror(e); + exit(111); +} + +static void die_port(uint16_t got, uint16_t want) +{ + fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got)); + exit(1); +} + +static int udp_socket(void) +{ + static const struct timeval tv = { + .tv_sec = 1, + }; + int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + + if (fd < 0) + die("socket"); + + setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + return fd; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_in sa1 = { + .sin_family = AF_INET, + }; + struct sockaddr_in sa2 = { + .sin_family = AF_INET, + }; + int s1, s2, status; + time_t end, now; + socklen_t plen; + char buf[LEN]; + bool child; + + sa1.sin_port = htons(PORT); + sa2.sin_port = htons(PORT + 1); + + s1 = udp_socket(); + s2 = udp_socket(); + + inet_pton(AF_INET, "127.0.0.11", &sa1.sin_addr); + inet_pton(AF_INET, "127.0.0.12", &sa2.sin_addr); + + if (bind(s1, (struct sockaddr *)&sa1, sizeof(sa1)) < 0) + die("bind 1"); + if (bind(s2, (struct sockaddr *)&sa2, sizeof(sa2)) < 0) + die("bind 2"); + + child = fork() == 0; + + now = time(NULL); + end = now + TEST_TIME; + + while (now < end) { + struct sockaddr_in peer; + socklen_t plen = sizeof(peer); + + now = time(NULL); + + if (child) { + if (sendto(s1, buf, LEN, 0, (struct sockaddr *)&sa2, sizeof(sa2)) != LEN) + continue; + + if (recvfrom(s2, buf, LEN, 0, (struct sockaddr *)&peer, &plen) < 0) + die("child recvfrom"); + + if (peer.sin_port != htons(PORT)) + die_port(peer.sin_port, PORT); + } else { + if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN) + continue; + + if (recvfrom(s1, buf, LEN, 0, (struct sockaddr *)&peer, &plen) < 0) + die("parent recvfrom"); + + if (peer.sin_port != htons((PORT + 1))) + die_port(peer.sin_port, PORT + 1); + } + } + + if (child) + return 0; + + wait(&status); + + if (WIFEXITED(status)) + return WEXITSTATUS(status); + + return 1; +} diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh new file mode 100755 index 000000000000..a24c896347a8 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +cleanup() +{ + cleanup_all_ns +} + +checktool "nft --version" "run test without nft" +checktool "conntrack --version" "run test without conntrack" + +trap cleanup EXIT + +setup_ns ns0 + +# make loopback connections get nat null bindings assigned +ip netns exec "$ns0" nft -f - <<EOF +table ip nat { + chain POSTROUTING { + type nat hook postrouting priority srcnat; policy accept; + oifname "nomatch" counter packets 0 bytes 0 masquerade + } +} +EOF + +do_flush() +{ + local end + local now + + now=$(date +%s) + end=$((now + 5)) + + while [ $now -lt $end ];do + ip netns exec "$ns0" conntrack -F 2>/dev/null + now=$(date +%s) + done +} + +do_flush & + +if ip netns exec "$ns0" ./conntrack_reverse_clash; then + echo "PASS: No SNAT performed for null bindings" +else + echo "ERROR: SNAT performed without any matching snat rule" + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh index 073e8e62d350..e95ecb37c2b1 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh @@ -32,6 +32,7 @@ source lib.sh IP0=172.30.30.1 IP1=172.30.30.2 +DUMMYNET=10.9.9 PFXL=30 ret=0 @@ -54,6 +55,7 @@ setup_ns ns0 ns1 ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0 ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 +ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1 if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then echo "SKIP: Could not add veth device" @@ -65,13 +67,18 @@ if ! ip -net "$ns0" li add tvrf type vrf table 9876; then exit $ksft_skip fi +ip -net "$ns0" link add dummy0 type dummy + ip -net "$ns0" li set veth0 master tvrf +ip -net "$ns0" li set dummy0 master tvrf ip -net "$ns0" li set tvrf up ip -net "$ns0" li set veth0 up +ip -net "$ns0" li set dummy0 up ip -net "$ns1" li set veth0 up ip -net "$ns0" addr add $IP0/$PFXL dev veth0 ip -net "$ns1" addr add $IP1/$PFXL dev veth0 +ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0 listener_ready() { @@ -212,9 +219,35 @@ EOF fi } +test_fib() +{ +ip netns exec "$ns0" nft -f - <<EOF +flush ruleset +table ip t { + counter fibcount { } + + chain prerouting { + type filter hook prerouting priority 0; + meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack + } +} +EOF + ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0 + ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null + + if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then + echo "PASS: fib lookup returned exepected output interface" + else + echo "FAIL: fib lookup did not return exepected output interface" + ret=1 + return + fi +} + test_ct_zone_in test_masquerade_vrf "default" test_masquerade_vrf "pfifo" test_masquerade_veth +test_fib exit $ret diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh index 4ceee9fb3949..d3edb16cd4b3 100755 --- a/tools/testing/selftests/net/netfilter/ipvs.sh +++ b/tools/testing/selftests/net/netfilter/ipvs.sh @@ -97,7 +97,7 @@ cleanup() { } server_listen() { - ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" & + ip netns exec "$ns2" timeout 5 socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" & server_pid=$! sleep 0.2 } diff --git a/tools/testing/selftests/net/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh index 902f8114bc80..87f2b4c725aa 100755 --- a/tools/testing/selftests/net/netfilter/nft_audit.sh +++ b/tools/testing/selftests/net/netfilter/nft_audit.sh @@ -48,12 +48,31 @@ logread_pid=$! trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT exec 3<"$logfile" +lsplit='s/^\(.*\) entries=\([^ ]*\) \(.*\)$/pfx="\1"\nval="\2"\nsfx="\3"/' +summarize_logs() { + sum=0 + while read line; do + eval $(sed "$lsplit" <<< "$line") + [[ $sum -gt 0 ]] && { + [[ "$pfx $sfx" == "$tpfx $tsfx" ]] && { + let "sum += val" + continue + } + echo "$tpfx entries=$sum $tsfx" + } + tpfx="$pfx" + tsfx="$sfx" + sum=$val + done + echo "$tpfx entries=$sum $tsfx" +} + do_test() { # (cmd, log) echo -n "testing for cmd: $1 ... " cat <&3 >/dev/null $1 >/dev/null || exit 1 sleep 0.1 - res=$(diff -a -u <(echo "$2") - <&3) + res=$(diff -a -u <(echo "$2") <(summarize_logs <&3)) [ $? -eq 0 ] && { echo "OK"; return; } echo "FAIL" grep -v '^\(---\|+++\|@@\)' <<< "$res" @@ -152,31 +171,17 @@ do_test 'nft reset rules t1 c2' \ 'table=t1 family=2 entries=3 op=nft_reset_rule' do_test 'nft reset rules table t1' \ -'table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule' +'table=t1 family=2 entries=9 op=nft_reset_rule' do_test 'nft reset rules t2 c3' \ -'table=t2 family=2 entries=189 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=126 op=nft_reset_rule' +'table=t2 family=2 entries=503 op=nft_reset_rule' do_test 'nft reset rules t2' \ -'table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=186 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=129 op=nft_reset_rule' +'table=t2 family=2 entries=509 op=nft_reset_rule' do_test 'nft reset rules' \ -'table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=180 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=135 op=nft_reset_rule' +'table=t1 family=2 entries=9 op=nft_reset_rule +table=t2 family=2 entries=509 op=nft_reset_rule' # resetting sets and elements @@ -200,13 +205,11 @@ do_test 'nft reset counters t1' \ 'table=t1 family=2 entries=1 op=nft_reset_obj' do_test 'nft reset counters t2' \ -'table=t2 family=2 entries=342 op=nft_reset_obj -table=t2 family=2 entries=158 op=nft_reset_obj' +'table=t2 family=2 entries=500 op=nft_reset_obj' do_test 'nft reset counters' \ 'table=t1 family=2 entries=1 op=nft_reset_obj -table=t2 family=2 entries=341 op=nft_reset_obj -table=t2 family=2 entries=159 op=nft_reset_obj' +table=t2 family=2 entries=500 op=nft_reset_obj' # resetting quotas @@ -217,13 +220,11 @@ do_test 'nft reset quotas t1' \ 'table=t1 family=2 entries=1 op=nft_reset_obj' do_test 'nft reset quotas t2' \ -'table=t2 family=2 entries=315 op=nft_reset_obj -table=t2 family=2 entries=185 op=nft_reset_obj' +'table=t2 family=2 entries=500 op=nft_reset_obj' do_test 'nft reset quotas' \ 'table=t1 family=2 entries=1 op=nft_reset_obj -table=t2 family=2 entries=314 op=nft_reset_obj -table=t2 family=2 entries=186 op=nft_reset_obj' +table=t2 family=2 entries=500 op=nft_reset_obj' # deleting rules diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index d66e3c4dfec6..a9d109fcc15c 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -31,7 +31,7 @@ modprobe -q sctp trap cleanup EXIT -setup_ns ns1 ns2 nsrouter +setup_ns ns1 ns2 ns3 nsrouter TMPFILE0=$(mktemp) TMPFILE1=$(mktemp) @@ -48,6 +48,7 @@ if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > exit $ksft_skip fi ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" +ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3" ip -net "$nsrouter" link set veth0 up ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 @@ -57,8 +58,13 @@ ip -net "$nsrouter" link set veth1 up ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad +ip -net "$nsrouter" link set veth2 up +ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2 +ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad + ip -net "$ns1" link set eth0 up ip -net "$ns2" link set eth0 up +ip -net "$ns3" link set eth0 up ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad @@ -70,6 +76,11 @@ ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad ip -net "$ns2" route add default via 10.0.2.1 ip -net "$ns2" route add default via dead:2::1 +ip -net "$ns3" addr add 10.0.3.99/24 dev eth0 +ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad +ip -net "$ns3" route add default via 10.0.3.1 +ip -net "$ns3" route add default via dead:3::1 + load_ruleset() { local name=$1 local prio=$2 @@ -473,6 +484,83 @@ EOF check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output" } +udp_listener_ready() +{ + ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345 +} + +output_files_written() +{ + test -s "$1" && test -s "$2" +} + +test_udp_ct_race() +{ + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet udpq { + chain prerouting { + type nat hook prerouting priority dstnat - 5; policy accept; + ip daddr 10.6.6.6 udp dport 12345 counter dnat to numgen inc mod 2 map { 0 : 10.0.2.99, 1 : 10.0.3.99 } + } + chain postrouting { + type filter hook postrouting priority srcnat - 5; policy accept; + udp dport 12345 counter queue num 12 + } +} +EOF + :> "$TMPFILE1" + :> "$TMPFILE2" + + timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE1",trunc & + local rpid1=$! + + timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork OPEN:"$TMPFILE2",trunc & + local rpid2=$! + + ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12 + + # Send two packets, one should end up in ns1, other in ns2. + # This is because nfqueue will delay packet for long enough so that + # second packet will not find existing conntrack entry. + echo "Packet 1" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 + echo "Packet 2" | ip netns exec "$ns1" socat STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 + + busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2" + + kill "$nfqpid" + + if ! ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then + echo "FAIL: Expected One udp conntrack entry" + ip netns exec "$nsrouter" conntrack -L -p udp --dport 12345 + ret=1 + fi + + if ! ip netns exec "$nsrouter" nft delete table inet udpq; then + echo "FAIL: Could not delete udpq table" + ret=1 + return + fi + + NUMLINES1=$(wc -l < "$TMPFILE1") + NUMLINES2=$(wc -l < "$TMPFILE2") + + if [ "$NUMLINES1" -ne 1 ] || [ "$NUMLINES2" -ne 1 ]; then + ret=1 + echo "FAIL: uneven udp packet distribution: $NUMLINES1 $NUMLINES2" + echo -n "$TMPFILE1: ";cat "$TMPFILE1" + echo -n "$TMPFILE2: ";cat "$TMPFILE2" + return + fi + + echo "PASS: both udp receivers got one packet each" +} + test_queue_removal() { read tainted_then < /proc/sys/kernel/tainted @@ -512,6 +600,7 @@ EOF ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null load_ruleset "filter" 0 @@ -549,6 +638,7 @@ test_tcp_localhost_connectclose test_tcp_localhost_requeue test_sctp_forward test_sctp_output +test_udp_ct_race # should be last, adds vrf device in ns1 and changes routes test_icmp_vrf diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh new file mode 100755 index 000000000000..e208fb03eeb7 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh @@ -0,0 +1,358 @@ +#!/bin/bash +# +# This tests tproxy on the following scenario: +# +# +------------+ +# +-------+ | nsrouter | +-------+ +# |ns1 |.99 .1| |.1 .99| ns2| +# | eth0|---------------|veth0 veth1|------------------|eth0 | +# | | 10.0.1.0/24 | | 10.0.2.0/24 | | +# +-------+ dead:1::/64 | veth2 | dead:2::/64 +-------+ +# +------------+ +# |.1 +# | +# | +# | +-------+ +# | .99| ns3| +# +------------------------|eth0 | +# 10.0.3.0/24 | | +# dead:3::/64 +-------+ +# +# The tproxy implementation acts as an echo server so the client +# must receive the same message it sent if it has been proxied. +# If is not proxied the servers return PONG_NS# with the number +# of the namespace the server is running. +# +# shellcheck disable=SC2162,SC2317 + +source lib.sh +ret=0 +timeout=5 + +cleanup() +{ + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + ip netns pids "$ns3" | xargs kill 2>/dev/null + ip netns pids "$nsrouter" | xargs kill 2>/dev/null + + cleanup_all_ns +} + +checktool "nft --version" "test without nft tool" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT +setup_ns ns1 ns2 ns3 nsrouter + +if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" +ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3" + +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$nsrouter" link set veth1 up +ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad + +ip -net "$nsrouter" link set veth2 up +ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2 +ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad + +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up +ip -net "$ns3" link set eth0 up + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" route add default via dead:1::1 + +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 + +ip -net "$ns3" addr add 10.0.3.99/24 dev eth0 +ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad +ip -net "$ns3" route add default via 10.0.3.1 +ip -net "$ns3" route add default via dead:3::1 + +ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null + +test_ping() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then + return 2 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.3.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:3::99 > /dev/null; then + return 2 + fi + + return 0 +} + +test_ping_router() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then + return 3 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then + return 4 + fi + + return 0 +} + + +listener_ready() +{ + local ns="$1" + local port="$2" + local proto="$3" + ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port" +} + +test_tproxy() +{ + local traffic_origin="$1" + local ip_proto="$2" + local expect_ns1_ns2="$3" + local expect_ns1_ns3="$4" + local expect_nsrouter_ns2="$5" + local expect_nsrouter_ns3="$6" + + # derived variables + local testname="test_${ip_proto}_tcp_${traffic_origin}" + local socat_ipproto + local ns1_ip + local ns2_ip + local ns3_ip + local ns2_target + local ns3_target + local nftables_subject + local ip_command + + # socat 1.8.0 has a bug that requires to specify the IP family to bind (fixed in 1.8.0.1) + case $ip_proto in + "ip") + socat_ipproto="-4" + ns1_ip=10.0.1.99 + ns2_ip=10.0.2.99 + ns3_ip=10.0.3.99 + ns2_target="tcp:$ns2_ip:8080" + ns3_target="tcp:$ns3_ip:8080" + nftables_subject="ip daddr $ns2_ip tcp dport 8080" + ip_command="ip" + ;; + "ip6") + socat_ipproto="-6" + ns1_ip=dead:1::99 + ns2_ip=dead:2::99 + ns3_ip=dead:3::99 + ns2_target="tcp:[$ns2_ip]:8080" + ns3_target="tcp:[$ns3_ip]:8080" + nftables_subject="ip6 daddr $ns2_ip tcp dport 8080" + ip_command="ip -6" + ;; + *) + echo "FAIL: unsupported protocol" + exit 255 + ;; + esac + + case $traffic_origin in + # to capture the local originated traffic we need to mark the outgoing + # traffic so the policy based routing rule redirects it and can be processed + # in the prerouting chain. + "local") + nftables_rules=" +flush ruleset +table inet filter { + chain divert { + type filter hook prerouting priority 0; policy accept; + $nftables_subject tproxy $ip_proto to :12345 meta mark set 1 accept + } + chain output { + type route hook output priority 0; policy accept; + $nftables_subject meta mark set 1 accept + } +}" + ;; + "forward") + nftables_rules=" +flush ruleset +table inet filter { + chain divert { + type filter hook prerouting priority 0; policy accept; + $nftables_subject tproxy $ip_proto to :12345 meta mark set 1 accept + } +}" + ;; + *) + echo "FAIL: unsupported parameter for traffic origin" + exit 255 + ;; + esac + + # shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule add fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route add local "${ns2_ip}" dev lo table 100 + echo "$nftables_rules" | ip netns exec "$nsrouter" nft -f /dev/stdin + + timeout "$timeout" ip netns exec "$nsrouter" socat "$socat_ipproto" tcp-listen:12345,fork,ip-transparent SYSTEM:"cat" 2>/dev/null & + local tproxy_pid=$! + + timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" tcp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null & + local server2_pid=$! + + timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" tcp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null & + local server3_pid=$! + + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-t" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" 8080 "-t" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns3" 8080 "-t" + + local result + # request from ns1 to ns2 (forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO "$ns2_target") + if [ "$result" == "$expect_ns1_ns2" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2, not \"${expect_ns1_ns2}\" as intended" + ret=1 + fi + + # request from ns1 to ns3(forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO "$ns3_target") + if [ "$result" = "$expect_ns1_ns3" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3, not \"$expect_ns1_ns3\" as intended" + ret=1 + fi + + # request from nsrouter to ns2 (localy originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO "$ns2_target") + if [ "$result" == "$expect_nsrouter_ns2" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2, not \"$expect_nsrouter_ns2\" as intended" + ret=1 + fi + + # request from nsrouter to ns3 (localy originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO "$ns3_target") + if [ "$result" = "$expect_nsrouter_ns3" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3, not \"$expect_nsrouter_ns3\" as intended" + ret=1 + fi + + # cleanup + kill "$tproxy_pid" "$server2_pid" "$server3_pid" 2>/dev/null + # shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule del fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route flush table 100 +} + + +test_ipv4_tcp_forward() +{ + local traffic_origin="forward" + local ip_proto="ip" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="PONG_NS2" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +test_ipv4_tcp_local() +{ + local traffic_origin="local" + local ip_proto="ip" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="I_M_PROXIED" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +test_ipv6_tcp_forward() +{ + local traffic_origin="forward" + local ip_proto="ip6" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="PONG_NS2" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +test_ipv6_tcp_local() +{ + local traffic_origin="local" + local ip_proto="ip6" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="I_M_PROXIED" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +if test_ping; then + # queue bypass works (rules were skipped, no listener) + echo "PASS: ${ns1} can reach ${ns2}" +else + echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2 + exit $ret +fi + +test_ipv4_tcp_forward +test_ipv4_tcp_local +test_ipv6_tcp_forward +test_ipv6_tcp_local + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh new file mode 100755 index 000000000000..d16de13fe5a7 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh @@ -0,0 +1,262 @@ +#!/bin/bash +# +# This tests tproxy on the following scenario: +# +# +------------+ +# +-------+ | nsrouter | +-------+ +# |ns1 |.99 .1| |.1 .99| ns2| +# | eth0|---------------|veth0 veth1|------------------|eth0 | +# | | 10.0.1.0/24 | | 10.0.2.0/24 | | +# +-------+ dead:1::/64 | veth2 | dead:2::/64 +-------+ +# +------------+ +# |.1 +# | +# | +# | +-------+ +# | .99| ns3| +# +------------------------|eth0 | +# 10.0.3.0/24 | | +# dead:3::/64 +-------+ +# +# The tproxy implementation acts as an echo server so the client +# must receive the same message it sent if it has been proxied. +# If is not proxied the servers return PONG_NS# with the number +# of the namespace the server is running. +# shellcheck disable=SC2162,SC2317 + +source lib.sh +ret=0 +# UDP is slow +timeout=15 + +cleanup() +{ + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + ip netns pids "$ns3" | xargs kill 2>/dev/null + ip netns pids "$nsrouter" | xargs kill 2>/dev/null + + cleanup_all_ns +} + +checktool "nft --version" "test without nft tool" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT +setup_ns ns1 ns2 ns3 nsrouter + +if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" +ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3" + +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$nsrouter" link set veth1 up +ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad + +ip -net "$nsrouter" link set veth2 up +ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2 +ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad + +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up +ip -net "$ns3" link set eth0 up + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" route add default via dead:1::1 + +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 + +ip -net "$ns3" addr add 10.0.3.99/24 dev eth0 +ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad +ip -net "$ns3" route add default via 10.0.3.1 +ip -net "$ns3" route add default via dead:3::1 + +ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null + +test_ping() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then + return 2 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.3.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:3::99 > /dev/null; then + return 2 + fi + + return 0 +} + +test_ping_router() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then + return 3 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then + return 4 + fi + + return 0 +} + + +listener_ready() +{ + local ns="$1" + local port="$2" + local proto="$3" + ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port" +} + +test_tproxy_udp_forward() +{ + local ip_proto="$1" + + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="PONG_NS2" + local expect_nsrouter_ns3="PONG_NS3" + + # derived variables + local testname="test_${ip_proto}_udp_forward" + local socat_ipproto + local ns1_ip + local ns2_ip + local ns3_ip + local ns1_ip_port + local ns2_ip_port + local ns3_ip_port + local ip_command + + # socat 1.8.0 has a bug that requires to specify the IP family to bind (fixed in 1.8.0.1) + case $ip_proto in + "ip") + socat_ipproto="-4" + ns1_ip=10.0.1.99 + ns2_ip=10.0.2.99 + ns3_ip=10.0.3.99 + ns1_ip_port="$ns1_ip:18888" + ns2_ip_port="$ns2_ip:8080" + ns3_ip_port="$ns3_ip:8080" + ip_command="ip" + ;; + "ip6") + socat_ipproto="-6" + ns1_ip=dead:1::99 + ns2_ip=dead:2::99 + ns3_ip=dead:3::99 + ns1_ip_port="[$ns1_ip]:18888" + ns2_ip_port="[$ns2_ip]:8080" + ns3_ip_port="[$ns3_ip]:8080" + ip_command="ip -6" + ;; + *) + echo "FAIL: unsupported protocol" + exit 255 + ;; + esac + + # shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule add fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route add local "$ns2_ip" dev lo table 100 + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain divert { + type filter hook prerouting priority 0; policy accept; + $ip_proto daddr $ns2_ip udp dport 8080 tproxy $ip_proto to :12345 meta mark set 1 accept + } +} +EOF + + timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port" 2>/dev/null & + local tproxy_pid=$! + + timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null & + local server2_pid=$! + + timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null & + local server3_pid=$! + + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-u" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" 8080 "-u" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns3" 8080 "-u" + + local result + # request from ns1 to ns2 (forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888) + if [ "$result" == "$expect_ns1_ns2" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2, not \"${expect_ns1_ns2}\" as intended" + ret=1 + fi + + # request from ns1 to ns3 (forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port") + if [ "$result" = "$expect_ns1_ns3" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3, not \"$expect_ns1_ns3\" as intended" + ret=1 + fi + + # request from nsrouter to ns2 (localy originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port") + if [ "$result" == "$expect_nsrouter_ns2" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2, not \"$expect_nsrouter_ns2\" as intended" + ret=1 + fi + + # request from nsrouter to ns3 (localy originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port") + if [ "$result" = "$expect_nsrouter_ns3" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3, not \"$expect_nsrouter_ns3\" as intended" + ret=1 + fi + + # cleanup + kill "$tproxy_pid" "$server2_pid" "$server3_pid" 2>/dev/null + # shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule del fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route flush table 100 +} + + +if test_ping; then + # queue bypass works (rules were skipped, no listener) + echo "PASS: ${ns1} can reach ${ns2}" +else + echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2 + exit $ret +fi + +test_tproxy_udp_forward "ip" +test_tproxy_udp_forward "ip6" + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh new file mode 100755 index 000000000000..912cb9583af1 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +if ! modprobe -q -n br_netfilter 2>&1; then + echo "SKIP: Test needs br_netfilter kernel module" + exit $ksft_skip +fi + +cleanup() +{ + cleanup_all_ns +} + +trap cleanup EXIT + +setup_ns host vtep router + +create_topology() +{ + ip link add host-eth0 netns "$host" type veth peer name vtep-host netns "$vtep" + ip link add vtep-router netns "$vtep" type veth peer name router-vtep netns "$router" +} + +setup_host() +{ + # bring ports up + ip -n "$host" addr add 10.0.0.1/24 dev host-eth0 + ip -n "$host" link set host-eth0 up + + # Add VLAN 10,20 + for vid in 10 20; do + ip -n "$host" link add link host-eth0 name host-eth0.$vid type vlan id $vid + ip -n "$host" addr add 10.0.$vid.1/24 dev host-eth0.$vid + ip -n "$host" link set host-eth0.$vid up + done +} + +setup_vtep() +{ + # create bridge on vtep + ip -n "$vtep" link add name br0 type bridge + ip -n "$vtep" link set br0 type bridge vlan_filtering 1 + + # VLAN 10 is untagged PVID + ip -n "$vtep" link set dev vtep-host master br0 + bridge -n "$vtep" vlan add dev vtep-host vid 10 pvid untagged + + # VLAN 20 as other VID + ip -n "$vtep" link set dev vtep-host master br0 + bridge -n "$vtep" vlan add dev vtep-host vid 20 + + # single-vxlan device on vtep + ip -n "$vtep" address add dev vtep-router 60.0.0.1/24 + ip -n "$vtep" link add dev vxd type vxlan external \ + vnifilter local 60.0.0.1 remote 60.0.0.2 dstport 4789 ttl 64 + ip -n "$vtep" link set vxd master br0 + + # Add VLAN-VNI 1-1 mappings + bridge -n "$vtep" link set dev vxd vlan_tunnel on + for vid in 10 20; do + bridge -n "$vtep" vlan add dev vxd vid $vid + bridge -n "$vtep" vlan add dev vxd vid $vid tunnel_info id $vid + bridge -n "$vtep" vni add dev vxd vni $vid + done + + # bring ports up + ip -n "$vtep" link set vxd up + ip -n "$vtep" link set vtep-router up + ip -n "$vtep" link set vtep-host up + ip -n "$vtep" link set dev br0 up +} + +setup_router() +{ + # bring ports up + ip -n "$router" link set router-vtep up +} + +setup() +{ + modprobe -q br_netfilter + create_topology + setup_host + setup_vtep + setup_router +} + +test_large_mtu_untagged_traffic() +{ + ip -n "$vtep" link set vxd mtu 1000 + ip -n "$host" neigh add 10.0.0.2 lladdr ca:fe:ba:be:00:01 dev host-eth0 + ip netns exec "$host" \ + ping -q 10.0.0.2 -I host-eth0 -c 1 -W 0.5 -s2000 > /dev/null 2>&1 + return 0 +} + +test_large_mtu_tagged_traffic() +{ + for vid in 10 20; do + ip -n "$vtep" link set vxd mtu 1000 + ip -n "$host" neigh add 10.0.$vid.2 lladdr ca:fe:ba:be:00:01 dev host-eth0.$vid + ip netns exec "$host" \ + ping -q 10.0.$vid.2 -I host-eth0.$vid -c 1 -W 0.5 -s2000 > /dev/null 2>&1 + done + return 0 +} + +do_test() +{ + # Frames will be dropped so ping will not succeed + # If it doesn't panic, it passes + test_large_mtu_tagged_traffic + test_large_mtu_untagged_traffic +} + +setup && \ +echo "Test for VxLAN fragmentation with large MTU in br_netfilter:" && \ +do_test && echo "PASS!" +exit $? diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index 7478c0c0c9aa..4071c133f29e 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -30,12 +30,17 @@ if [ -z "$(which packetdrill)" ]; then exit "$KSFT_SKIP" fi +declare -a optargs +if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then + optargs+=('--tolerance_usecs=14000') +fi + ktap_print_header ktap_set_plan 2 -unshare -n packetdrill ${ipv4_args[@]} $(basename $script) > /dev/null \ +unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $(basename $script) > /dev/null \ && ktap_test_pass "ipv4" || ktap_test_fail "ipv4" -unshare -n packetdrill ${ipv6_args[@]} $(basename $script) > /dev/null \ +unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $(basename $script) > /dev/null \ && ktap_test_pass "ipv6" || ktap_test_fail "ipv6" ktap_finished diff --git a/tools/testing/selftests/net/rds/.gitignore b/tools/testing/selftests/net/rds/.gitignore new file mode 100644 index 000000000000..1c6f04e2aa11 --- /dev/null +++ b/tools/testing/selftests/net/rds/.gitignore @@ -0,0 +1 @@ +include.sh diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile index da9714bc7aad..1803c39dbacb 100644 --- a/tools/testing/selftests/net/rds/Makefile +++ b/tools/testing/selftests/net/rds/Makefile @@ -4,9 +4,10 @@ all: @echo mk_build_dir="$(shell pwd)" > include.sh TEST_PROGS := run.sh \ - include.sh \ test.py -EXTRA_CLEAN := /tmp/rds_logs +TEST_FILES := include.sh + +EXTRA_CLEAN := /tmp/rds_logs include.sh include ../../lib.mk diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py index e6bb109bcead..e6bb109bcead 100644..100755 --- a/tools/testing/selftests/net/rds/test.py +++ b/tools/testing/selftests/net/rds/test.py diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index 59cb26cf3f73..1ef24119def0 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -19,3 +19,7 @@ $(YNL_OUTPUTS): CFLAGS += \ $(OUTPUT)/libynl.a: $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a + +EXTRA_CLEAN += \ + $(top_srcdir)/tools/net/ynl/lib/__pycache__ \ + $(top_srcdir)/tools/net/ynl/lib/*.[ado] diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 96e812bdf8a4..5b9772cdf265 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -60,12 +60,6 @@ unsigned int rseq_size = -1U; /* Flags used during rseq registration. */ unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -unsigned int rseq_feature_size = -1U; - static int rseq_ownership; static int rseq_reg_success; /* At least one rseq registration has succeded. */ @@ -111,6 +105,43 @@ int rseq_available(void) } } +/* The rseq areas need to be at least 32 bytes. */ +static +unsigned int get_rseq_min_alloc_size(void) +{ + unsigned int alloc_size = rseq_size; + + if (alloc_size < ORIG_RSEQ_ALLOC_SIZE) + alloc_size = ORIG_RSEQ_ALLOC_SIZE; + return alloc_size; +} + +/* + * Return the feature size supported by the kernel. + * + * Depending on the value returned by getauxval(AT_RSEQ_FEATURE_SIZE): + * + * 0: Return ORIG_RSEQ_FEATURE_SIZE (20) + * > 0: Return the value from getauxval(AT_RSEQ_FEATURE_SIZE). + * + * It should never return a value below ORIG_RSEQ_FEATURE_SIZE. + */ +static +unsigned int get_rseq_kernel_feature_size(void) +{ + unsigned long auxv_rseq_feature_size, auxv_rseq_align; + + auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); + assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); + + auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); + assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); + if (auxv_rseq_feature_size) + return auxv_rseq_feature_size; + else + return ORIG_RSEQ_FEATURE_SIZE; +} + int rseq_register_current_thread(void) { int rc; @@ -119,7 +150,7 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), 0, RSEQ_SIG); if (rc) { if (RSEQ_READ_ONCE(rseq_reg_success)) { /* Incoherent success/failure within process. */ @@ -140,28 +171,12 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. */ return 0; } - rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, get_rseq_min_alloc_size(), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; } -static -unsigned int get_rseq_feature_size(void) -{ - unsigned long auxv_rseq_feature_size, auxv_rseq_align; - - auxv_rseq_align = getauxval(AT_RSEQ_ALIGN); - assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE); - - auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE); - assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE); - if (auxv_rseq_feature_size) - return auxv_rseq_feature_size; - else - return ORIG_RSEQ_FEATURE_SIZE; -} - static __attribute__((constructor)) void rseq_init(void) { @@ -178,28 +193,54 @@ void rseq_init(void) } if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p && *libc_rseq_size_p != 0) { + unsigned int libc_rseq_size; + /* rseq registration owned by glibc */ rseq_offset = *libc_rseq_offset_p; - rseq_size = *libc_rseq_size_p; + libc_rseq_size = *libc_rseq_size_p; rseq_flags = *libc_rseq_flags_p; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size > rseq_size) - rseq_feature_size = rseq_size; + + /* + * Previous versions of glibc expose the value + * 32 even though the kernel only supported 20 + * bytes initially. Therefore treat 32 as a + * special-case. glibc 2.40 exposes a 20 bytes + * __rseq_size without using getauxval(3) to + * query the supported size, while still allocating a 32 + * bytes area. Also treat 20 as a special-case. + * + * Special-cases are handled by using the following + * value as active feature set size: + * + * rseq_size = min(32, get_rseq_kernel_feature_size()) + */ + switch (libc_rseq_size) { + case ORIG_RSEQ_FEATURE_SIZE: + fallthrough; + case ORIG_RSEQ_ALLOC_SIZE: + { + unsigned int rseq_kernel_feature_size = get_rseq_kernel_feature_size(); + + if (rseq_kernel_feature_size < ORIG_RSEQ_ALLOC_SIZE) + rseq_size = rseq_kernel_feature_size; + else + rseq_size = ORIG_RSEQ_ALLOC_SIZE; + break; + } + default: + /* Otherwise just use the __rseq_size from libc as rseq_size. */ + rseq_size = libc_rseq_size; + break; + } return; } rseq_ownership = 1; if (!rseq_available()) { rseq_size = 0; - rseq_feature_size = 0; return; } rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); rseq_flags = 0; - rseq_feature_size = get_rseq_feature_size(); - if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE) - rseq_size = ORIG_RSEQ_ALLOC_SIZE; - else - rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE; } static __attribute__((destructor)) @@ -209,7 +250,6 @@ void rseq_exit(void) return; rseq_offset = 0; rseq_size = -1U; - rseq_feature_size = -1U; rseq_ownership = 0; } diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index d7364ea4d201..4e217b620e0c 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -68,12 +68,6 @@ extern unsigned int rseq_size; /* Flags used during rseq registration. */ extern unsigned int rseq_flags; -/* - * rseq feature size supported by the kernel. 0 if the registration was - * unsuccessful. - */ -extern unsigned int rseq_feature_size; - enum rseq_mo { RSEQ_MO_RELAXED = 0, RSEQ_MO_CONSUME = 1, /* Unused */ @@ -193,7 +187,7 @@ static inline uint32_t rseq_current_cpu(void) static inline bool rseq_node_id_available(void) { - return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, node_id); + return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, node_id); } /* @@ -207,7 +201,7 @@ static inline uint32_t rseq_current_node_id(void) static inline bool rseq_mm_cid_available(void) { - return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, mm_cid); + return (int) rseq_size >= rseq_offsetofend(struct rseq_abi, mm_cid); } static inline uint32_t rseq_current_mm_cid(void) diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c index 9647b14b47c5..38a8e67de77d 100644 --- a/tools/testing/selftests/rtc/rtctest.c +++ b/tools/testing/selftests/rtc/rtctest.c @@ -412,6 +412,8 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) { int main(int argc, char **argv) { + int ret = -1; + switch (argc) { case 2: rtc_file = argv[1]; @@ -423,5 +425,12 @@ int main(int argc, char **argv) return 1; } - return test_harness_run(argc, argv); + /* Run the test if rtc_file is accessible */ + if (access(rtc_file, R_OK) == 0) + ret = test_harness_run(argc, argv); + else + ksft_exit_skip("[SKIP]: Cannot access rtc file %s - Exiting\n", + rtc_file); + + return ret; } diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index 16bd49492efa..ddb1cebc844e 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -26,13 +26,17 @@ static void __fatal_error(const char *test, const char *name, const char *what) { char buf[64]; + char *ret_str = NULL; - strerror_r(errno, buf, sizeof(buf)); + ret_str = strerror_r(errno, buf, sizeof(buf)); - if (name && strlen(name)) - ksft_exit_fail_msg("%s %s %s %s\n", test, name, what, buf); + if (name && strlen(name) && ret_str) + ksft_exit_fail_msg("%s %s %s %s\n", test, name, what, ret_str); + else if (ret_str) + ksft_exit_fail_msg("%s %s %s\n", test, what, ret_str); else - ksft_exit_fail_msg("%s %s %s\n", test, what, buf); + ksft_exit_fail_msg("%s %s\n", test, what); + } #define fatal_error(name, what) __fatal_error(__func__, name, what) diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index af9cedbf5357..1cf14a8da438 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -9,10 +9,8 @@ ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64)) TEST_GEN_PROGS += vdso_standalone_test_x86 endif TEST_GEN_PROGS += vdso_test_correctness -ifeq ($(ARCH)$(CONFIG_X86_32),$(filter $(ARCH)$(CONFIG_X86_32),x86 x86_64 loongarch arm64 powerpc s390)) TEST_GEN_PROGS += vdso_test_getrandom TEST_GEN_PROGS += vdso_test_chacha -endif CFLAGS := -std=gnu99 -O2 @@ -37,9 +35,9 @@ $(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \ $(KHDR_INCLUDES) \ -isystem $(top_srcdir)/include/uapi -$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/tools/arch/$(SRCARCH)/vdso/vgetrandom-chacha.S +$(OUTPUT)/vdso_test_chacha: vgetrandom-chacha.S $(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \ -idirafter $(top_srcdir)/tools/include/generated \ -idirafter $(top_srcdir)/arch/$(SRCARCH)/include \ -idirafter $(top_srcdir)/include \ - -D__ASSEMBLY__ -Wa,--noexecstack + -Wa,--noexecstack diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c index 27f6fdf11969..644915862af8 100644 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -131,6 +131,8 @@ asm ( "_start:\n\t" #ifdef __x86_64__ "mov %rsp,%rdi\n\t" + "and $-16,%rsp\n\t" + "sub $8,%rsp\n\t" "jmp c_main" #else "push %esp\n\t" diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index b1ea532c5996..8757f738b0b1 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -3,6 +3,7 @@ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ +#include <linux/compiler.h> #include <tools/le_byteshift.h> #include <sys/random.h> #include <sys/auxv.h> @@ -73,10 +74,10 @@ static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, u counter[1] = s[13]; } -typedef uint8_t u8; -typedef uint32_t u32; -typedef uint64_t u64; -#include <vdso/getrandom.h> +void __weak __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks) +{ + ksft_exit_skip("Not implemented on architecture\n"); +} int main(int argc, char *argv[]) { @@ -90,10 +91,8 @@ int main(int argc, char *argv[]) ksft_set_plan(1); for (unsigned int trial = 0; trial < TRIALS; ++trial) { - if (getrandom(key, sizeof(key), 0) != sizeof(key)) { - printf("getrandom() failed!\n"); - return KSFT_SKIP; - } + if (getrandom(key, sizeof(key), 0) != sizeof(key)) + ksft_exit_skip("getrandom() failed unexpectedly\n"); memset(counter1, 0, sizeof(counter1)); reference_chacha20_blocks(output1, key, counter1, BLOCKS); for (unsigned int split = 0; split < BLOCKS; ++split) { @@ -102,8 +101,10 @@ int main(int argc, char *argv[]) if (split) __arch_chacha20_blocks_nostack(output2, key, counter2, split); __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter2, BLOCKS - split); - if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) - return KSFT_FAIL; + if (memcmp(output1, output2, sizeof(output1))) + ksft_exit_fail_msg("Main loop outputs do not match on trial %u, split %u\n", trial, split); + if (memcmp(counter1, counter2, sizeof(counter1))) + ksft_exit_fail_msg("Main loop counters do not match on trial %u, split %u\n", trial, split); } } memset(counter1, 0, sizeof(counter1)); @@ -113,14 +114,19 @@ int main(int argc, char *argv[]) reference_chacha20_blocks(output1, key, counter1, BLOCKS); __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS); - if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) - return KSFT_FAIL; + if (memcmp(output1, output2, sizeof(output1))) + ksft_exit_fail_msg("Block limit outputs do not match after first round\n"); + if (memcmp(counter1, counter2, sizeof(counter1))) + ksft_exit_fail_msg("Block limit counters do not match after first round\n"); reference_chacha20_blocks(output1, key, counter1, BLOCKS); __arch_chacha20_blocks_nostack(output2, key, counter2, BLOCKS); - if (memcmp(output1, output2, sizeof(output1)) || memcmp(counter1, counter2, sizeof(counter1))) - return KSFT_FAIL; + if (memcmp(output1, output2, sizeof(output1))) + ksft_exit_fail_msg("Block limit outputs do not match after second round\n"); + if (memcmp(counter1, counter2, sizeof(counter1))) + ksft_exit_fail_msg("Block limit counters do not match after second round\n"); ksft_test_result_pass("chacha: PASS\n"); - return KSFT_PASS; + ksft_exit_pass(); + return 0; } diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c index 72a1d9b43a84..95057f7567db 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -11,6 +11,7 @@ #include <string.h> #include <time.h> #include <unistd.h> +#include <sched.h> #include <signal.h> #include <sys/auxv.h> #include <sys/mman.h> @@ -40,6 +41,9 @@ } while (0) #endif +#define ksft_assert(condition) \ + do { if (!(condition)) ksft_exit_fail_msg("Assertion failed: %s\n", #condition); } while (0) + static struct { pthread_mutex_t lock; void **states; @@ -59,10 +63,12 @@ static void *vgetrandom_get_state(void) size_t page_size = getpagesize(); size_t new_cap; size_t alloc_size, num = sysconf(_SC_NPROCESSORS_ONLN); /* Just a decent heuristic. */ + size_t state_size_aligned, cache_line_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE) ?: 1; void *new_block, *new_states; - alloc_size = (num * vgrnd.params.size_of_opaque_state + page_size - 1) & (~(page_size - 1)); - num = (page_size / vgrnd.params.size_of_opaque_state) * (alloc_size / page_size); + state_size_aligned = (vgrnd.params.size_of_opaque_state + cache_line_size - 1) & (~(cache_line_size - 1)); + alloc_size = (num * state_size_aligned + page_size - 1) & (~(page_size - 1)); + num = (page_size / state_size_aligned) * (alloc_size / page_size); new_block = mmap(0, alloc_size, vgrnd.params.mmap_prot, vgrnd.params.mmap_flags, -1, 0); if (new_block == MAP_FAILED) goto out; @@ -78,7 +84,7 @@ static void *vgetrandom_get_state(void) if (((uintptr_t)new_block & (page_size - 1)) + vgrnd.params.size_of_opaque_state > page_size) new_block = (void *)(((uintptr_t)new_block + page_size - 1) & (~(page_size - 1))); vgrnd.states[i] = new_block; - new_block += vgrnd.params.size_of_opaque_state; + new_block += state_size_aligned; } vgrnd.len = num; goto success; @@ -109,26 +115,19 @@ static void vgetrandom_init(void) const char *version = versions[VDSO_VERSION]; const char *name = names[VDSO_NAMES][6]; unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); - size_t ret; + ssize_t ret; - if (!sysinfo_ehdr) { - printf("AT_SYSINFO_EHDR is not present!\n"); - exit(KSFT_SKIP); - } + if (!sysinfo_ehdr) + ksft_exit_skip("AT_SYSINFO_EHDR is not present\n"); vdso_init_from_sysinfo_ehdr(sysinfo_ehdr); vgrnd.fn = (__typeof__(vgrnd.fn))vdso_sym(version, name); - if (!vgrnd.fn) { - printf("%s is missing!\n", name); - exit(KSFT_FAIL); - } + if (!vgrnd.fn) + ksft_exit_skip("%s@%s symbol is missing from vDSO\n", name, version); ret = VDSO_CALL(vgrnd.fn, 5, NULL, 0, 0, &vgrnd.params, ~0UL); - if (ret == -ENOSYS) { - printf("unsupported architecture\n"); - exit(KSFT_SKIP); - } else if (ret) { - printf("failed to fetch vgetrandom params!\n"); - exit(KSFT_FAIL); - } + if (ret == -ENOSYS) + ksft_exit_skip("CPU does not have runtime support\n"); + else if (ret) + ksft_exit_fail_msg("Failed to fetch vgetrandom params: %zd\n", ret); } static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags) @@ -137,10 +136,7 @@ static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags) if (!state) { state = vgetrandom_get_state(); - if (!state) { - printf("vgetrandom_get_state failed!\n"); - exit(KSFT_FAIL); - } + ksft_assert(state); } return VDSO_CALL(vgrnd.fn, 5, buf, len, flags, state, vgrnd.params.size_of_opaque_state); } @@ -152,7 +148,7 @@ static void *test_vdso_getrandom(void *ctx) for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; ssize_t ret = vgetrandom(&val, sizeof(val), 0); - assert(ret == sizeof(val)); + ksft_assert(ret == sizeof(val)); } return NULL; } @@ -162,7 +158,7 @@ static void *test_libc_getrandom(void *ctx) for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; ssize_t ret = getrandom(&val, sizeof(val), 0); - assert(ret == sizeof(val)); + ksft_assert(ret == sizeof(val)); } return NULL; } @@ -172,7 +168,7 @@ static void *test_syscall_getrandom(void *ctx) for (size_t i = 0; i < TRIALS; ++i) { unsigned int val; ssize_t ret = syscall(__NR_getrandom, &val, sizeof(val), 0); - assert(ret == sizeof(val)); + ksft_assert(ret == sizeof(val)); } return NULL; } @@ -207,7 +203,7 @@ static void bench_multi(void) clock_gettime(CLOCK_MONOTONIC, &start); for (size_t i = 0; i < THREADS; ++i) - assert(pthread_create(&threads[i], NULL, test_vdso_getrandom, NULL) == 0); + ksft_assert(pthread_create(&threads[i], NULL, test_vdso_getrandom, NULL) == 0); for (size_t i = 0; i < THREADS; ++i) pthread_join(threads[i], NULL); clock_gettime(CLOCK_MONOTONIC, &end); @@ -216,7 +212,7 @@ static void bench_multi(void) clock_gettime(CLOCK_MONOTONIC, &start); for (size_t i = 0; i < THREADS; ++i) - assert(pthread_create(&threads[i], NULL, test_libc_getrandom, NULL) == 0); + ksft_assert(pthread_create(&threads[i], NULL, test_libc_getrandom, NULL) == 0); for (size_t i = 0; i < THREADS; ++i) pthread_join(threads[i], NULL); clock_gettime(CLOCK_MONOTONIC, &end); @@ -225,7 +221,7 @@ static void bench_multi(void) clock_gettime(CLOCK_MONOTONIC, &start); for (size_t i = 0; i < THREADS; ++i) - assert(pthread_create(&threads[i], NULL, test_syscall_getrandom, NULL) == 0); + ksft_assert(pthread_create(&threads[i], NULL, test_syscall_getrandom, NULL) == 0); for (size_t i = 0; i < THREADS; ++i) pthread_join(threads[i], NULL); clock_gettime(CLOCK_MONOTONIC, &end); @@ -250,48 +246,46 @@ static void kselftest(void) for (size_t i = 0; i < 1000; ++i) { ssize_t ret = vgetrandom(weird_size, sizeof(weird_size), 0); - if (ret != sizeof(weird_size)) - exit(KSFT_FAIL); + ksft_assert(ret == sizeof(weird_size)); } ksft_test_result_pass("getrandom: PASS\n"); unshare(CLONE_NEWUSER); - assert(unshare(CLONE_NEWTIME) == 0); + ksft_assert(unshare(CLONE_NEWTIME) == 0); child = fork(); - assert(child >= 0); + ksft_assert(child >= 0); if (!child) { vgetrandom_init(); child = getpid(); - assert(ptrace(PTRACE_TRACEME, 0, NULL, NULL) == 0); - assert(kill(child, SIGSTOP) == 0); - assert(vgetrandom(weird_size, sizeof(weird_size), 0) == sizeof(weird_size)); + ksft_assert(ptrace(PTRACE_TRACEME, 0, NULL, NULL) == 0); + ksft_assert(kill(child, SIGSTOP) == 0); + ksft_assert(vgetrandom(weird_size, sizeof(weird_size), 0) == sizeof(weird_size)); _exit(0); } for (;;) { struct ptrace_syscall_info info = { 0 }; int status, ret; - assert(waitpid(child, &status, 0) >= 0); + ksft_assert(waitpid(child, &status, 0) >= 0); if (WIFEXITED(status)) { - if (WEXITSTATUS(status) != 0) - exit(KSFT_FAIL); + ksft_assert(WEXITSTATUS(status) == 0); break; } - assert(WIFSTOPPED(status)); + ksft_assert(WIFSTOPPED(status)); if (WSTOPSIG(status) == SIGSTOP) - assert(ptrace(PTRACE_SETOPTIONS, child, 0, PTRACE_O_TRACESYSGOOD) == 0); + ksft_assert(ptrace(PTRACE_SETOPTIONS, child, 0, PTRACE_O_TRACESYSGOOD) == 0); else if (WSTOPSIG(status) == (SIGTRAP | 0x80)) { - assert(ptrace(PTRACE_GET_SYSCALL_INFO, child, sizeof(info), &info) > 0); + ksft_assert(ptrace(PTRACE_GET_SYSCALL_INFO, child, sizeof(info), &info) > 0); if (info.op == PTRACE_SYSCALL_INFO_ENTRY && info.entry.nr == __NR_getrandom && info.entry.args[0] == (uintptr_t)weird_size && info.entry.args[1] == sizeof(weird_size)) - exit(KSFT_FAIL); + ksft_exit_fail_msg("vgetrandom passed buffer to syscall getrandom unexpectedly\n"); } - assert(ptrace(PTRACE_SYSCALL, child, 0, 0) == 0); + ksft_assert(ptrace(PTRACE_SYSCALL, child, 0, 0) == 0); } ksft_test_result_pass("getrandom timens: PASS\n"); - exit(KSFT_PASS); + ksft_exit_pass(); } static void usage(const char *argv0) diff --git a/tools/testing/selftests/vDSO/vgetrandom-chacha.S b/tools/testing/selftests/vDSO/vgetrandom-chacha.S new file mode 100644 index 000000000000..d6e09af7c0a9 --- /dev/null +++ b/tools/testing/selftests/vDSO/vgetrandom-chacha.S @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#define __ASSEMBLY__ + +#if defined(__aarch64__) +#include "../../../../arch/arm64/kernel/vdso/vgetrandom-chacha.S" +#elif defined(__loongarch__) +#include "../../../../arch/loongarch/vdso/vgetrandom-chacha.S" +#elif defined(__powerpc__) || defined(__powerpc64__) +#include "../../../../arch/powerpc/kernel/vdso/vgetrandom-chacha.S" +#elif defined(__s390x__) +#include "../../../../arch/s390/kernel/vdso64/vgetrandom-chacha.S" +#elif defined(__x86_64__) +#include "../../../../arch/x86/entry/vdso/vgetrandom-chacha.S" +#endif diff --git a/tools/testing/shared/linux/init.h b/tools/testing/shared/linux/init.h deleted file mode 100644 index 81563c3dfce7..000000000000 --- a/tools/testing/shared/linux/init.h +++ /dev/null @@ -1,2 +0,0 @@ -#define __init -#define __exit diff --git a/tools/testing/shared/maple-shared.h b/tools/testing/shared/maple-shared.h index 3d847edd149d..dc4d30f3860b 100644 --- a/tools/testing/shared/maple-shared.h +++ b/tools/testing/shared/maple-shared.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef __MAPLE_SHARED_H__ +#define __MAPLE_SHARED_H__ #define CONFIG_DEBUG_MAPLE_TREE #define CONFIG_MAPLE_SEARCH @@ -7,3 +9,5 @@ #include <stdlib.h> #include <time.h> #include "linux/init.h" + +#endif /* __MAPLE_SHARED_H__ */ diff --git a/tools/testing/shared/shared.h b/tools/testing/shared/shared.h index f08f683812ad..13fb4d39966b 100644 --- a/tools/testing/shared/shared.h +++ b/tools/testing/shared/shared.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __SHARED_H__ +#define __SHARED_H__ #include <linux/types.h> #include <linux/bug.h> @@ -31,3 +33,5 @@ #ifndef dump_stack #define dump_stack() assert(0) #endif + +#endif /* __SHARED_H__ */ diff --git a/tools/testing/shared/shared.mk b/tools/testing/shared/shared.mk index a05f0588513a..a6bc51d0b0bf 100644 --- a/tools/testing/shared/shared.mk +++ b/tools/testing/shared/shared.mk @@ -15,7 +15,9 @@ SHARED_DEPS = Makefile ../shared/shared.mk ../shared/*.h generated/map-shift.h \ ../../../include/linux/maple_tree.h \ ../../../include/linux/radix-tree.h \ ../../../lib/radix-tree.h \ - ../../../include/linux/idr.h + ../../../include/linux/idr.h \ + ../../../lib/maple_tree.c \ + ../../../lib/test_maple_tree.c ifndef SHIFT SHIFT=3 diff --git a/tools/testing/shared/xarray-shared.h b/tools/testing/shared/xarray-shared.h index ac2d16ff53ae..d50de7884803 100644 --- a/tools/testing/shared/xarray-shared.h +++ b/tools/testing/shared/xarray-shared.h @@ -1,4 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0+ */ +#ifndef __XARRAY_SHARED_H__ +#define __XARRAY_SHARED_H__ #define XA_DEBUG #include "shared.h" + +#endif /* __XARRAY_SHARED_H__ */ diff --git a/tools/tracing/rtla/Makefile.rtla b/tools/tracing/rtla/Makefile.rtla index 3ff0b8970896..cc1d6b615475 100644 --- a/tools/tracing/rtla/Makefile.rtla +++ b/tools/tracing/rtla/Makefile.rtla @@ -38,7 +38,7 @@ BINDIR := /usr/bin .PHONY: install install: doc_install @$(MKDIR) -p $(DESTDIR)$(BINDIR) - $(call QUIET_INSTALL,rtla)$(INSTALL) rtla -m 755 $(DESTDIR)$(BINDIR) + $(call QUIET_INSTALL,rtla)$(INSTALL) $(RTLA) -m 755 $(DESTDIR)$(BINDIR) @$(STRIP) $(DESTDIR)$(BINDIR)/rtla @test ! -f $(DESTDIR)$(BINDIR)/osnoise || $(RM) $(DESTDIR)$(BINDIR)/osnoise @$(LN) rtla $(DESTDIR)$(BINDIR)/osnoise diff --git a/tools/tracing/rtla/src/osnoise_top.c b/tools/tracing/rtla/src/osnoise_top.c index 2f756628613d..30e3853076a0 100644 --- a/tools/tracing/rtla/src/osnoise_top.c +++ b/tools/tracing/rtla/src/osnoise_top.c @@ -442,7 +442,7 @@ struct osnoise_top_params *osnoise_top_parse_args(int argc, char **argv) case 'd': params->duration = parse_seconds_duration(optarg); if (!params->duration) - osnoise_top_usage(params, "Invalid -D duration\n"); + osnoise_top_usage(params, "Invalid -d duration\n"); break; case 'e': tevent = trace_event_alloc(optarg); diff --git a/tools/tracing/rtla/src/timerlat_top.c b/tools/tracing/rtla/src/timerlat_top.c index 8c16419fe22a..210b0f533534 100644 --- a/tools/tracing/rtla/src/timerlat_top.c +++ b/tools/tracing/rtla/src/timerlat_top.c @@ -459,7 +459,7 @@ static void timerlat_top_usage(char *usage) " -c/--cpus cpus: run the tracer only on the given cpus", " -H/--house-keeping cpus: run rtla control threads only on the given cpus", " -C/--cgroup[=cgroup_name]: set cgroup, if no cgroup_name is passed, the rtla's cgroup will be inherited", - " -d/--duration time[m|h|d]: duration of the session in seconds", + " -d/--duration time[s|m|h|d]: duration of the session", " -D/--debug: print debug info", " --dump-tasks: prints the task running on all CPUs if stop conditions are met (depends on !--no-aa)", " -t/--trace[file]: save the stopped trace to [file|timerlat_trace.txt]", @@ -613,7 +613,7 @@ static struct timerlat_top_params case 'd': params->duration = parse_seconds_duration(optarg); if (!params->duration) - timerlat_top_usage("Invalid -D duration\n"); + timerlat_top_usage("Invalid -d duration\n"); break; case 'e': tevent = trace_event_alloc(optarg); diff --git a/tools/usb/p9_fwd.py b/tools/usb/p9_fwd.py new file mode 100755 index 000000000000..12c76cbb046b --- /dev/null +++ b/tools/usb/p9_fwd.py @@ -0,0 +1,243 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import argparse +import errno +import logging +import socket +import struct +import time + +import usb.core +import usb.util + + +def path_from_usb_dev(dev): + """Takes a pyUSB device as argument and returns a string. + The string is a Path representation of the position of the USB device on the USB bus tree. + + This path is used to find a USB device on the bus or all devices connected to a HUB. + The path is made up of the number of the USB controller followed be the ports of the HUB tree.""" + if dev.port_numbers: + dev_path = ".".join(str(i) for i in dev.port_numbers) + return f"{dev.bus}-{dev_path}" + return "" + + +HEXDUMP_FILTER = "".join(chr(x).isprintable() and chr(x) or "." for x in range(128)) + "." * 128 + + +class Forwarder: + @staticmethod + def _log_hexdump(data): + if not logging.root.isEnabledFor(logging.TRACE): + return + L = 16 + for c in range(0, len(data), L): + chars = data[c : c + L] + dump = " ".join(f"{x:02x}" for x in chars) + printable = "".join(HEXDUMP_FILTER[x] for x in chars) + line = f"{c:08x} {dump:{L*3}s} |{printable:{L}s}|" + logging.root.log(logging.TRACE, "%s", line) + + def __init__(self, server, vid, pid, path): + self.stats = { + "c2s packets": 0, + "c2s bytes": 0, + "s2c packets": 0, + "s2c bytes": 0, + } + self.stats_logged = time.monotonic() + + def find_filter(dev): + dev_path = path_from_usb_dev(dev) + if path is not None: + return dev_path == path + return True + + dev = usb.core.find(idVendor=vid, idProduct=pid, custom_match=find_filter) + if dev is None: + raise ValueError("Device not found") + + logging.info(f"found device: {dev.bus}/{dev.address} located at {path_from_usb_dev(dev)}") + + # dev.set_configuration() is not necessary since g_multi has only one + usb9pfs = None + # g_multi adds 9pfs as last interface + cfg = dev.get_active_configuration() + for intf in cfg: + # we have to detach the usb-storage driver from multi gadget since + # stall option could be set, which will lead to spontaneous port + # resets and our transfers will run dead + if intf.bInterfaceClass == 0x08: + if dev.is_kernel_driver_active(intf.bInterfaceNumber): + dev.detach_kernel_driver(intf.bInterfaceNumber) + + if intf.bInterfaceClass == 0xFF and intf.bInterfaceSubClass == 0xFF and intf.bInterfaceProtocol == 0x09: + usb9pfs = intf + if usb9pfs is None: + raise ValueError("Interface not found") + + logging.info(f"claiming interface:\n{usb9pfs}") + usb.util.claim_interface(dev, usb9pfs.bInterfaceNumber) + ep_out = usb.util.find_descriptor( + usb9pfs, + custom_match=lambda e: usb.util.endpoint_direction(e.bEndpointAddress) == usb.util.ENDPOINT_OUT, + ) + assert ep_out is not None + ep_in = usb.util.find_descriptor( + usb9pfs, + custom_match=lambda e: usb.util.endpoint_direction(e.bEndpointAddress) == usb.util.ENDPOINT_IN, + ) + assert ep_in is not None + logging.info("interface claimed") + + self.ep_out = ep_out + self.ep_in = ep_in + self.dev = dev + + # create and connect socket + self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + self.s.connect(server) + + logging.info("connected to server") + + def c2s(self): + """forward a request from the USB client to the TCP server""" + data = None + while data is None: + try: + logging.log(logging.TRACE, "c2s: reading") + data = self.ep_in.read(self.ep_in.wMaxPacketSize) + except usb.core.USBTimeoutError: + logging.log(logging.TRACE, "c2s: reading timed out") + continue + except usb.core.USBError as e: + if e.errno == errno.EIO: + logging.debug("c2s: reading failed with %s, retrying", repr(e)) + time.sleep(0.5) + continue + logging.error("c2s: reading failed with %s, aborting", repr(e)) + raise + size = struct.unpack("<I", data[:4])[0] + while len(data) < size: + data += self.ep_in.read(size - len(data)) + logging.log(logging.TRACE, "c2s: writing") + self._log_hexdump(data) + self.s.send(data) + logging.debug("c2s: forwarded %i bytes", size) + self.stats["c2s packets"] += 1 + self.stats["c2s bytes"] += size + + def s2c(self): + """forward a response from the TCP server to the USB client""" + logging.log(logging.TRACE, "s2c: reading") + data = self.s.recv(4) + size = struct.unpack("<I", data[:4])[0] + while len(data) < size: + data += self.s.recv(size - len(data)) + logging.log(logging.TRACE, "s2c: writing") + self._log_hexdump(data) + while data: + written = self.ep_out.write(data) + assert written > 0 + data = data[written:] + if size % self.ep_out.wMaxPacketSize == 0: + logging.log(logging.TRACE, "sending zero length packet") + self.ep_out.write(b"") + logging.debug("s2c: forwarded %i bytes", size) + self.stats["s2c packets"] += 1 + self.stats["s2c bytes"] += size + + def log_stats(self): + logging.info("statistics:") + for k, v in self.stats.items(): + logging.info(f" {k+':':14s} {v}") + + def log_stats_interval(self, interval=5): + if (time.monotonic() - self.stats_logged) < interval: + return + + self.log_stats() + self.stats_logged = time.monotonic() + + +def try_get_usb_str(dev, name): + try: + with open(f"/sys/bus/usb/devices/{dev.bus}-{dev.address}/{name}") as f: + return f.read().strip() + except FileNotFoundError: + return None + + +def list_usb(args): + vid, pid = [int(x, 16) for x in args.id.split(":", 1)] + + print("Bus | Addr | Manufacturer | Product | ID | Path") + print("--- | ---- | ---------------- | ---------------- | --------- | ----") + for dev in usb.core.find(find_all=True, idVendor=vid, idProduct=pid): + path = path_from_usb_dev(dev) or "" + manufacturer = try_get_usb_str(dev, "manufacturer") or "unknown" + product = try_get_usb_str(dev, "product") or "unknown" + print( + f"{dev.bus:3} | {dev.address:4} | {manufacturer:16} | {product:16} | {dev.idVendor:04x}:{dev.idProduct:04x} | {path:18}" + ) + + +def connect(args): + vid, pid = [int(x, 16) for x in args.id.split(":", 1)] + + f = Forwarder(server=(args.server, args.port), vid=vid, pid=pid, path=args.path) + + try: + while True: + f.c2s() + f.s2c() + f.log_stats_interval() + finally: + f.log_stats() + + +def main(): + parser = argparse.ArgumentParser( + description="Forward 9PFS requests from USB to TCP", + ) + + parser.add_argument("--id", type=str, default="1d6b:0109", help="vid:pid of target device") + parser.add_argument("--path", type=str, required=False, help="path of target device") + parser.add_argument("-v", "--verbose", action="count", default=0) + + subparsers = parser.add_subparsers() + subparsers.required = True + subparsers.dest = "command" + + parser_list = subparsers.add_parser("list", help="List all connected 9p gadgets") + parser_list.set_defaults(func=list_usb) + + parser_connect = subparsers.add_parser( + "connect", help="Forward messages between the usb9pfs gadget and the 9p server" + ) + parser_connect.set_defaults(func=connect) + connect_group = parser_connect.add_argument_group() + connect_group.required = True + parser_connect.add_argument("-s", "--server", type=str, default="127.0.0.1", help="server hostname") + parser_connect.add_argument("-p", "--port", type=int, default=564, help="server port") + + args = parser.parse_args() + + logging.TRACE = logging.DEBUG - 5 + logging.addLevelName(logging.TRACE, "TRACE") + + if args.verbose >= 2: + level = logging.TRACE + elif args.verbose: + level = logging.DEBUG + else: + level = logging.INFO + logging.basicConfig(level=level, format="%(asctime)-15s %(levelname)-8s %(message)s") + + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/tools/virtio/ringtest/main.c b/tools/virtio/ringtest/main.c index 5a18b2301a63..e471d8e7cfaa 100644 --- a/tools/virtio/ringtest/main.c +++ b/tools/virtio/ringtest/main.c @@ -276,7 +276,7 @@ static void help(void) fprintf(stderr, "Usage: <test> [--help]" " [--host-affinity H]" " [--guest-affinity G]" - " [--ring-size R (default: %d)]" + " [--ring-size R (default: %u)]" " [--run-cycles C (default: %d)]" " [--batch b]" " [--outstanding o]" |