Diffstat (limited to 'tools'): 479 files changed, 24824 insertions(+), 3425 deletions(-)
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index af9d9acaf997..ed5f3892674c 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -431,10 +431,11 @@ enum { /* Device Control API on vcpu fd */ #define KVM_ARM_VCPU_PMU_V3_CTRL 0 -#define KVM_ARM_VCPU_PMU_V3_IRQ 0 -#define KVM_ARM_VCPU_PMU_V3_INIT 1 -#define KVM_ARM_VCPU_PMU_V3_FILTER 2 -#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 +#define KVM_ARM_VCPU_PMU_V3_SET_PMU 3 +#define KVM_ARM_VCPU_PMU_V3_SET_NR_COUNTERS 4 #define KVM_ARM_VCPU_TIMER_CTRL 1 #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 diff --git a/tools/arch/loongarch/include/asm/orc_types.h b/tools/arch/loongarch/include/asm/orc_types.h index caf1f71a1057..d5fa98d1d177 100644 --- a/tools/arch/loongarch/include/asm/orc_types.h +++ b/tools/arch/loongarch/include/asm/orc_types.h @@ -34,7 +34,7 @@ #define ORC_TYPE_REGS 3 #define ORC_TYPE_REGS_PARTIAL 4 -#ifndef __ASSEMBLY__ +#ifndef __ASSEMBLER__ /* * This struct is more or less a vastly simplified version of the DWARF Call * Frame Information standard. It contains only the necessary parts of DWARF @@ -53,6 +53,6 @@ struct orc_entry { unsigned int type:3; unsigned int signal:1; }; -#endif /* __ASSEMBLY__ */ +#endif /* __ASSEMBLER__ */ #endif /* _ORC_TYPES_H */ diff --git a/tools/arch/x86/include/asm/amd/ibs.h b/tools/arch/x86/include/asm/amd/ibs.h index 300b6e0765b2..cbce54fec7b9 100644 --- a/tools/arch/x86/include/asm/amd/ibs.h +++ b/tools/arch/x86/include/asm/amd/ibs.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_IBS_H +#define _ASM_X86_AMD_IBS_H + /* * From PPR Vol 1 for AMD Family 19h Model 01h B1 * 55898 Rev 0.35 - Feb 5, 2021 @@ -151,3 +154,5 @@ struct perf_ibs_data { }; u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; }; + +#endif /* _ASM_X86_AMD_IBS_H */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index e02be2962a01..ee176236c2be 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -336,7 +336,7 @@ #define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ #define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ #define X86_FEATURE_AMD_STIBP_ALWAYS_ON (13*32+17) /* Single Thread Indirect Branch Predictors always-on preferred */ -#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ +#define X86_FEATURE_AMD_IBRS_SAME_MODE (13*32+19) /* Indirect Branch Restricted Speculation same mode protection*/ #define X86_FEATURE_AMD_PPIN (13*32+23) /* "amd_ppin" Protected Processor Inventory Number */ #define X86_FEATURE_AMD_SSBD (13*32+24) /* Speculative Store Bypass Disable */ #define X86_FEATURE_VIRT_SSBD (13*32+25) /* "virt_ssbd" Virtualized Speculative Store Bypass Disable */ @@ -379,6 +379,7 @@ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ #define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_BUS_LOCK_THRESHOLD (15*32+29) /* Bus lock threshold */ #define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ @@ -447,6 +448,7 @@ #define X86_FEATURE_DEBUG_SWAP 
(19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ #define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ #define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ +#define X86_FEATURE_ALLOWED_SEV_FEATURES (19*32+27) /* Allowed SEV Features */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ #define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ @@ -458,6 +460,7 @@ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* SMM_CTL MSR is not present */ +#define X86_FEATURE_PREFETCHI (20*32+20) /* Prefetch Data/Instruction to Cache Level */ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ @@ -482,7 +485,8 @@ #define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ -#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+ 9) /* Use thunk for indirect branches in lower half of cacheline */ +#define X86_FEATURE_APX (21*32+ 9) /* Advanced Performance Extensions */ +#define X86_FEATURE_INDIRECT_THUNK_ITS (21*32+10) /* Use thunk for indirect branches in lower half of cacheline */ /* * BUG word(s) @@ -535,6 +539,8 @@ #define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ #define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ -#define X86_BUG_ITS X86_BUG( 1*32+ 6) /* "its" CPU is affected by Indirect Target Selection */ -#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 7) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ +#define X86_BUG_OLD_MICROCODE X86_BUG( 1*32+ 6) /* "old_microcode" CPU has old microcode, it is surely vulnerable to something */ +#define X86_BUG_ITS X86_BUG( 1*32+ 7) /* "its" CPU is affected by Indirect Target Selection */ +#define X86_BUG_ITS_NATIVE_ONLY X86_BUG( 1*32+ 8) /* "its_native_only" CPU is affected by ITS, VMX is not affected */ + #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index e7d2f460fcc6..5cfb5d74dd5f 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -533,7 +533,7 @@ #define MSR_HWP_CAPABILITIES 0x00000771 #define MSR_HWP_REQUEST_PKG 0x00000772 #define MSR_HWP_INTERRUPT 0x00000773 -#define MSR_HWP_REQUEST 0x00000774 +#define MSR_HWP_REQUEST 0x00000774 #define MSR_HWP_STATUS 0x00000777 /* CPUID.6.EAX */ @@ -550,16 +550,16 @@ #define HWP_LOWEST_PERF(x) (((x) >> 24) & 0xff) /* IA32_HWP_REQUEST */ -#define HWP_MIN_PERF(x) (x & 0xff) -#define HWP_MAX_PERF(x) ((x & 0xff) << 8) +#define HWP_MIN_PERF(x) (x & 0xff) +#define HWP_MAX_PERF(x) ((x & 0xff) << 8) #define HWP_DESIRED_PERF(x) ((x & 0xff) << 16) -#define HWP_ENERGY_PERF_PREFERENCE(x) (((unsigned long long) x & 0xff) << 24) +#define HWP_ENERGY_PERF_PREFERENCE(x) (((u64)x & 0xff) << 24) #define HWP_EPP_PERFORMANCE 0x00 #define HWP_EPP_BALANCE_PERFORMANCE 0x80 #define HWP_EPP_BALANCE_POWERSAVE 0xC0 #define 
HWP_EPP_POWERSAVE 0xFF -#define HWP_ACTIVITY_WINDOW(x) ((unsigned long long)(x & 0xff3) << 32) -#define HWP_PACKAGE_CONTROL(x) ((unsigned long long)(x & 0x1) << 42) +#define HWP_ACTIVITY_WINDOW(x) ((u64)(x & 0xff3) << 32) +#define HWP_PACKAGE_CONTROL(x) ((u64)(x & 0x1) << 42) /* IA32_HWP_STATUS */ #define HWP_GUARANTEED_CHANGE(x) (x & 0x1) @@ -602,7 +602,11 @@ /* V6 PMON MSR range */ #define MSR_IA32_PMC_V6_GP0_CTR 0x1900 #define MSR_IA32_PMC_V6_GP0_CFG_A 0x1901 +#define MSR_IA32_PMC_V6_GP0_CFG_B 0x1902 +#define MSR_IA32_PMC_V6_GP0_CFG_C 0x1903 #define MSR_IA32_PMC_V6_FX0_CTR 0x1980 +#define MSR_IA32_PMC_V6_FX0_CFG_B 0x1982 +#define MSR_IA32_PMC_V6_FX0_CFG_C 0x1983 #define MSR_IA32_PMC_V6_STEP 4 /* KeyID partitioning between MKTME and TDX */ @@ -624,6 +628,7 @@ #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 #define MSR_AMD_PPIN 0xc00102f1 +#define MSR_AMD64_CPUID_FN_7 0xc0011002 #define MSR_AMD64_CPUID_FN_1 0xc0011004 #define MSR_AMD64_LS_CFG 0xc0011020 #define MSR_AMD64_DC_CFG 0xc0011022 diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index b663d916f162..6f3499507c5e 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -441,6 +441,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) #define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) +#define KVM_X86_QUIRK_IGNORE_GUEST_PAT (1 << 9) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 @@ -931,4 +932,74 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SNP_VM 4 #define KVM_X86_TDX_VM 5 +/* Trust Domain eXtension sub-ioctl() commands. */ +enum kvm_tdx_cmd_id { + KVM_TDX_CAPABILITIES = 0, + KVM_TDX_INIT_VM, + KVM_TDX_INIT_VCPU, + KVM_TDX_INIT_MEM_REGION, + KVM_TDX_FINALIZE_VM, + KVM_TDX_GET_CPUID, + + KVM_TDX_CMD_NR_MAX, +}; + +struct kvm_tdx_cmd { + /* enum kvm_tdx_cmd_id */ + __u32 id; + /* flags for sub-commend. If sub-command doesn't use this, set zero. */ + __u32 flags; + /* + * data for each sub-command. An immediate or a pointer to the actual + * data in process virtual address. If sub-command doesn't use it, + * set zero. + */ + __u64 data; + /* + * Auxiliary error code. The sub-command may return TDX SEAMCALL + * status code in addition to -Exxx. + */ + __u64 hw_error; +}; + +struct kvm_tdx_capabilities { + __u64 supported_attrs; + __u64 supported_xfam; + __u64 reserved[254]; + + /* Configurable CPUID bits for userspace */ + struct kvm_cpuid2 cpuid; +}; + +struct kvm_tdx_init_vm { + __u64 attributes; + __u64 xfam; + __u64 mrconfigid[6]; /* sha384 digest */ + __u64 mrowner[6]; /* sha384 digest */ + __u64 mrownerconfig[6]; /* sha384 digest */ + + /* The total space for TD_PARAMS before the CPUIDs is 256 bytes */ + __u64 reserved[12]; + + /* + * Call KVM_TDX_INIT_VM before vcpu creation, thus before + * KVM_SET_CPUID2. + * This configuration supersedes KVM_SET_CPUID2s for VCPUs because the + * TDX module directly virtualizes those CPUIDs without VMM. The user + * space VMM, e.g. qemu, should make KVM_SET_CPUID2 consistent with + * those values. If it doesn't, KVM may have wrong idea of vCPUIDs of + * the guest, and KVM may wrongly emulate CPUIDs or MSRs that the TDX + * module doesn't virtualize. 
+ */ + struct kvm_cpuid2 cpuid; +}; + +#define KVM_TDX_MEASURE_MEMORY_REGION _BITULL(0) + +struct kvm_tdx_init_mem_region { + __u64 source_addr; + __u64 gpa; + __u64 nr_pages; +}; + #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index ec1321248dac..9c640a521a67 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -95,6 +95,7 @@ #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_BUS_LOCK 0x0a5 #define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 @@ -225,6 +226,7 @@ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_BUS_LOCK, "buslock" }, \ { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h index a5faf6d88f1b..f0f4a4cf84a7 100644 --- a/tools/arch/x86/include/uapi/asm/vmx.h +++ b/tools/arch/x86/include/uapi/asm/vmx.h @@ -34,6 +34,7 @@ #define EXIT_REASON_TRIPLE_FAULT 2 #define EXIT_REASON_INIT_SIGNAL 3 #define EXIT_REASON_SIPI_SIGNAL 4 +#define EXIT_REASON_OTHER_SMI 6 #define EXIT_REASON_INTERRUPT_WINDOW 7 #define EXIT_REASON_NMI_WINDOW 8 @@ -92,6 +93,7 @@ #define EXIT_REASON_TPAUSE 68 #define EXIT_REASON_BUS_LOCK 74 #define EXIT_REASON_NOTIFY 75 +#define EXIT_REASON_TDCALL 77 #define VMX_EXIT_REASONS \ { EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \ @@ -155,7 +157,8 @@ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ { EXIT_REASON_TPAUSE, "TPAUSE" }, \ { EXIT_REASON_BUS_LOCK, "BUS_LOCK" }, \ - { EXIT_REASON_NOTIFY, "NOTIFY" } + { EXIT_REASON_NOTIFY, "NOTIFY" }, \ + { EXIT_REASON_TDCALL, "TDCALL" } #define VMX_EXIT_REASON_FLAGS \ { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S index 59cf6f9065aa..ccc3d923fc1e 100644 --- a/tools/arch/x86/lib/memcpy_64.S +++ b/tools/arch/x86/lib/memcpy_64.S @@ -40,6 +40,7 @@ SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) SYM_FUNC_ALIAS_MEMFUNC(memcpy, __memcpy) +SYM_PIC_ALIAS(memcpy) EXPORT_SYMBOL(memcpy) SYM_FUNC_START_LOCAL(memcpy_orig) diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index d66b710d628f..fb5a03cf5ab7 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -42,6 +42,7 @@ SYM_FUNC_END(__memset) EXPORT_SYMBOL(__memset) SYM_FUNC_ALIAS_MEMFUNC(memset, __memset) +SYM_PIC_ALIAS(memset) EXPORT_SYMBOL(memset) SYM_FUNC_START_LOCAL(memset_orig) diff --git a/tools/bootconfig/main.c b/tools/bootconfig/main.c index 8a48cc2536f5..57c669d2aa90 100644 --- a/tools/bootconfig/main.c +++ b/tools/bootconfig/main.c @@ -11,11 +11,16 @@ #include <string.h> #include <errno.h> #include <endian.h> +#include <assert.h> #include <linux/bootconfig.h> #define pr_err(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__) +/* Bootconfig footer is [size][csum][BOOTCONFIG_MAGIC]. 
*/ +#define BOOTCONFIG_FOOTER_SIZE \ + (sizeof(uint32_t) * 2 + BOOTCONFIG_MAGIC_LEN) + static int xbc_show_value(struct xbc_node *node, bool semicolon) { const char *val, *eol; @@ -185,7 +190,7 @@ static int load_xbc_from_initrd(int fd, char **buf) if (ret < 0) return -errno; - if (stat.st_size < 8 + BOOTCONFIG_MAGIC_LEN) + if (stat.st_size < BOOTCONFIG_FOOTER_SIZE) return 0; if (lseek(fd, -BOOTCONFIG_MAGIC_LEN, SEEK_END) < 0) @@ -198,7 +203,7 @@ static int load_xbc_from_initrd(int fd, char **buf) if (memcmp(magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN) != 0) return 0; - if (lseek(fd, -(8 + BOOTCONFIG_MAGIC_LEN), SEEK_END) < 0) + if (lseek(fd, -BOOTCONFIG_FOOTER_SIZE, SEEK_END) < 0) return pr_errno("Failed to lseek for size", -errno); if (read(fd, &size, sizeof(uint32_t)) < 0) @@ -210,12 +215,12 @@ static int load_xbc_from_initrd(int fd, char **buf) csum = le32toh(csum); /* Wrong size error */ - if (stat.st_size < size + 8 + BOOTCONFIG_MAGIC_LEN) { + if (stat.st_size < size + BOOTCONFIG_FOOTER_SIZE) { pr_err("bootconfig size is too big\n"); return -E2BIG; } - if (lseek(fd, stat.st_size - (size + 8 + BOOTCONFIG_MAGIC_LEN), + if (lseek(fd, stat.st_size - (size + BOOTCONFIG_FOOTER_SIZE), SEEK_SET) < 0) return pr_errno("Failed to lseek", -errno); @@ -346,7 +351,7 @@ static int delete_xbc(const char *path) ret = fstat(fd, &stat); if (!ret) ret = ftruncate(fd, stat.st_size - - size - 8 - BOOTCONFIG_MAGIC_LEN); + - size - BOOTCONFIG_FOOTER_SIZE); if (ret) ret = -errno; } /* Ignore if there is no boot config in initrd */ @@ -359,7 +364,12 @@ static int delete_xbc(const char *path) static int apply_xbc(const char *path, const char *xbc_path) { - char *buf, *data, *p; + struct { + uint32_t size; + uint32_t csum; + char magic[BOOTCONFIG_MAGIC_LEN]; + } footer; + char *buf, *data; size_t total_size; struct stat stat; const char *msg; @@ -376,8 +386,7 @@ static int apply_xbc(const char *path, const char *xbc_path) csum = xbc_calc_checksum(buf, size); /* Backup the bootconfig data */ - data = calloc(size + BOOTCONFIG_ALIGN + - sizeof(uint32_t) + sizeof(uint32_t) + BOOTCONFIG_MAGIC_LEN, 1); + data = calloc(size + BOOTCONFIG_ALIGN + BOOTCONFIG_FOOTER_SIZE, 1); if (!data) return -ENOMEM; memcpy(data, buf, size); @@ -425,22 +434,18 @@ static int apply_xbc(const char *path, const char *xbc_path) } /* To align up the total size to BOOTCONFIG_ALIGN, get padding size */ - total_size = stat.st_size + size + sizeof(uint32_t) * 2 + BOOTCONFIG_MAGIC_LEN; + total_size = stat.st_size + size + BOOTCONFIG_FOOTER_SIZE; pad = ((total_size + BOOTCONFIG_ALIGN - 1) & (~BOOTCONFIG_ALIGN_MASK)) - total_size; size += pad; /* Add a footer */ - p = data + size; - *(uint32_t *)p = htole32(size); - p += sizeof(uint32_t); - - *(uint32_t *)p = htole32(csum); - p += sizeof(uint32_t); - - memcpy(p, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); - p += BOOTCONFIG_MAGIC_LEN; + footer.size = htole32(size); + footer.csum = htole32(csum); + memcpy(footer.magic, BOOTCONFIG_MAGIC, BOOTCONFIG_MAGIC_LEN); + static_assert(sizeof(footer) == BOOTCONFIG_FOOTER_SIZE); + memcpy(data + size, &footer, BOOTCONFIG_FOOTER_SIZE); - total_size = p - data; + total_size = size + BOOTCONFIG_FOOTER_SIZE; ret = write(fd, data, total_size); if (ret < total_size) { diff --git a/tools/bootconfig/scripts/ftrace.sh b/tools/bootconfig/scripts/ftrace.sh index 186eed923041..cc5250c64699 100644 --- a/tools/bootconfig/scripts/ftrace.sh +++ b/tools/bootconfig/scripts/ftrace.sh @@ -1,3 +1,4 @@ +#!/bin/sh # SPDX-License-Identifier: GPL-2.0-only clear_trace() { # reset trace 
output diff --git a/tools/bootconfig/test-bootconfig.sh b/tools/bootconfig/test-bootconfig.sh index a2c484c243f5..7594659af1e1 100755 --- a/tools/bootconfig/test-bootconfig.sh +++ b/tools/bootconfig/test-bootconfig.sh @@ -27,16 +27,16 @@ NO=1 xpass() { # pass test command echo "test case $NO ($*)... " - if ! ($@ && echo "\t\t[OK]"); then - echo "\t\t[NG]"; NG=$((NG + 1)) + if ! ($@ && printf "\t\t[OK]\n"); then + printf "\t\t[NG]\n"; NG=$((NG + 1)) fi NO=$((NO + 1)) } xfail() { # fail test command echo "test case $NO ($*)... " - if ! (! $@ && echo "\t\t[OK]"); then - echo "\t\t[NG]"; NG=$((NG + 1)) + if ! (! $@ && printf "\t\t[OK]\n"); then + printf "\t\t[NG]\n"; NG=$((NG + 1)) fi NO=$((NO + 1)) } @@ -48,13 +48,13 @@ echo "Delete command should success without bootconfig" xpass $BOOTCONF -d $INITRD dd if=/dev/zero of=$INITRD bs=4096 count=1 -echo "key = value;" > $TEMPCONF -bconf_size=$(stat -c %s $TEMPCONF) -initrd_size=$(stat -c %s $INITRD) +printf "key = value;" > $TEMPCONF +bconf_size=$(wc -c < $TEMPCONF) +initrd_size=$(wc -c < $INITRD) echo "Apply command test" xpass $BOOTCONF -a $TEMPCONF $INITRD -new_size=$(stat -c %s $INITRD) +new_size=$(wc -c < $INITRD) echo "Show command test" xpass $BOOTCONF $INITRD @@ -69,13 +69,13 @@ echo "Apply command repeat test" xpass $BOOTCONF -a $TEMPCONF $INITRD echo "File size check" -xpass test $new_size -eq $(stat -c %s $INITRD) +xpass test $new_size -eq $(wc -c < $INITRD) echo "Delete command check" xpass $BOOTCONF -d $INITRD echo "File size check" -new_size=$(stat -c %s $INITRD) +new_size=$(wc -c < $INITRD) xpass test $new_size -eq $initrd_size echo "No error messge while applying" @@ -97,19 +97,20 @@ BEGIN { ' > $TEMPCONF xpass $BOOTCONF -a $TEMPCONF $INITRD -echo "badnode" >> $TEMPCONF +printf "badnode\n" >> $TEMPCONF xfail $BOOTCONF -a $TEMPCONF $INITRD echo "Max filesize check" # Max size is 32767 (including terminal byte) -echo -n "data = \"" > $TEMPCONF +printf "data = \"" > $TEMPCONF dd if=/dev/urandom bs=768 count=32 | base64 -w0 >> $TEMPCONF -echo "\"" >> $TEMPCONF +printf "\"\n" >> $TEMPCONF xfail $BOOTCONF -a $TEMPCONF $INITRD -truncate -s 32764 $TEMPCONF -echo "\"" >> $TEMPCONF # add 2 bytes + terminal ('\"\n\0') +dd if=$TEMPCONF of=$OUTFILE bs=1 count=32764 +cp $OUTFILE $TEMPCONF +printf "\"\n" >> $TEMPCONF # add 2 bytes + terminal ('\"\n\0') xpass $BOOTCONF -a $TEMPCONF $INITRD echo "Adding same-key values" @@ -139,7 +140,7 @@ xfail grep -q "baz" $OUTFILE xpass grep -q "qux" $OUTFILE echo "Double/single quotes test" -echo "key = '\"string\"';" > $TEMPCONF +printf "key = '\"string\"';" > $TEMPCONF $BOOTCONF -a $TEMPCONF $INITRD $BOOTCONF $INITRD > $TEMPCONF cat $TEMPCONF @@ -167,8 +168,8 @@ echo > $INITRD xpass $BOOTCONF -a $TEMPCONF $INITRD $BOOTCONF $INITRD > $OUTFILE -xfail grep -q val[[:space:]] $OUTFILE -xpass grep -q val2[[:space:]] $OUTFILE +xfail grep -q 'val[[:space:]]' $OUTFILE +xpass grep -q 'val2[[:space:]]' $OUTFILE echo "=== expected failure cases ===" for i in samples/bad-* ; do diff --git a/tools/bpf/bpf_jit_disasm.c b/tools/bpf/bpf_jit_disasm.c index 1baee9e2aba9..5ab8f80e2834 100644 --- a/tools/bpf/bpf_jit_disasm.c +++ b/tools/bpf/bpf_jit_disasm.c @@ -45,6 +45,8 @@ static void get_exec_path(char *tpath, size_t size) assert(path); len = readlink(path, tpath, size); + if (len < 0) + len = 0; tpath[len] = 0; free(path); diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index da3152c16228..f69fd92df8d8 100644 --- 
a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -35,6 +35,7 @@ PROG COMMANDS | **bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*] | **bpftool** **prog tracelog** +| **bpftool** **prog tracelog** [ { **stdout** | **stderr** } *PROG* ] | **bpftool** **prog run** *PROG* **data_in** *FILE* [**data_out** *FILE* [**data_size_out** *L*]] [**ctx_in** *FILE* [**ctx_out** *FILE* [**ctx_size_out** *M*]]] [**repeat** *N*] | **bpftool** **prog profile** *PROG* [**duration** *DURATION*] *METRICs* | **bpftool** **prog help** @@ -179,6 +180,12 @@ bpftool prog tracelog purposes. For streaming data from BPF programs to user space, one can use perf events (see also **bpftool-map**\ (8)). +bpftool prog tracelog { stdout | stderr } *PROG* + Dump the BPF stream of the program. BPF programs can write to these streams + at runtime with the **bpf_stream_vprintk**\ () kfunc. The kernel may write + error messages to the standard error stream. This facility should be used + only for debugging purposes. + bpftool prog run *PROG* data_in *FILE* [data_out *FILE* [data_size_out *L*]] [ctx_in *FILE* [ctx_out *FILE* [ctx_size_out *M*]]] [repeat *N*] Run BPF program *PROG* in the kernel testing infrastructure for BPF, meaning that the program works on the data and context provided by the diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 27512feb5c70..a759ba24471d 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -518,7 +518,21 @@ _bpftool() esac ;; tracelog) - return 0 + case $prev in + $command) + COMPREPLY+=( $( compgen -W "stdout stderr" -- \ + "$cur" ) ) + return 0 + ;; + stdout|stderr) + COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \ + "$cur" ) ) + return 0 + ;; + *) + return 0 + ;; + esac ;; profile) case $cword in diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index 6b14cbfa58aa..946612029dee 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -905,7 +905,8 @@ static int do_dump(int argc, char **argv) return -1; } - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, + BPF_F_RDONLY); if (fd < 0) return -1; @@ -1118,10 +1119,13 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, [BPF_OBJ_PROG] = "prog", [BPF_OBJ_MAP] = "map", }; + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts_ro); __u32 btf_id, id = 0; int err; int fd; + opts_ro.open_flags = BPF_F_RDONLY; + while (true) { switch (type) { case BPF_OBJ_PROG: @@ -1151,7 +1155,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type, fd = bpf_prog_get_fd_by_id(id); break; case BPF_OBJ_MAP: - fd = bpf_map_get_fd_by_id(id); + fd = bpf_map_get_fd_by_id_opts(id, &opts_ro); break; default: err = -1; diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index ecfa790adc13..b07317d2842f 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -4,6 +4,7 @@ #ifndef _GNU_SOURCE #define _GNU_SOURCE #endif +#include <assert.h> #include <ctype.h> #include <errno.h> #include <fcntl.h> @@ -193,7 +194,8 @@ int mount_tracefs(const char *target) return err; } -int open_obj_pinned(const char *path, bool quiet) +int open_obj_pinned(const char *path, bool quiet, + const struct bpf_obj_get_opts *opts) { char *pname; int fd = -1; @@ -205,7 +207,7 @@ int open_obj_pinned(const char *path, bool quiet) goto out_ret; } - 
fd = bpf_obj_get(pname); + fd = bpf_obj_get_opts(pname, opts); if (fd < 0) { if (!quiet) p_err("bpf obj get (%s): %s", pname, @@ -221,12 +223,13 @@ out_ret: return fd; } -int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type) +int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type, + const struct bpf_obj_get_opts *opts) { enum bpf_obj_type type; int fd; - fd = open_obj_pinned(path, false); + fd = open_obj_pinned(path, false, opts); if (fd < 0) return -1; @@ -555,7 +558,7 @@ static int do_build_table_cb(const char *fpath, const struct stat *sb, if (typeflag != FTW_F) goto out_ret; - fd = open_obj_pinned(fpath, true); + fd = open_obj_pinned(fpath, true, NULL); if (fd < 0) goto out_ret; @@ -928,7 +931,7 @@ int prog_parse_fds(int *argc, char ***argv, int **fds) path = **argv; NEXT_ARGP(); - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG, NULL); if ((*fds)[0] < 0) return -1; return 1; @@ -965,7 +968,8 @@ exit_free: return fd; } -static int map_fd_by_name(char *name, int **fds) +static int map_fd_by_name(char *name, int **fds, + const struct bpf_get_fd_by_id_opts *opts) { unsigned int id = 0; int fd, nb_fds = 0; @@ -973,6 +977,7 @@ static int map_fd_by_name(char *name, int **fds) int err; while (true) { + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts_ro); struct bpf_map_info info = {}; __u32 len = sizeof(info); @@ -985,7 +990,9 @@ static int map_fd_by_name(char *name, int **fds) return nb_fds; } - fd = bpf_map_get_fd_by_id(id); + /* Request a read-only fd to query the map info */ + opts_ro.open_flags = BPF_F_RDONLY; + fd = bpf_map_get_fd_by_id_opts(id, &opts_ro); if (fd < 0) { p_err("can't get map by id (%u): %s", id, strerror(errno)); @@ -1004,6 +1011,19 @@ static int map_fd_by_name(char *name, int **fds) continue; } + /* Get an fd with the requested options, if they differ + * from the read-only options used to get the fd above. 
+ */ + if (memcmp(opts, &opts_ro, sizeof(opts_ro))) { + close(fd); + fd = bpf_map_get_fd_by_id_opts(id, opts); + if (fd < 0) { + p_err("can't get map by id (%u): %s", id, + strerror(errno)); + goto err_close_fds; + } + } + if (nb_fds > 0) { tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); if (!tmp) { @@ -1023,8 +1043,13 @@ err_close_fds: return -1; } -int map_parse_fds(int *argc, char ***argv, int **fds) +int map_parse_fds(int *argc, char ***argv, int **fds, __u32 open_flags) { + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts); + + assert((open_flags & ~BPF_F_RDONLY) == 0); + opts.open_flags = open_flags; + if (is_prefix(**argv, "id")) { unsigned int id; char *endptr; @@ -1038,7 +1063,7 @@ int map_parse_fds(int *argc, char ***argv, int **fds) } NEXT_ARGP(); - (*fds)[0] = bpf_map_get_fd_by_id(id); + (*fds)[0] = bpf_map_get_fd_by_id_opts(id, &opts); if ((*fds)[0] < 0) { p_err("get map by id (%u): %s", id, strerror(errno)); return -1; @@ -1056,16 +1081,18 @@ int map_parse_fds(int *argc, char ***argv, int **fds) } NEXT_ARGP(); - return map_fd_by_name(name, fds); + return map_fd_by_name(name, fds, &opts); } else if (is_prefix(**argv, "pinned")) { char *path; + LIBBPF_OPTS(bpf_obj_get_opts, get_opts); + get_opts.file_flags = open_flags; NEXT_ARGP(); path = **argv; NEXT_ARGP(); - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP, &get_opts); if ((*fds)[0] < 0) return -1; return 1; @@ -1075,7 +1102,7 @@ int map_parse_fds(int *argc, char ***argv, int **fds) return -1; } -int map_parse_fd(int *argc, char ***argv) +int map_parse_fd(int *argc, char ***argv, __u32 open_flags) { int *fds = NULL; int nb_fds, fd; @@ -1085,7 +1112,7 @@ int map_parse_fd(int *argc, char ***argv) p_err("mem alloc failed"); return -1; } - nb_fds = map_parse_fds(argc, argv, &fds); + nb_fds = map_parse_fds(argc, argv, &fds, open_flags); if (nb_fds != 1) { if (nb_fds > 1) { p_err("several maps match this handle"); @@ -1103,12 +1130,12 @@ exit_free: } int map_parse_fd_and_info(int *argc, char ***argv, struct bpf_map_info *info, - __u32 *info_len) + __u32 *info_len, __u32 open_flags) { int err; int fd; - fd = map_parse_fd(argc, argv); + fd = map_parse_fd(argc, argv, open_flags); if (fd < 0) return -1; diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c index 5c39c2ed36a2..df5f0d1e07e8 100644 --- a/tools/bpf/bpftool/iter.c +++ b/tools/bpf/bpftool/iter.c @@ -37,7 +37,7 @@ static int do_pin(int argc, char **argv) return -1; } - map_fd = map_parse_fd(&argc, &argv); + map_fd = map_parse_fd(&argc, &argv, BPF_F_RDONLY); if (map_fd < 0) return -1; diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index 3535afc80a49..a773e05d5ade 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -117,7 +117,7 @@ static int link_parse_fd(int *argc, char ***argv) path = **argv; NEXT_ARGP(); - return open_obj_pinned_any(path, BPF_OBJ_LINK); + return open_obj_pinned_any(path, BPF_OBJ_LINK, NULL); } p_err("expected 'id' or 'pinned', got: '%s'?", **argv); @@ -485,6 +485,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_RAW_TRACEPOINT: jsonw_string_field(json_wtr, "tp_name", u64_to_ptr(info->raw_tracepoint.tp_name)); + jsonw_uint_field(json_wtr, "cookie", info->raw_tracepoint.cookie); break; case BPF_LINK_TYPE_TRACING: err = get_prog_info(info->prog_id, &prog_info); @@ -502,6 +503,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) json_wtr); jsonw_uint_field(json_wtr, "target_obj_id", 
info->tracing.target_obj_id); jsonw_uint_field(json_wtr, "target_btf_id", info->tracing.target_btf_id); + jsonw_uint_field(json_wtr, "cookie", info->tracing.cookie); break; case BPF_LINK_TYPE_CGROUP: jsonw_lluint_field(json_wtr, "cgroup_id", @@ -879,6 +881,8 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) case BPF_LINK_TYPE_RAW_TRACEPOINT: printf("\n\ttp '%s' ", (const char *)u64_to_ptr(info->raw_tracepoint.tp_name)); + if (info->raw_tracepoint.cookie) + printf("cookie %llu ", info->raw_tracepoint.cookie); break; case BPF_LINK_TYPE_TRACING: err = get_prog_info(info->prog_id, &prog_info); @@ -897,6 +901,8 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\ttarget_obj_id %u target_btf_id %u ", info->tracing.target_obj_id, info->tracing.target_btf_id); + if (info->tracing.cookie) + printf("\n\tcookie %llu ", info->tracing.cookie); break; case BPF_LINK_TYPE_CGROUP: printf("\n\tcgroup_id %zu ", (size_t)info->cgroup.cgroup_id); diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index cd5963cb6058..2b7f2bd3a7db 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -534,9 +534,9 @@ int main(int argc, char **argv) usage(); if (version_requested) - return do_version(argc, argv); - - ret = cmd_select(commands, argc, argv, do_help); + ret = do_version(argc, argv); + else + ret = cmd_select(commands, argc, argv, do_help); if (json_output) jsonw_destroy(&json_wtr); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 9eb764fe4cc8..6db704fda5c0 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -15,6 +15,7 @@ #include <bpf/hashmap.h> #include <bpf/libbpf.h> +#include <bpf/bpf.h> #include "json_writer.h" @@ -140,8 +141,10 @@ void get_prog_full_name(const struct bpf_prog_info *prog_info, int prog_fd, int get_fd_type(int fd); const char *get_fd_type_name(enum bpf_obj_type type); char *get_fdinfo(int fd, const char *key); -int open_obj_pinned(const char *path, bool quiet); -int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type); +int open_obj_pinned(const char *path, bool quiet, + const struct bpf_obj_get_opts *opts); +int open_obj_pinned_any(const char *path, enum bpf_obj_type exp_type, + const struct bpf_obj_get_opts *opts); int mount_bpffs_for_file(const char *file_name); int create_and_mount_bpffs_dir(const char *dir_name); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); @@ -167,10 +170,10 @@ int do_iter(int argc, char **argv) __weak; int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); int prog_parse_fds(int *argc, char ***argv, int **fds); -int map_parse_fd(int *argc, char ***argv); -int map_parse_fds(int *argc, char ***argv, int **fds); +int map_parse_fd(int *argc, char ***argv, __u32 open_flags); +int map_parse_fds(int *argc, char ***argv, int **fds, __u32 open_flags); int map_parse_fd_and_info(int *argc, char ***argv, struct bpf_map_info *info, - __u32 *info_len); + __u32 *info_len, __u32 open_flags); struct bpf_prog_linfo; #if defined(HAVE_LLVM_SUPPORT) || defined(HAVE_LIBBFD_SUPPORT) diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 81cc668b4b05..c9de44a45778 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -337,9 +337,9 @@ static void fill_per_cpu_value(struct bpf_map_info *info, void *value) memcpy(value + i * step, value, info->value_size); } -static int parse_elem(char **argv, struct bpf_map_info *info, - void *key, void 
*value, __u32 key_size, __u32 value_size, - __u32 *flags, __u32 **value_fd) +static int parse_elem(char **argv, struct bpf_map_info *info, void *key, + void *value, __u32 key_size, __u32 value_size, + __u32 *flags, __u32 **value_fd, __u32 open_flags) { if (!*argv) { if (!key && !value) @@ -362,7 +362,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, return -1; return parse_elem(argv, info, NULL, value, key_size, value_size, - flags, value_fd); + flags, value_fd, open_flags); } else if (is_prefix(*argv, "value")) { int fd; @@ -388,7 +388,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, return -1; } - fd = map_parse_fd(&argc, &argv); + fd = map_parse_fd(&argc, &argv, open_flags); if (fd < 0) return -1; @@ -424,7 +424,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, } return parse_elem(argv, info, key, NULL, key_size, value_size, - flags, NULL); + flags, NULL, open_flags); } else if (is_prefix(*argv, "any") || is_prefix(*argv, "noexist") || is_prefix(*argv, "exist")) { if (!flags) { @@ -440,7 +440,7 @@ static int parse_elem(char **argv, struct bpf_map_info *info, *flags = BPF_EXIST; return parse_elem(argv + 1, info, key, value, key_size, - value_size, NULL, value_fd); + value_size, NULL, value_fd, open_flags); } p_err("expected key or value, got: %s", *argv); @@ -639,7 +639,7 @@ static int do_show_subset(int argc, char **argv) p_err("mem alloc failed"); return -1; } - nb_fds = map_parse_fds(&argc, &argv, &fds); + nb_fds = map_parse_fds(&argc, &argv, &fds, BPF_F_RDONLY); if (nb_fds < 1) goto exit_free; @@ -672,12 +672,15 @@ exit_free: static int do_show(int argc, char **argv) { + LIBBPF_OPTS(bpf_get_fd_by_id_opts, opts); struct bpf_map_info info = {}; __u32 len = sizeof(info); __u32 id = 0; int err; int fd; + opts.open_flags = BPF_F_RDONLY; + if (show_pinned) { map_table = hashmap__new(hash_fn_for_key_as_id, equal_fn_for_key_as_id, NULL); @@ -707,7 +710,7 @@ static int do_show(int argc, char **argv) break; } - fd = bpf_map_get_fd_by_id(id); + fd = bpf_map_get_fd_by_id_opts(id, &opts); if (fd < 0) { if (errno == ENOENT) continue; @@ -909,7 +912,7 @@ static int do_dump(int argc, char **argv) p_err("mem alloc failed"); return -1; } - nb_fds = map_parse_fds(&argc, &argv, &fds); + nb_fds = map_parse_fds(&argc, &argv, &fds, BPF_F_RDONLY); if (nb_fds < 1) goto exit_free; @@ -997,7 +1000,7 @@ static int do_update(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0); if (fd < 0) return -1; @@ -1006,7 +1009,7 @@ static int do_update(int argc, char **argv) goto exit_free; err = parse_elem(argv, &info, key, value, info.key_size, - info.value_size, &flags, &value_fd); + info.value_size, &flags, &value_fd, 0); if (err) goto exit_free; @@ -1076,7 +1079,7 @@ static int do_lookup(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, BPF_F_RDONLY); if (fd < 0) return -1; @@ -1084,7 +1087,8 @@ static int do_lookup(int argc, char **argv) if (err) goto exit_free; - err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL, + BPF_F_RDONLY); if (err) goto exit_free; @@ -1127,7 +1131,7 @@ static int do_getnext(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 
BPF_F_RDONLY); if (fd < 0) return -1; @@ -1140,8 +1144,8 @@ static int do_getnext(int argc, char **argv) } if (argc) { - err = parse_elem(argv, &info, key, NULL, info.key_size, 0, - NULL, NULL); + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, + NULL, BPF_F_RDONLY); if (err) goto exit_free; } else { @@ -1198,7 +1202,7 @@ static int do_delete(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0); if (fd < 0) return -1; @@ -1209,7 +1213,8 @@ static int do_delete(int argc, char **argv) goto exit_free; } - err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL); + err = parse_elem(argv, &info, key, NULL, info.key_size, 0, NULL, NULL, + 0); if (err) goto exit_free; @@ -1226,11 +1231,16 @@ exit_free: return err; } +static int map_parse_read_only_fd(int *argc, char ***argv) +{ + return map_parse_fd(argc, argv, BPF_F_RDONLY); +} + static int do_pin(int argc, char **argv) { int err; - err = do_pin_any(argc, argv, map_parse_fd); + err = do_pin_any(argc, argv, map_parse_read_only_fd); if (!err && json_output) jsonw_null(json_wtr); return err; @@ -1319,7 +1329,7 @@ offload_dev: if (!REQ_ARGS(2)) usage(); inner_map_fd = map_parse_fd_and_info(&argc, &argv, - &info, &len); + &info, &len, BPF_F_RDONLY); if (inner_map_fd < 0) return -1; attr.inner_map_fd = inner_map_fd; @@ -1368,7 +1378,7 @@ static int do_pop_dequeue(int argc, char **argv) if (argc < 2) usage(); - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); + fd = map_parse_fd_and_info(&argc, &argv, &info, &len, 0); if (fd < 0) return -1; @@ -1407,7 +1417,7 @@ static int do_freeze(int argc, char **argv) if (!REQ_ARGS(2)) return -1; - fd = map_parse_fd(&argc, &argv); + fd = map_parse_fd(&argc, &argv, 0); if (fd < 0) return -1; diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c index 552b4ca40c27..bcb767e2d673 100644 --- a/tools/bpf/bpftool/map_perf_ring.c +++ b/tools/bpf/bpftool/map_perf_ring.c @@ -128,7 +128,8 @@ int do_event_pipe(int argc, char **argv) int err, map_fd; map_info_len = sizeof(map_info); - map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len); + map_fd = map_parse_fd_and_info(&argc, &argv, &map_info, &map_info_len, + 0); if (map_fd < 0) return -1; diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 64f958f437b0..cfc6f944f7c3 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -366,17 +366,18 @@ static int dump_link_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_netdev_t *netinfo = cookie; struct ifinfomsg *ifinfo = msg; + struct ip_devname_ifindex *tmp; if (netinfo->filter_idx > 0 && netinfo->filter_idx != ifinfo->ifi_index) return 0; if (netinfo->used_len == netinfo->array_len) { - netinfo->devices = realloc(netinfo->devices, - (netinfo->array_len + 16) * - sizeof(struct ip_devname_ifindex)); - if (!netinfo->devices) + tmp = realloc(netinfo->devices, + (netinfo->array_len + 16) * sizeof(struct ip_devname_ifindex)); + if (!tmp) return -ENOMEM; + netinfo->devices = tmp; netinfo->array_len += 16; } netinfo->devices[netinfo->used_len].ifindex = ifinfo->ifi_index; @@ -395,6 +396,7 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr **tb) { struct bpf_tcinfo_t *tcinfo = cookie; struct tcmsg *info = msg; + struct tc_kind_handle *tmp; if (tcinfo->is_qdisc) { /* skip clsact qdisc */ @@ -406,11 +408,12 @@ static int dump_class_qdisc_nlmsg(void *cookie, void *msg, struct nlattr 
**tb) } if (tcinfo->used_len == tcinfo->array_len) { - tcinfo->handle_array = realloc(tcinfo->handle_array, + tmp = realloc(tcinfo->handle_array, (tcinfo->array_len + 16) * sizeof(struct tc_kind_handle)); - if (!tcinfo->handle_array) + if (!tmp) return -ENOMEM; + tcinfo->handle_array = tmp; tcinfo->array_len += 16; } tcinfo->handle_array[tcinfo->used_len].handle = info->tcm_handle; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 96eea8a67225..9722d841abc0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -1062,7 +1062,7 @@ static int parse_attach_detach_args(int argc, char **argv, int *progfd, if (!REQ_ARGS(2)) return -EINVAL; - *mapfd = map_parse_fd(&argc, &argv); + *mapfd = map_parse_fd(&argc, &argv, 0); if (*mapfd < 0) return *mapfd; @@ -1113,6 +1113,52 @@ static int do_detach(int argc, char **argv) return 0; } +enum prog_tracelog_mode { + TRACE_STDOUT, + TRACE_STDERR, +}; + +static int +prog_tracelog_stream(int prog_fd, enum prog_tracelog_mode mode) +{ + FILE *file = mode == TRACE_STDOUT ? stdout : stderr; + int stream_id = mode == TRACE_STDOUT ? 1 : 2; + char buf[512]; + int ret; + + ret = 0; + do { + ret = bpf_prog_stream_read(prog_fd, stream_id, buf, sizeof(buf), NULL); + if (ret > 0) + fwrite(buf, sizeof(buf[0]), ret, file); + } while (ret > 0); + + fflush(file); + return ret ? -1 : 0; +} + +static int do_tracelog_any(int argc, char **argv) +{ + enum prog_tracelog_mode mode; + int fd; + + if (argc == 0) + return do_tracelog(argc, argv); + if (!is_prefix(*argv, "stdout") && !is_prefix(*argv, "stderr")) + usage(); + mode = is_prefix(*argv, "stdout") ? TRACE_STDOUT : TRACE_STDERR; + NEXT_ARG(); + + if (!REQ_ARGS(2)) + return -1; + + fd = prog_parse_fd(&argc, &argv); + if (fd < 0) + return -1; + + return prog_tracelog_stream(fd, mode); +} + static int check_single_stdin(char *file_data_in, char *file_ctx_in) { if (file_data_in && file_ctx_in && @@ -1608,7 +1654,7 @@ static int load_with_options(int argc, char **argv, bool first_prog_only) } NEXT_ARG(); - fd = map_parse_fd(&argc, &argv); + fd = map_parse_fd(&argc, &argv, 0); if (fd < 0) goto err_free_reuse_maps; @@ -2493,6 +2539,7 @@ static int do_help(int argc, char **argv) " [repeat N]\n" " %1$s %2$s profile PROG [duration DURATION] METRICs\n" " %1$s %2$s tracelog\n" + " %1$s %2$s tracelog { stdout | stderr } PROG\n" " %1$s %2$s help\n" "\n" " " HELP_SPEC_MAP "\n" @@ -2532,7 +2579,7 @@ static const struct cmd cmds[] = { { "loadall", do_loadall }, { "attach", do_attach }, { "detach", do_detach }, - { "tracelog", do_tracelog }, + { "tracelog", do_tracelog_any }, { "run", do_run }, { "profile", do_profile }, { 0 } diff --git a/tools/bpf/resolve_btfids/Makefile b/tools/bpf/resolve_btfids/Makefile index afbddea3a39c..ce1b556dfa90 100644 --- a/tools/bpf/resolve_btfids/Makefile +++ b/tools/bpf/resolve_btfids/Makefile @@ -17,7 +17,7 @@ endif # Overrides for the prepare step libraries. 
HOST_OVERRIDES := AR="$(HOSTAR)" CC="$(HOSTCC)" LD="$(HOSTLD)" ARCH="$(HOSTARCH)" \ - CROSS_COMPILE="" EXTRA_CFLAGS="$(HOSTCFLAGS)" + CROSS_COMPILE="" CLANG_CROSS_FLAGS="" EXTRA_CFLAGS="$(HOSTCFLAGS)" RM ?= rm HOSTCC ?= gcc diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py index 270c28a0d098..6bf4bde77903 100644 --- a/tools/cgroup/memcg_slabinfo.py +++ b/tools/cgroup/memcg_slabinfo.py @@ -146,11 +146,11 @@ def detect_kernel_config(): def for_each_slab(prog): - PGSlab = ~prog.constant('PG_slab') + slabtype = prog.constant('PGTY_slab') for page in for_each_page(prog): try: - if page.page_type.value_() == PGSlab: + if (page.page_type.value_() >> 24) == slabtype: yield cast('struct slab *', page) except FaultError: pass diff --git a/tools/hv/hv_fcopy_uio_daemon.c b/tools/hv/hv_fcopy_uio_daemon.c index 0198321d14a2..92e8307b2a46 100644 --- a/tools/hv/hv_fcopy_uio_daemon.c +++ b/tools/hv/hv_fcopy_uio_daemon.c @@ -35,7 +35,10 @@ #define WIN8_SRV_MINOR 1 #define WIN8_SRV_VERSION (WIN8_SRV_MAJOR << 16 | WIN8_SRV_MINOR) -#define FCOPY_UIO "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/uio" +#define FCOPY_DEVICE_PATH(subdir) \ + "/sys/bus/vmbus/devices/eb765408-105f-49b6-b4aa-c123b64d17d4/" #subdir +#define FCOPY_UIO_PATH FCOPY_DEVICE_PATH(uio) +#define FCOPY_CHANNELS_PATH FCOPY_DEVICE_PATH(channels) #define FCOPY_VER_COUNT 1 static const int fcopy_versions[] = { @@ -47,9 +50,62 @@ static const int fw_versions[] = { UTIL_FW_VERSION }; -#define HV_RING_SIZE 0x4000 /* 16KB ring buffer size */ +static uint32_t get_ring_buffer_size(void) +{ + char ring_path[PATH_MAX]; + DIR *dir; + struct dirent *entry; + struct stat st; + uint32_t ring_size = 0; + int retry_count = 0; + + /* Find the channel directory */ + dir = opendir(FCOPY_CHANNELS_PATH); + if (!dir) { + usleep(100 * 1000); /* Avoid race with kernel, wait 100ms and retry once */ + dir = opendir(FCOPY_CHANNELS_PATH); + if (!dir) { + syslog(LOG_ERR, "Failed to open channels directory: %s", strerror(errno)); + return 0; + } + } + +retry_once: + while ((entry = readdir(dir)) != NULL) { + if (entry->d_type == DT_DIR && strcmp(entry->d_name, ".") != 0 && + strcmp(entry->d_name, "..") != 0) { + snprintf(ring_path, sizeof(ring_path), "%s/%s/ring", + FCOPY_CHANNELS_PATH, entry->d_name); + + if (stat(ring_path, &st) == 0) { + /* + * stat returns size of Tx, Rx rings combined, + * so take half of it for individual ring size. 
+ */ + ring_size = (uint32_t)st.st_size / 2; + syslog(LOG_INFO, "Ring buffer size from %s: %u bytes", + ring_path, ring_size); + break; + } + } + } + + if (!ring_size && retry_count == 0) { + retry_count = 1; + rewinddir(dir); + usleep(100 * 1000); /* Wait 100ms and retry once */ + goto retry_once; + } + + closedir(dir); -static unsigned char desc[HV_RING_SIZE]; + if (!ring_size) + syslog(LOG_ERR, "Could not determine ring size"); + + return ring_size; +} + +static unsigned char *desc; static int target_fd; static char target_fname[PATH_MAX]; @@ -62,8 +118,11 @@ static int hv_fcopy_create_file(char *file_name, char *path_name, __u32 flags) filesize = 0; p = path_name; - snprintf(target_fname, sizeof(target_fname), "%s/%s", - path_name, file_name); + if (snprintf(target_fname, sizeof(target_fname), "%s/%s", + path_name, file_name) >= sizeof(target_fname)) { + syslog(LOG_ERR, "target file name is too long: %s/%s", path_name, file_name); + goto done; + } /* * Check to see if the path is already in place; if not, @@ -270,7 +329,7 @@ static void wcstoutf8(char *dest, const __u16 *src, size_t dest_size) { size_t len = 0; - while (len < dest_size) { + while (len < dest_size && *src) { if (src[len] < 0x80) dest[len++] = (char)(*src++); else @@ -282,27 +341,15 @@ static void wcstoutf8(char *dest, const __u16 *src, size_t dest_size) static int hv_fcopy_start(struct hv_start_fcopy *smsg_in) { - setlocale(LC_ALL, "en_US.utf8"); - size_t file_size, path_size; - char *file_name, *path_name; - char *in_file_name = (char *)smsg_in->file_name; - char *in_path_name = (char *)smsg_in->path_name; - - file_size = wcstombs(NULL, (const wchar_t *restrict)in_file_name, 0) + 1; - path_size = wcstombs(NULL, (const wchar_t *restrict)in_path_name, 0) + 1; - - file_name = (char *)malloc(file_size * sizeof(char)); - path_name = (char *)malloc(path_size * sizeof(char)); - - if (!file_name || !path_name) { - free(file_name); - free(path_name); - syslog(LOG_ERR, "Can't allocate memory for file name and/or path name"); - return HV_E_FAIL; - } + /* + * file_name and path_name should have same length with appropriate + * member of hv_start_fcopy. 
+ */ + char file_name[W_MAX_PATH], path_name[W_MAX_PATH]; - wcstoutf8(file_name, (__u16 *)in_file_name, file_size); - wcstoutf8(path_name, (__u16 *)in_path_name, path_size); + setlocale(LC_ALL, "en_US.utf8"); + wcstoutf8(file_name, smsg_in->file_name, W_MAX_PATH - 1); + wcstoutf8(path_name, smsg_in->path_name, W_MAX_PATH - 1); return hv_fcopy_create_file(file_name, path_name, smsg_in->copy_flags); } @@ -406,7 +453,7 @@ int main(int argc, char *argv[]) int daemonize = 1, long_index = 0, opt, ret = -EINVAL; struct vmbus_br txbr, rxbr; void *ring; - uint32_t len = HV_RING_SIZE; + uint32_t ring_size, len; char uio_name[NAME_MAX] = {0}; char uio_dev_path[PATH_MAX] = {0}; @@ -437,7 +484,20 @@ int main(int argc, char *argv[]) openlog("HV_UIO_FCOPY", 0, LOG_USER); syslog(LOG_INFO, "starting; pid is:%d", getpid()); - fcopy_get_first_folder(FCOPY_UIO, uio_name); + ring_size = get_ring_buffer_size(); + if (!ring_size) { + ret = -ENODEV; + goto exit; + } + + desc = malloc(ring_size * sizeof(unsigned char)); + if (!desc) { + syslog(LOG_ERR, "malloc failed for desc buffer"); + ret = -ENOMEM; + goto exit; + } + + fcopy_get_first_folder(FCOPY_UIO_PATH, uio_name); snprintf(uio_dev_path, sizeof(uio_dev_path), "/dev/%s", uio_name); fcopy_fd = open(uio_dev_path, O_RDWR); @@ -445,17 +505,17 @@ int main(int argc, char *argv[]) syslog(LOG_ERR, "open %s failed; error: %d %s", uio_dev_path, errno, strerror(errno)); ret = fcopy_fd; - goto exit; + goto free_desc; } - ring = vmbus_uio_map(&fcopy_fd, HV_RING_SIZE); + ring = vmbus_uio_map(&fcopy_fd, ring_size); if (!ring) { ret = errno; syslog(LOG_ERR, "mmap ringbuffer failed; error: %d %s", ret, strerror(ret)); goto close; } - vmbus_br_setup(&txbr, ring, HV_RING_SIZE); - vmbus_br_setup(&rxbr, (char *)ring + HV_RING_SIZE, HV_RING_SIZE); + vmbus_br_setup(&txbr, ring, ring_size); + vmbus_br_setup(&rxbr, (char *)ring + ring_size, ring_size); rxbr.vbr->imask = 0; @@ -472,7 +532,7 @@ int main(int argc, char *argv[]) goto close; } - len = HV_RING_SIZE; + len = ring_size; ret = rte_vmbus_chan_recv_raw(&rxbr, desc, &len); if (unlikely(ret <= 0)) { /* This indicates a failure to communicate (or worse) */ @@ -492,6 +552,8 @@ int main(int argc, char *argv[]) } close: close(fcopy_fd); +free_desc: + free(desc); exit: return ret; } diff --git a/tools/include/linux/bits.h b/tools/include/linux/bits.h index 14fd0ca9a6cd..7ad056219115 100644 --- a/tools/include/linux/bits.h +++ b/tools/include/linux/bits.h @@ -12,6 +12,7 @@ #define BIT_ULL_MASK(nr) (ULL(1) << ((nr) % BITS_PER_LONG_LONG)) #define BIT_ULL_WORD(nr) ((nr) / BITS_PER_LONG_LONG) #define BITS_PER_BYTE 8 +#define BITS_PER_TYPE(type) (sizeof(type) * BITS_PER_BYTE) /* * Create a contiguous bitmask starting at bit position @l and ending at @@ -19,16 +20,68 @@ * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. */ #if !defined(__ASSEMBLY__) + +/* + * Missing asm support + * + * GENMASK_U*() and BIT_U*() depend on BITS_PER_TYPE() which relies on sizeof(), + * something not available in asm. Nevertheless, fixed width integers is a C + * concept. Assembly code can rely on the long and long long versions instead. + */ + #include <linux/build_bug.h> #include <linux/compiler.h> +#include <linux/overflow.h> + #define GENMASK_INPUT_CHECK(h, l) BUILD_BUG_ON_ZERO(const_true((l) > (h))) -#else + +/* + * Generate a mask for the specified type @t. Additional checks are made to + * guarantee the value returned fits in that type, relying on + * -Wshift-count-overflow compiler check to detect incompatible arguments. 
+ * For example, all these create build errors or warnings: + * + * - GENMASK(15, 20): wrong argument order + * - GENMASK(72, 15): doesn't fit unsigned long + * - GENMASK_U32(33, 15): doesn't fit in a u32 + */ +#define GENMASK_TYPE(t, h, l) \ + ((t)(GENMASK_INPUT_CHECK(h, l) + \ + (type_max(t) << (l) & \ + type_max(t) >> (BITS_PER_TYPE(t) - 1 - (h))))) + +#define GENMASK_U8(h, l) GENMASK_TYPE(u8, h, l) +#define GENMASK_U16(h, l) GENMASK_TYPE(u16, h, l) +#define GENMASK_U32(h, l) GENMASK_TYPE(u32, h, l) +#define GENMASK_U64(h, l) GENMASK_TYPE(u64, h, l) + +/* + * Fixed-type variants of BIT(), with additional checks like GENMASK_TYPE(). The + * following examples generate compiler warnings due to -Wshift-count-overflow: + * + * - BIT_U8(8) + * - BIT_U32(-1) + * - BIT_U32(40) + */ +#define BIT_INPUT_CHECK(type, nr) \ + BUILD_BUG_ON_ZERO(const_true((nr) >= BITS_PER_TYPE(type))) + +#define BIT_TYPE(type, nr) ((type)(BIT_INPUT_CHECK(type, nr) + BIT_ULL(nr))) + +#define BIT_U8(nr) BIT_TYPE(u8, nr) +#define BIT_U16(nr) BIT_TYPE(u16, nr) +#define BIT_U32(nr) BIT_TYPE(u32, nr) +#define BIT_U64(nr) BIT_TYPE(u64, nr) + +#else /* defined(__ASSEMBLY__) */ + /* * BUILD_BUG_ON_ZERO is not available in h files included from asm files, * disable the input check if that is the case. */ #define GENMASK_INPUT_CHECK(h, l) 0 -#endif + +#endif /* !defined(__ASSEMBLY__) */ #define GENMASK(h, l) \ (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l)) diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h index b4898ff085de..ab2aa97bd8ce 100644 --- a/tools/include/linux/build_bug.h +++ b/tools/include/linux/build_bug.h @@ -4,17 +4,17 @@ #include <linux/compiler.h> -#ifdef __CHECKER__ -#define BUILD_BUG_ON_ZERO(e) (0) -#else /* __CHECKER__ */ /* * Force a compilation error if condition is true, but also produce a * result (of value 0 and type int), so the expression can be used * e.g. in a structure initializer (or where-ever else comma expressions * aren't permitted). + * + * Take an error message as an optional second argument. If omitted, + * default to the stringification of the tested expression. */ -#define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); }))) -#endif /* __CHECKER__ */ +#define BUILD_BUG_ON_ZERO(e, ...) \ + __BUILD_BUG_ON_ZERO_MSG(e, ##__VA_ARGS__, #e " is true") /* Force a compilation error if a constant expression is not a power of 2 */ #define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index d627e66a04a6..33411ca0cc90 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -244,6 +244,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __asm__ ("" : "=r" (var) : "0" (var)) #endif +#ifndef __BUILD_BUG_ON_ZERO_MSG +#if defined(__clang__) +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) ((int)(sizeof(struct { int:(-!!(e)); }))) +#else +#define __BUILD_BUG_ON_ZERO_MSG(e, msg, ...) 
((int)sizeof(struct {_Static_assert(!(e), msg);})) +#endif +#endif + #endif /* __ASSEMBLY__ */ #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index 5a37ccbec54f..f61a01dd7eb7 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -18,6 +18,7 @@ static inline const char *kallsyms_lookup(unsigned long addr, return NULL; } +#ifdef HAVE_BACKTRACE_SUPPORT #include <execinfo.h> #include <stdlib.h> static inline void print_ip_sym(const char *loglvl, unsigned long ip) @@ -30,5 +31,8 @@ static inline void print_ip_sym(const char *loglvl, unsigned long ip) free(name); } +#else +static inline void print_ip_sym(const char *loglvl, unsigned long ip) {} +#endif #endif diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index c335ce0bd195..143c2d2c2ba6 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -23,8 +23,7 @@ else Q=@ endif -nolibc_arch := $(patsubst arm64,aarch64,$(ARCH)) -arch_file := arch-$(nolibc_arch).h +arch_file := arch-$(ARCH).h all_files := \ compiler.h \ crt.h \ @@ -91,18 +90,12 @@ help: @echo " OUTPUT = $(OUTPUT)" @echo "" -# Note: when ARCH is "x86" we concatenate both x86_64 and i386 headers: $(Q)mkdir -p $(OUTPUT)sysroot $(Q)mkdir -p $(OUTPUT)sysroot/include $(Q)cp --parents $(all_files) $(OUTPUT)sysroot/include/ - $(Q)if [ "$(ARCH)" = "x86" ]; then \ - sed -e \ - 's,^#ifndef _NOLIBC_ARCH_X86_64_H,#if !defined(_NOLIBC_ARCH_X86_64_H) \&\& defined(__x86_64__),' \ - arch-x86_64.h; \ - sed -e \ - 's,^#ifndef _NOLIBC_ARCH_I386_H,#if !defined(_NOLIBC_ARCH_I386_H) \&\& !defined(__x86_64__),' \ - arch-i386.h; \ + $(Q)if [ "$(ARCH)" = "i386" -o "$(ARCH)" = "x86_64" ]; then \ + cat arch-x86.h; \ elif [ -e "$(arch_file)" ]; then \ cat $(arch_file); \ else \ @@ -114,11 +107,8 @@ headers_standalone: headers $(Q)$(MAKE) -C $(srctree) headers $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot -# GCC uses "s390", clang "systemz" -CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS)) - headers_check: headers_standalone - for header in $(filter-out crt.h std.h,$(all_files)); do \ + $(Q)for header in $(filter-out crt.h std.h,$(all_files)); do \ $(CC) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \ -I$(or $(objtree),$(srctree))/usr/include -include $$header -include $$header || exit 1; \ done diff --git a/tools/include/nolibc/arch-aarch64.h b/tools/include/nolibc/arch-arm64.h index 937a348da42e..02a3f74c8ec8 100644 --- a/tools/include/nolibc/arch-aarch64.h +++ b/tools/include/nolibc/arch-arm64.h @@ -1,16 +1,16 @@ /* SPDX-License-Identifier: LGPL-2.1 OR MIT */ /* - * AARCH64 specific definitions for NOLIBC + * ARM64 specific definitions for NOLIBC * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> */ -#ifndef _NOLIBC_ARCH_AARCH64_H -#define _NOLIBC_ARCH_AARCH64_H +#ifndef _NOLIBC_ARCH_ARM64_H +#define _NOLIBC_ARCH_ARM64_H #include "compiler.h" #include "crt.h" -/* Syscalls for AARCH64 : +/* Syscalls for ARM64 : * - registers are 64-bit * - stack is 16-byte aligned * - syscall number is passed in x8 @@ -150,4 +150,4 @@ void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _s ); __nolibc_entrypoint_epilogue(); } -#endif /* _NOLIBC_ARCH_AARCH64_H */ +#endif /* _NOLIBC_ARCH_ARM64_H */ diff --git a/tools/include/nolibc/arch-i386.h b/tools/include/nolibc/arch-i386.h deleted file mode 100644 index 7c9b38e96418..000000000000 --- 
a/tools/include/nolibc/arch-i386.h +++ /dev/null @@ -1,178 +0,0 @@ -/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ -/* - * i386 specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> - */ - -#ifndef _NOLIBC_ARCH_I386_H -#define _NOLIBC_ARCH_I386_H - -#include "compiler.h" -#include "crt.h" - -/* Syscalls for i386 : - * - mostly similar to x86_64 - * - registers are 32-bit - * - syscall number is passed in eax - * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively - * - all registers are preserved (except eax of course) - * - the system call is performed by calling int $0x80 - * - syscall return comes in eax - * - the arguments are cast to long and assigned into the target registers - * which are then simply passed as registers to the asm code, so that we - * don't have to experience issues with register constraints. - * - the syscall number is always specified last in order to allow to force - * some registers before (gcc refuses a %-register at the last position). - * - * Also, i386 supports the old_select syscall if newselect is not available - */ -#define __ARCH_WANT_SYS_OLD_SELECT - -#define my_syscall0(num) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall1(num, arg1) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall2(num, arg1, arg2) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall3(num, arg1, arg2, arg3) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall4(num, arg1, arg2, arg3, arg4) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - register long _arg4 __asm__ ("esi") = (long)(arg4); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ - "0"(_num) \ - : "memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ -({ \ - long _ret; \ - register long _num __asm__ ("eax") = (num); \ - register long _arg1 __asm__ ("ebx") = (long)(arg1); \ - register long _arg2 __asm__ ("ecx") = (long)(arg2); \ - register long _arg3 __asm__ ("edx") = (long)(arg3); \ - register long _arg4 __asm__ ("esi") = (long)(arg4); \ - register long _arg5 __asm__ ("edi") = (long)(arg5); \ - \ - __asm__ volatile ( \ - "int $0x80\n" \ - : "=a" (_ret) \ - : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ - "0"(_num) \ - : 
"memory", "cc" \ - ); \ - _ret; \ -}) - -#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ -({ \ - long _eax = (long)(num); \ - long _arg6 = (long)(arg6); /* Always in memory */ \ - __asm__ volatile ( \ - "pushl %[_arg6]\n\t" \ - "pushl %%ebp\n\t" \ - "movl 4(%%esp),%%ebp\n\t" \ - "int $0x80\n\t" \ - "popl %%ebp\n\t" \ - "addl $4,%%esp\n\t" \ - : "+a"(_eax) /* %eax */ \ - : "b"(arg1), /* %ebx */ \ - "c"(arg2), /* %ecx */ \ - "d"(arg3), /* %edx */ \ - "S"(arg4), /* %esi */ \ - "D"(arg5), /* %edi */ \ - [_arg6]"m"(_arg6) /* memory */ \ - : "memory", "cc" \ - ); \ - _eax; \ -}) - -/* startup code */ -/* - * i386 System V ABI mandates: - * 1) last pushed argument must be 16-byte aligned. - * 2) The deepest stack frame should be set to zero - * - */ -void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) -{ - __asm__ volatile ( - "xor %ebp, %ebp\n" /* zero the stack frame */ - "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ - "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ - "push %eax\n" /* push arg1 on stack to support plain stack modes too */ - "call _start_c\n" /* transfer to c runtime */ - "hlt\n" /* ensure it does not return */ - ); - __nolibc_entrypoint_epilogue(); -} - -#endif /* _NOLIBC_ARCH_I386_H */ diff --git a/tools/include/nolibc/arch-mips.h b/tools/include/nolibc/arch-mips.h index 753a8ed2cf69..0cbac63b249a 100644 --- a/tools/include/nolibc/arch-mips.h +++ b/tools/include/nolibc/arch-mips.h @@ -10,7 +10,7 @@ #include "compiler.h" #include "crt.h" -#if !defined(_ABIO32) +#if !defined(_ABIO32) && !defined(_ABIN32) && !defined(_ABI64) #error Unsupported MIPS ABI #endif @@ -32,11 +32,32 @@ * - the arguments are cast to long and assigned into the target registers * which are then simply passed as registers to the asm code, so that we * don't have to experience issues with register constraints. + * + * Syscalls for MIPS ABI N32, same as ABI O32 with the following differences : + * - arguments are in a0, a1, a2, a3, t0, t1, t2, t3. + * t0..t3 are also known as a4..a7. + * - stack is 16-byte aligned */ +#if defined(_ABIO32) + #define _NOLIBC_SYSCALL_CLOBBERLIST \ "memory", "cc", "at", "v1", "hi", "lo", \ "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9" +#define _NOLIBC_SYSCALL_STACK_RESERVE "addiu $sp, $sp, -32\n" +#define _NOLIBC_SYSCALL_STACK_UNRESERVE "addiu $sp, $sp, 32\n" + +#else /* _ABIN32 || _ABI64 */ + +/* binutils, GCC and clang disagree about register aliases, use numbers instead. 
*/ +#define _NOLIBC_SYSCALL_CLOBBERLIST \ + "memory", "cc", "at", "v1", \ + "10", "11", "12", "13", "14", "15", "24", "25" + +#define _NOLIBC_SYSCALL_STACK_RESERVE +#define _NOLIBC_SYSCALL_STACK_UNRESERVE + +#endif /* _ABIO32 */ #define my_syscall0(num) \ ({ \ @@ -44,9 +65,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "r"(_num) \ : _NOLIBC_SYSCALL_CLOBBERLIST \ @@ -61,9 +82,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1) \ @@ -80,9 +101,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2) \ @@ -100,9 +121,9 @@ register long _arg4 __asm__ ("a3"); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r"(_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3) \ @@ -120,9 +141,9 @@ register long _arg4 __asm__ ("a3") = (long)(arg4); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ @@ -131,6 +152,8 @@ _arg4 ? -_num : _num; \ }) +#if defined(_ABIO32) + #define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ ({ \ register long _num __asm__ ("v0") = (num); \ @@ -141,10 +164,10 @@ register long _arg5 = (long)(arg5); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ @@ -164,11 +187,53 @@ register long _arg6 = (long)(arg6); \ \ __asm__ volatile ( \ - "addiu $sp, $sp, -32\n" \ + _NOLIBC_SYSCALL_STACK_RESERVE \ "sw %7, 16($sp)\n" \ "sw %8, 20($sp)\n" \ "syscall\n" \ - "addiu $sp, $sp, 32\n" \ + _NOLIBC_SYSCALL_STACK_UNRESERVE \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#else /* _ABIN32 || _ABI64 */ + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "syscall\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : _NOLIBC_SYSCALL_CLOBBERLIST \ + ); \ + _arg4 ? 
-_num : _num; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("v0") = (num); \ + register long _arg1 __asm__ ("$4") = (long)(arg1); \ + register long _arg2 __asm__ ("$5") = (long)(arg2); \ + register long _arg3 __asm__ ("$6") = (long)(arg3); \ + register long _arg4 __asm__ ("$7") = (long)(arg4); \ + register long _arg5 __asm__ ("$8") = (long)(arg5); \ + register long _arg6 __asm__ ("$9") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "syscall\n" \ : "=r" (_num), "=r"(_arg4) \ : "0"(_num), \ "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ @@ -178,28 +243,26 @@ _arg4 ? -_num : _num; \ }) +#endif /* _ABIO32 */ + /* startup code, note that it's called __start on MIPS */ void __start(void); void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector __start(void) { __asm__ volatile ( - ".set push\n" - ".set noreorder\n" - "bal 1f\n" /* prime $ra for .cpload */ - "nop\n" - "1:\n" - ".cpload $ra\n" "move $a0, $sp\n" /* save stack pointer to $a0, as arg1 of _start_c */ - "addiu $sp, $sp, -4\n" /* space for .cprestore to store $gp */ - ".cprestore 0\n" - "li $t0, -8\n" - "and $sp, $sp, $t0\n" /* $sp must be 8-byte aligned */ +#if defined(_ABIO32) "addiu $sp, $sp, -16\n" /* the callee expects to save a0..a3 there */ +#endif /* _ABIO32 */ "lui $t9, %hi(_start_c)\n" /* ABI requires current function address in $t9 */ "ori $t9, %lo(_start_c)\n" +#if defined(_ABI64) + "lui $t0, %highest(_start_c)\n" + "ori $t0, %higher(_start_c)\n" + "dsll $t0, 0x20\n" + "or $t9, $t0\n" +#endif /* _ABI64 */ "jalr $t9\n" /* transfer to c runtime */ - " nop\n" /* delayed slot */ - ".set pop\n" ); __nolibc_entrypoint_epilogue(); } diff --git a/tools/include/nolibc/arch-sh.h b/tools/include/nolibc/arch-sh.h new file mode 100644 index 000000000000..a96b8914607e --- /dev/null +++ b/tools/include/nolibc/arch-sh.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * SuperH specific definitions for NOLIBC + * Copyright (C) 2025 Thomas Weißschuh <linux@weissschuh.net> + */ + +#ifndef _NOLIBC_ARCH_SH_H +#define _NOLIBC_ARCH_SH_H + +#include "compiler.h" +#include "crt.h" + +/* + * Syscalls for SuperH: + * - registers are 32bit wide + * - syscall number is passed in r3 + * - arguments are in r4, r5, r6, r7, r0, r1, r2 + * - the system call is performed by calling trapa #31 + * - syscall return value is in r0 + */ + +#define my_syscall0(num) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = 
(long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + register long _arg5 __asm__ ("r0") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_arg5) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num __asm__ ("r3") = (num); \ + register long _ret __asm__ ("r0"); \ + register long _arg1 __asm__ ("r4") = (long)(arg1); \ + register long _arg2 __asm__ ("r5") = (long)(arg2); \ + register long _arg3 __asm__ ("r6") = (long)(arg3); \ + register long _arg4 __asm__ ("r7") = (long)(arg4); \ + register long _arg5 __asm__ ("r0") = (long)(arg5); \ + register long _arg6 __asm__ ("r1") = (long)(arg6); \ + \ + __asm__ volatile ( \ + "trapa #31" \ + : "=r"(_ret) \ + : "r"(_num), "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_arg5), "r"(_arg6) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +void _start_wrapper(void); +void __attribute__((weak,noreturn)) __nolibc_entrypoint __no_stack_protector _start_wrapper(void) +{ + __asm__ volatile ( + ".global _start\n" /* The C function will have a prologue, */ + ".type _start, @function\n" /* corrupting "sp" */ + ".weak _start\n" + "_start:\n" + + "mov sp, r4\n" /* save argc pointer to r4, as arg1 of _start_c */ + "bsr _start_c\n" /* transfer to c runtime */ + "nop\n" /* delay slot */ + + ".size _start, .-_start\n" + ); + __nolibc_entrypoint_epilogue(); +} + +#endif /* _NOLIBC_ARCH_SH_H */ diff --git a/tools/include/nolibc/arch-sparc.h b/tools/include/nolibc/arch-sparc.h index 1435172f3dfe..ca420d843e25 100644 --- a/tools/include/nolibc/arch-sparc.h +++ b/tools/include/nolibc/arch-sparc.h @@ -188,4 +188,20 @@ pid_t sys_fork(void) } #define sys_fork sys_fork +static __attribute__((unused)) +pid_t sys_vfork(void) +{ + pid_t parent, ret; + + parent = getpid(); + ret = my_syscall0(__NR_vfork); + + /* The syscall returns the parent pid in the child instead of 0 */ + if (ret == parent) + return 0; + else + return ret; +} +#define sys_vfork sys_vfork + #endif /* _NOLIBC_ARCH_SPARC_H */ diff --git a/tools/include/nolibc/arch-x86_64.h b/tools/include/nolibc/arch-x86.h index 67305e24dbef..d3efc0c3b8ad 100644 --- a/tools/include/nolibc/arch-x86_64.h +++ b/tools/include/nolibc/arch-x86.h @@ -1,15 +1,184 @@ /* SPDX-License-Identifier: LGPL-2.1 OR MIT */ /* - * 
x86_64 specific definitions for NOLIBC - * Copyright (C) 2017-2022 Willy Tarreau <w@1wt.eu> + * x86 specific definitions for NOLIBC (both 32- and 64-bit) + * Copyright (C) 2017-2025 Willy Tarreau <w@1wt.eu> */ -#ifndef _NOLIBC_ARCH_X86_64_H -#define _NOLIBC_ARCH_X86_64_H +#ifndef _NOLIBC_ARCH_X86_H +#define _NOLIBC_ARCH_X86_H #include "compiler.h" #include "crt.h" +#if !defined(__x86_64__) + +/* Syscalls for i386 : + * - mostly similar to x86_64 + * - registers are 32-bit + * - syscall number is passed in eax + * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively + * - all registers are preserved (except eax of course) + * - the system call is performed by calling int $0x80 + * - syscall return comes in eax + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, i386 supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num __asm__ ("eax") = (num); \ + register long _arg1 __asm__ ("ebx") = (long)(arg1); \ + register long _arg2 __asm__ ("ecx") = (long)(arg2); \ + register long _arg3 __asm__ ("edx") = (long)(arg3); \ + register long _arg4 __asm__ ("esi") = (long)(arg4); \ + register long _arg5 __asm__ ("edi") = (long)(arg5); \ + \ + __asm__ volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : 
"r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _eax = (long)(num); \ + long _arg6 = (long)(arg6); /* Always in memory */ \ + __asm__ volatile ( \ + "pushl %[_arg6]\n\t" \ + "pushl %%ebp\n\t" \ + "movl 4(%%esp),%%ebp\n\t" \ + "int $0x80\n\t" \ + "popl %%ebp\n\t" \ + "addl $4,%%esp\n\t" \ + : "+a"(_eax) /* %eax */ \ + : "b"(arg1), /* %ebx */ \ + "c"(arg2), /* %ecx */ \ + "d"(arg3), /* %edx */ \ + "S"(arg4), /* %esi */ \ + "D"(arg5), /* %edi */ \ + [_arg6]"m"(_arg6) /* memory */ \ + : "memory", "cc" \ + ); \ + _eax; \ +}) + +/* startup code */ +/* + * i386 System V ABI mandates: + * 1) last pushed argument must be 16-byte aligned. + * 2) The deepest stack frame should be set to zero + * + */ +void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void) +{ + __asm__ volatile ( + "xor %ebp, %ebp\n" /* zero the stack frame */ + "mov %esp, %eax\n" /* save stack pointer to %eax, as arg1 of _start_c */ + "sub $12, %esp\n" /* sub 12 to keep it aligned after the push %eax */ + "push %eax\n" /* push arg1 on stack to support plain stack modes too */ + "call _start_c\n" /* transfer to c runtime */ + "hlt\n" /* ensure it does not return */ + ); + __nolibc_entrypoint_epilogue(); +} + +#else /* !defined(__x86_64__) */ + /* Syscalls for x86_64 : * - registers are 64-bit * - syscall number is passed in rax @@ -214,4 +383,5 @@ __asm__ ( "retq\n" ); -#endif /* _NOLIBC_ARCH_X86_64_H */ +#endif /* !defined(__x86_64__) */ +#endif /* _NOLIBC_ARCH_X86_H */ diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h index d20b2304aac2..426c89198135 100644 --- a/tools/include/nolibc/arch.h +++ b/tools/include/nolibc/arch.h @@ -15,14 +15,12 @@ #ifndef _NOLIBC_ARCH_H #define _NOLIBC_ARCH_H -#if defined(__x86_64__) -#include "arch-x86_64.h" -#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) -#include "arch-i386.h" +#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +#include "arch-x86.h" #elif defined(__ARM_EABI__) #include "arch-arm.h" #elif defined(__aarch64__) -#include "arch-aarch64.h" +#include "arch-arm64.h" #elif defined(__mips__) #include "arch-mips.h" #elif defined(__powerpc__) @@ -37,6 +35,8 @@ #include "arch-sparc.h" #elif defined(__m68k__) #include "arch-m68k.h" +#elif defined(__sh__) +#include "arch-sh.h" #else #error Unsupported Architecture #endif diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h index adda7333d12e..ba950f0e7338 100644 --- a/tools/include/nolibc/std.h +++ b/tools/include/nolibc/std.h @@ -16,6 +16,8 @@ #include "stdint.h" #include "stddef.h" +#include <linux/types.h> + /* those are commonly provided by sys/types.h */ typedef unsigned int dev_t; typedef unsigned long ino_t; @@ -27,6 +29,6 @@ typedef unsigned long nlink_t; typedef signed long off_t; typedef signed long blksize_t; typedef signed long blkcnt_t; -typedef signed long time_t; +typedef __kernel_old_time_t time_t; #endif /* _NOLIBC_STD_H */ diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index c470d334ef3f..7630234408c5 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -358,11 +358,11 @@ int __nolibc_printf(__nolibc_printf_cb cb, intptr_t state, size_t n, const char n -= w; while (width-- > w) { if (cb(state, " ", 1) != 0) - break; + return -1; written += 1; } if (cb(state, 
outstr, w) != 0) - break; + return -1; } written += len; diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 9556c69a6ae1..295e71d34aba 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -22,6 +22,7 @@ #include <linux/time.h> #include <linux/auxvec.h> #include <linux/fcntl.h> /* for O_* and AT_* */ +#include <linux/sched.h> /* for clone_args */ #include <linux/stat.h> /* for statx() */ #include "errno.h" @@ -139,7 +140,7 @@ int chdir(const char *path) static __attribute__((unused)) int sys_chmod(const char *path, mode_t mode) { -#ifdef __NR_fchmodat +#if defined(__NR_fchmodat) return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); #elif defined(__NR_chmod) return my_syscall2(__NR_chmod, path, mode); @@ -162,7 +163,7 @@ int chmod(const char *path, mode_t mode) static __attribute__((unused)) int sys_chown(const char *path, uid_t owner, gid_t group) { -#ifdef __NR_fchownat +#if defined(__NR_fchownat) return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); #elif defined(__NR_chown) return my_syscall3(__NR_chown, path, owner, group); @@ -236,7 +237,7 @@ int dup(int fd) static __attribute__((unused)) int sys_dup2(int old, int new) { -#ifdef __NR_dup3 +#if defined(__NR_dup3) return my_syscall3(__NR_dup3, old, new, 0); #elif defined(__NR_dup2) return my_syscall2(__NR_dup2, old, new); @@ -256,7 +257,7 @@ int dup2(int old, int new) * int dup3(int old, int new, int flags); */ -#ifdef __NR_dup3 +#if defined(__NR_dup3) static __attribute__((unused)) int sys_dup3(int old, int new, int flags) { @@ -320,7 +321,7 @@ void exit(int status) static __attribute__((unused)) pid_t sys_fork(void) { -#ifdef __NR_clone +#if defined(__NR_clone) /* note: some archs only have clone() and not fork(). Different archs * have a different API, but most archs have the flags on first arg and * will not use the rest with no other flag. @@ -340,6 +341,34 @@ pid_t fork(void) return __sysret(sys_fork()); } +#ifndef sys_vfork +static __attribute__((unused)) +pid_t sys_vfork(void) +{ +#if defined(__NR_vfork) + return my_syscall0(__NR_vfork); +#elif defined(__NR_clone3) + /* + * clone() could be used but has different argument orders per + * architecture. 
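Because vfork() borrows the parent's address space until execve() or exit, the new wrapper is only meant for the classic spawn idiom; a minimal sketch with error handling elided (the path and argv values are placeholders):

	pid_t pid = vfork();

	if (pid == 0) {
		/* child: only execve() or exit() are safe here */
		char *argv[] = { "true", NULL };

		execve("/bin/true", argv, NULL);
		exit(127);		/* reached only if execve() failed */
	} else if (pid > 0) {
		int status;

		waitpid(pid, &status, 0);
	}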
+ */ + struct clone_args args = { + .flags = CLONE_VM | CLONE_VFORK, + .exit_signal = SIGCHLD, + }; + + return my_syscall2(__NR_clone3, &args, sizeof(args)); +#else + return __nolibc_enosys(__func__); +#endif +} +#endif + +static __attribute__((unused)) +pid_t vfork(void) +{ + return __sysret(sys_vfork()); +} /* * int fsync(int fd); @@ -382,7 +411,7 @@ int getdents64(int fd, struct linux_dirent64 *dirp, int count) static __attribute__((unused)) uid_t sys_geteuid(void) { -#ifdef __NR_geteuid32 +#if defined(__NR_geteuid32) return my_syscall0(__NR_geteuid32); #else return my_syscall0(__NR_geteuid); @@ -500,7 +529,7 @@ int getpagesize(void) static __attribute__((unused)) uid_t sys_getuid(void) { -#ifdef __NR_getuid32 +#if defined(__NR_getuid32) return my_syscall0(__NR_getuid32); #else return my_syscall0(__NR_getuid); @@ -538,7 +567,7 @@ int kill(pid_t pid, int signal) static __attribute__((unused)) int sys_link(const char *old, const char *new) { -#ifdef __NR_linkat +#if defined(__NR_linkat) return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); #elif defined(__NR_link) return my_syscall2(__NR_link, old, new); @@ -561,7 +590,7 @@ int link(const char *old, const char *new) static __attribute__((unused)) off_t sys_lseek(int fd, off_t offset, int whence) { -#ifdef __NR_lseek +#if defined(__NR_lseek) return my_syscall3(__NR_lseek, fd, offset, whence); #else return __nolibc_enosys(__func__, fd, offset, whence); @@ -572,7 +601,7 @@ static __attribute__((unused)) int sys_llseek(int fd, unsigned long offset_high, unsigned long offset_low, __kernel_loff_t *result, int whence) { -#ifdef __NR_llseek +#if defined(__NR_llseek) return my_syscall5(__NR_llseek, fd, offset_high, offset_low, result, whence); #else return __nolibc_enosys(__func__, fd, offset_high, offset_low, result, whence); @@ -609,7 +638,7 @@ off_t lseek(int fd, off_t offset, int whence) static __attribute__((unused)) int sys_mkdir(const char *path, mode_t mode) { -#ifdef __NR_mkdirat +#if defined(__NR_mkdirat) return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); #elif defined(__NR_mkdir) return my_syscall2(__NR_mkdir, path, mode); @@ -631,7 +660,7 @@ int mkdir(const char *path, mode_t mode) static __attribute__((unused)) int sys_rmdir(const char *path) { -#ifdef __NR_rmdir +#if defined(__NR_rmdir) return my_syscall1(__NR_rmdir, path); #elif defined(__NR_unlinkat) return my_syscall3(__NR_unlinkat, AT_FDCWD, path, AT_REMOVEDIR); @@ -654,7 +683,7 @@ int rmdir(const char *path) static __attribute__((unused)) long sys_mknod(const char *path, mode_t mode, dev_t dev) { -#ifdef __NR_mknodat +#if defined(__NR_mknodat) return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); #elif defined(__NR_mknod) return my_syscall3(__NR_mknod, path, mode, dev); @@ -843,7 +872,7 @@ pid_t setsid(void) static __attribute__((unused)) int sys_symlink(const char *old, const char *new) { -#ifdef __NR_symlinkat +#if defined(__NR_symlinkat) return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); #elif defined(__NR_symlink) return my_syscall2(__NR_symlink, old, new); @@ -900,7 +929,7 @@ int umount2(const char *path, int flags) static __attribute__((unused)) int sys_unlink(const char *path) { -#ifdef __NR_unlinkat +#if defined(__NR_unlinkat) return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); #elif defined(__NR_unlink) return my_syscall1(__NR_unlink, path); diff --git a/tools/include/nolibc/sys/wait.h b/tools/include/nolibc/sys/wait.h index 4d44e3da0ba8..56ddb806da7f 100644 --- a/tools/include/nolibc/sys/wait.h +++ b/tools/include/nolibc/sys/wait.h @@ 
-78,7 +78,7 @@ pid_t waitpid(pid_t pid, int *status, int options) ret = waitid(idtype, id, &info, options); if (ret) - return ret; + return -1; switch (info.si_code) { case 0: diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index fc387940d51f..d02bc44d2643 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -36,6 +36,8 @@ void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struc * int clock_getres(clockid_t clockid, struct timespec *res); * int clock_gettime(clockid_t clockid, struct timespec *tp); * int clock_settime(clockid_t clockid, const struct timespec *tp); + * int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + * struct timespec *rmtp) */ static __attribute__((unused)) @@ -107,6 +109,32 @@ int clock_settime(clockid_t clockid, struct timespec *tp) return __sysret(sys_clock_settime(clockid, tp)); } +static __attribute__((unused)) +int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ +#if defined(__NR_clock_nanosleep) + return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp); +#elif defined(__NR_clock_nanosleep_time64) + struct __kernel_timespec krqtp, krmtp; + int ret; + + __nolibc_timespec_user_to_kernel(rqtp, &krqtp); + ret = my_syscall4(__NR_clock_nanosleep_time64, clockid, flags, &krqtp, &krmtp); + if (rmtp) + __nolibc_timespec_kernel_to_user(&krmtp, rmtp); + return ret; +#else + return __nolibc_enosys(__func__, clockid, flags, rqtp, rmtp); +#endif +} + +static __attribute__((unused)) +int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, + struct timespec *rmtp) +{ + return __sysret(sys_clock_nanosleep(clockid, flags, rqtp, rmtp)); +} static __inline__ double difftime(time_t time1, time_t time2) @@ -114,6 +142,12 @@ double difftime(time_t time1, time_t time2) return time1 - time2; } +static __inline__ +int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) +{ + return clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp); +} + static __attribute__((unused)) time_t time(time_t *tptr) diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 30904be544ed..16c6e9ec9451 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -128,7 +128,7 @@ typedef struct { int __fd = (fd); \ if (__fd >= 0) \ __set->fds[__fd / FD_SETIDXMASK] &= \ - ~(1U << (__fd & FX_SETBITMASK)); \ + ~(1U << (__fd & FD_SETBITMASK)); \ } while (0) #define FD_SET(fd, set) do { \ @@ -145,7 +145,7 @@ typedef struct { int __r = 0; \ if (__fd >= 0) \ __r = !!(__set->fds[__fd / FD_SETIDXMASK] & \ -1U << (__fd & FD_SET_BITMASK)); \ +1U << (__fd & FD_SETBITMASK)); \ __r; \ }) diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h index 7fba37b94401..e63a71d3c607 100644 --- a/tools/include/uapi/drm/drm.h +++ b/tools/include/uapi/drm/drm.h @@ -905,13 +905,17 @@ struct drm_syncobj_destroy { }; #define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_IMPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_FD_TO_HANDLE_FLAGS_TIMELINE (1 << 1) #define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_EXPORT_SYNC_FILE (1 << 0) +#define DRM_SYNCOBJ_HANDLE_TO_FD_FLAGS_TIMELINE (1 << 1) struct drm_syncobj_handle { __u32 handle; __u32 flags; __s32 fd; __u32 pad; + + __u64 point; }; struct drm_syncobj_transfer { diff --git a/tools/include/uapi/linux/bits.h b/tools/include/uapi/linux/bits.h index 682b406e1067..a04afef9efca 100644 --- a/tools/include/uapi/linux/bits.h +++ b/tools/include/uapi/linux/bits.h @@ -4,9 +4,9 @@ 
#ifndef _UAPI_LINUX_BITS_H #define _UAPI_LINUX_BITS_H -#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (BITS_PER_LONG - 1 - (h)))) +#define __GENMASK(h, l) (((~_UL(0)) << (l)) & (~_UL(0) >> (__BITS_PER_LONG - 1 - (h)))) -#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (BITS_PER_LONG_LONG - 1 - (h)))) +#define __GENMASK_ULL(h, l) (((~_ULL(0)) << (l)) & (~_ULL(0) >> (__BITS_PER_LONG_LONG - 1 - (h)))) #define __GENMASK_U128(h, l) \ ((_BIT128((h)) << 1) - (_BIT128(l))) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0b4a2f124d11..233de8677382 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -450,6 +450,7 @@ union bpf_iter_link_info { * * **struct bpf_map_info** * * **struct bpf_btf_info** * * **struct bpf_link_info** + * * **struct bpf_token_info** * * Return * Returns zero on success. On error, -1 is returned and *errno* @@ -906,6 +907,17 @@ union bpf_iter_link_info { * A new file descriptor (a nonnegative integer), or -1 if an * error occurred (in which case, *errno* is set appropriately). * + * BPF_PROG_STREAM_READ_BY_FD + * Description + * Read data of a program's BPF stream. The program is identified + * by *prog_fd*, and the stream is identified by the *stream_id*. + * The data is copied to a buffer pointed to by *stream_buf*, and + * filled less than or equal to *stream_buf_len* bytes. + * + * Return + * Number of bytes read from the stream on success, or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -961,6 +973,7 @@ enum bpf_cmd { BPF_LINK_DETACH, BPF_PROG_BIND_MAP, BPF_TOKEN_CREATE, + BPF_PROG_STREAM_READ_BY_FD, __MAX_BPF_CMD, }; @@ -1463,6 +1476,11 @@ struct bpf_stack_build_id { #define BPF_OBJ_NAME_LEN 16U +enum { + BPF_STREAM_STDOUT = 1, + BPF_STREAM_STDERR = 2, +}; + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -1794,6 +1812,13 @@ union bpf_attr { }; __u64 expected_revision; } netkit; + struct { + union { + __u32 relative_fd; + __u32 relative_id; + }; + __u64 expected_revision; + } cgroup; }; } link_create; @@ -1842,6 +1867,13 @@ union bpf_attr { __u32 bpffs_fd; } token_create; + struct { + __aligned_u64 stream_buf; + __u32 stream_buf_len; + __u32 stream_id; + __u32 prog_fd; + } prog_stream_read; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -2403,7 +2435,7 @@ union bpf_attr { * into it. An example is available in file * *samples/bpf/trace_output_user.c* in the Linux kernel source * tree (the eBPF program counterpart is in - * *samples/bpf/trace_output_kern.c*). + * *samples/bpf/trace_output.bpf.c*). 
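To make the new BPF_PROG_STREAM_READ_BY_FD plumbing above concrete, a hedged sketch of draining a program's error stream with the raw syscall; prog_fd is assumed to come from an already loaded program, and the installed uapi header must be new enough to carry the prog_stream_read fields (libbpf's bpf_prog_stream_read() wrapper further down packs the same attributes):

#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/bpf.h>

static int dump_prog_stderr(int prog_fd)
{
	union bpf_attr attr;
	char buf[4096];
	int n;

	memset(&attr, 0, sizeof(attr));
	attr.prog_stream_read.prog_fd = prog_fd;
	attr.prog_stream_read.stream_id = BPF_STREAM_STDERR;
	attr.prog_stream_read.stream_buf = (unsigned long)buf;
	attr.prog_stream_read.stream_buf_len = sizeof(buf);

	n = syscall(__NR_bpf, BPF_PROG_STREAM_READ_BY_FD, &attr, sizeof(attr));
	if (n > 0)
		n = write(STDERR_FILENO, buf, n);
	return n;
}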
* * **bpf_perf_event_output**\ () achieves better performance * than **bpf_trace_printk**\ () for sharing data with user @@ -6653,11 +6685,15 @@ struct bpf_link_info { struct { __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */ __u32 tp_name_len; /* in/out: tp_name buffer len */ + __u32 :32; + __u64 cookie; } raw_tracepoint; struct { __u32 attach_type; __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */ __u32 target_btf_id; /* BTF type id inside the object */ + __u32 :32; + __u64 cookie; } tracing; struct { __u64 cgroup_id; @@ -6768,6 +6804,13 @@ struct bpf_link_info { }; } __attribute__((aligned(8))); +struct bpf_token_info { + __u64 allowed_cmds; + __u64 allowed_maps; + __u64 allowed_progs; + __u64 allowed_attachs; +} __attribute__((aligned(8))); + /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on * attach type). diff --git a/tools/include/uapi/linux/coredump.h b/tools/include/uapi/linux/coredump.h new file mode 100644 index 000000000000..dc3789b78af0 --- /dev/null +++ b/tools/include/uapi/linux/coredump.h @@ -0,0 +1,104 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ + +#ifndef _UAPI_LINUX_COREDUMP_H +#define _UAPI_LINUX_COREDUMP_H + +#include <linux/types.h> + +/** + * coredump_{req,ack} flags + * @COREDUMP_KERNEL: kernel writes coredump + * @COREDUMP_USERSPACE: userspace writes coredump + * @COREDUMP_REJECT: don't generate coredump + * @COREDUMP_WAIT: wait for coredump server + */ +enum { + COREDUMP_KERNEL = (1ULL << 0), + COREDUMP_USERSPACE = (1ULL << 1), + COREDUMP_REJECT = (1ULL << 2), + COREDUMP_WAIT = (1ULL << 3), +}; + +/** + * struct coredump_req - message kernel sends to userspace + * @size: size of struct coredump_req + * @size_ack: known size of struct coredump_ack on this kernel + * @mask: supported features + * + * When a coredump happens the kernel will connect to the coredump + * socket and send a coredump request to the coredump server. The @size + * member is set to the size of struct coredump_req and provides a hint + * to userspace how much data can be read. Userspace may use MSG_PEEK to + * peek the size of struct coredump_req and then choose to consume it in + * one go. Userspace may also simply read a COREDUMP_ACK_SIZE_VER0 + * request. If the size the kernel sends is larger userspace simply + * discards any remaining data. + * + * The coredump_req->mask member is set to the currently know features. + * Userspace may only set coredump_ack->mask to the bits raised by the + * kernel in coredump_req->mask. + * + * The coredump_req->size_ack member is set by the kernel to the size of + * struct coredump_ack the kernel knows. Userspace may only send up to + * coredump_req->size_ack bytes to the kernel and must set + * coredump_ack->size accordingly. + */ +struct coredump_req { + __u32 size; + __u32 size_ack; + __u64 mask; +}; + +enum { + COREDUMP_REQ_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * struct coredump_ack - message userspace sends to kernel + * @size: size of the struct + * @spare: unused + * @mask: features kernel is supposed to use + * + * The @size member must be set to the size of struct coredump_ack. It + * may never exceed what the kernel returned in coredump_req->size_ack + * but it may of course be smaller (>= COREDUMP_ACK_SIZE_VER0 and <= + * coredump_req->size_ack). + * + * The @mask member must be set to the features the coredump server + * wants the kernel to use. 
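Putting the request/ack handshake together, a minimal sketch of the server side, assuming a connection on the coredump socket has already been accept()ed and the server simply lets the kernel write the dump (the ack struct, COREDUMP_ACK_SIZE_VER0 and the mark byte values are defined just below):

#include <string.h>
#include <unistd.h>
#include <linux/coredump.h>

static int handle_coredump_conn(int fd)
{
	struct coredump_req req;
	struct coredump_ack ack;
	unsigned char mark;

	memset(&req, 0, sizeof(req));
	if (read(fd, &req, sizeof(req)) < (ssize_t)COREDUMP_REQ_SIZE_VER0)
		return -1;

	memset(&ack, 0, sizeof(ack));
	ack.size = req.size_ack < sizeof(ack) ? req.size_ack : sizeof(ack);
	ack.mask = req.mask & COREDUMP_KERNEL;	/* only bits the kernel offered */

	if (write(fd, &ack, ack.size) != (ssize_t)ack.size)
		return -1;

	/* the kernel answers with a single coredump_mark byte */
	if (read(fd, &mark, 1) != 1 || mark != COREDUMP_MARK_REQACK)
		return -1;
	return 0;
}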
Only bits the kernel returned in + * coredump_req->mask may be set. + */ +struct coredump_ack { + __u32 size; + __u32 spare; + __u64 mask; +}; + +enum { + COREDUMP_ACK_SIZE_VER0 = 16U, /* size of first published struct */ +}; + +/** + * enum coredump_mark - Markers for the coredump socket + * + * The kernel will place a single byte on the coredump socket. The + * markers notify userspace whether the coredump ack succeeded or + * failed. + * + * @COREDUMP_MARK_MINSIZE: the provided coredump_ack size was too small + * @COREDUMP_MARK_MAXSIZE: the provided coredump_ack size was too big + * @COREDUMP_MARK_UNSUPPORTED: the provided coredump_ack mask was invalid + * @COREDUMP_MARK_CONFLICTING: the provided coredump_ack mask has conflicting options + * @COREDUMP_MARK_REQACK: the coredump request and ack was successful + * @__COREDUMP_MARK_MAX: the maximum coredump mark value + */ +enum coredump_mark { + COREDUMP_MARK_REQACK = 0U, + COREDUMP_MARK_MINSIZE = 1U, + COREDUMP_MARK_MAXSIZE = 2U, + COREDUMP_MARK_UNSUPPORTED = 3U, + COREDUMP_MARK_CONFLICTING = 4U, + __COREDUMP_MARK_MAX = (1U << 31), +}; + +#endif /* _UAPI_LINUX_COREDUMP_H */ diff --git a/tools/include/uapi/linux/fscrypt.h b/tools/include/uapi/linux/fscrypt.h index 7a8f4c290187..3aff99f2696a 100644 --- a/tools/include/uapi/linux/fscrypt.h +++ b/tools/include/uapi/linux/fscrypt.h @@ -119,7 +119,7 @@ struct fscrypt_key_specifier { */ struct fscrypt_provisioning_key_payload { __u32 type; - __u32 __reserved; + __u32 flags; __u8 raw[]; }; @@ -128,7 +128,9 @@ struct fscrypt_add_key_arg { struct fscrypt_key_specifier key_spec; __u32 raw_size; __u32 key_id; - __u32 __reserved[8]; +#define FSCRYPT_ADD_KEY_FLAG_HW_WRAPPED 0x00000001 + __u32 flags; + __u32 __reserved[7]; __u8 raw[]; }; diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 44f2bb93e7e6..23a062781468 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -79,6 +79,7 @@ struct xdp_mmap_offsets { #define XDP_UMEM_COMPLETION_RING 6 #define XDP_STATISTICS 7 #define XDP_OPTIONS 8 +#define XDP_MAX_TX_SKB_BUDGET 9 struct xdp_umem_reg { __u64 addr; /* Start of packet data area */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index b6ae8ad8934b..7415a3863891 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -375,6 +375,7 @@ struct kvm_run { #define KVM_SYSTEM_EVENT_WAKEUP 4 #define KVM_SYSTEM_EVENT_SUSPEND 5 #define KVM_SYSTEM_EVENT_SEV_TERM 6 +#define KVM_SYSTEM_EVENT_TDX_FATAL 7 __u32 type; __u32 ndata; union { @@ -617,6 +618,7 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) +#define KVM_X86_DISABLE_EXITS_APERFMPERF (1 << 4) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -930,6 +932,9 @@ struct kvm_enable_cap { #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 #define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 +#define KVM_CAP_ARM_EL2 240 +#define KVM_CAP_ARM_EL2_E2H0 241 +#define KVM_CAP_RISCV_MP_STATE_RESET 242 struct kvm_irq_routing_irqchip { __u32 irqchip; diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 7eb9571786b8..48eb49aa03d4 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -77,6 +77,11 @@ enum netdev_qstats_scope { NETDEV_QSTATS_SCOPE_QUEUE = 1, }; +enum netdev_napi_threaded { + NETDEV_NAPI_THREADED_DISABLED, + 
NETDEV_NAPI_THREADED_ENABLED, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -134,6 +139,7 @@ enum { NETDEV_A_NAPI_DEFER_HARD_IRQS, NETDEV_A_NAPI_GRO_FLUSH_TIMEOUT, NETDEV_A_NAPI_IRQ_SUSPEND_TIMEOUT, + NETDEV_A_NAPI_THREADED, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 43dec6eed559..475fc8ca4403 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -255,7 +255,12 @@ struct prctl_mm_map { /* Dispatch syscalls to a userspace handler */ #define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 -# define PR_SYS_DISPATCH_ON 1 +/* Enable dispatch except for the specified range */ +# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1 +/* Enable dispatch for the specified range */ +# define PR_SYS_DISPATCH_INCLUSIVE_ON 2 +/* Legacy name for backwards compatibility */ +# define PR_SYS_DISPATCH_ON PR_SYS_DISPATCH_EXCLUSIVE_ON /* The control values for the user space selector when dispatch is enabled */ # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 @@ -367,8 +372,6 @@ struct prctl_mm_map { /* FUTEX hash management */ #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 -# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 -# define PR_FUTEX_HASH_GET_IMMUTABLE 3 #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index f78ee3670dd5..1686861aae20 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -182,8 +182,12 @@ struct statx { /* File offset alignment for direct I/O reads */ __u32 stx_dio_read_offset_align; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ /* 0x100 */ }; diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index a9c3e33d0f8a..ab40dbf9f020 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -837,6 +837,50 @@ int bpf_link_create(int prog_fd, int target_fd, if (!OPTS_ZEROED(opts, netkit)) return libbpf_err(-EINVAL); break; + case BPF_CGROUP_INET_INGRESS: + case BPF_CGROUP_INET_EGRESS: + case BPF_CGROUP_INET_SOCK_CREATE: + case BPF_CGROUP_INET_SOCK_RELEASE: + case BPF_CGROUP_INET4_BIND: + case BPF_CGROUP_INET6_BIND: + case BPF_CGROUP_INET4_POST_BIND: + case BPF_CGROUP_INET6_POST_BIND: + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: + case BPF_CGROUP_UNIX_CONNECT: + case BPF_CGROUP_INET4_GETPEERNAME: + case BPF_CGROUP_INET6_GETPEERNAME: + case BPF_CGROUP_UNIX_GETPEERNAME: + case BPF_CGROUP_INET4_GETSOCKNAME: + case BPF_CGROUP_INET6_GETSOCKNAME: + case BPF_CGROUP_UNIX_GETSOCKNAME: + case BPF_CGROUP_UDP4_SENDMSG: + case BPF_CGROUP_UDP6_SENDMSG: + case BPF_CGROUP_UNIX_SENDMSG: + case BPF_CGROUP_UDP4_RECVMSG: + case BPF_CGROUP_UDP6_RECVMSG: + case BPF_CGROUP_UNIX_RECVMSG: + case BPF_CGROUP_SOCK_OPS: + case BPF_CGROUP_DEVICE: + case BPF_CGROUP_SYSCTL: + case BPF_CGROUP_GETSOCKOPT: + case BPF_CGROUP_SETSOCKOPT: + case BPF_LSM_CGROUP: + relative_fd = OPTS_GET(opts, cgroup.relative_fd, 0); + relative_id = OPTS_GET(opts, cgroup.relative_id, 0); + if (relative_fd && relative_id) + return libbpf_err(-EINVAL); + if (relative_id) { + attr.link_create.cgroup.relative_id = relative_id; + attr.link_create.flags |= BPF_F_ID; + } else { + 
attr.link_create.cgroup.relative_fd = relative_fd; + } + attr.link_create.cgroup.expected_revision = + OPTS_GET(opts, cgroup.expected_revision, 0); + if (!OPTS_ZEROED(opts, cgroup)) + return libbpf_err(-EINVAL); + break; default: if (!OPTS_ZEROED(opts, flags)) return libbpf_err(-EINVAL); @@ -1331,3 +1375,23 @@ int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } + +int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, + struct bpf_prog_stream_read_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_stream_read); + union bpf_attr attr; + int err; + + if (!OPTS_VALID(opts, bpf_prog_stream_read_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.prog_stream_read.stream_buf = ptr_to_u64(buf); + attr.prog_stream_read.stream_buf_len = buf_len; + attr.prog_stream_read.stream_id = stream_id; + attr.prog_stream_read.prog_fd = prog_fd; + + err = sys_bpf(BPF_PROG_STREAM_READ_BY_FD, &attr, attr_sz); + return libbpf_err_errno(err); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 777627d33d25..7252150e7ad3 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -438,6 +438,11 @@ struct bpf_link_create_opts { __u32 relative_id; __u64 expected_revision; } netkit; + struct { + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + } cgroup; }; size_t :0; }; @@ -704,6 +709,27 @@ struct bpf_token_create_opts { LIBBPF_API int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts); +struct bpf_prog_stream_read_opts { + size_t sz; + size_t :0; +}; +#define bpf_prog_stream_read_opts__last_field sz +/** + * @brief **bpf_prog_stream_read** reads data from the BPF stream of a given BPF + * program. + * + * @param prog_fd FD for the BPF program whose BPF stream is to be read. + * @param stream_id ID of the BPF stream to be read. + * @param buf Buffer to read data into from the BPF stream. + * @param buf_len Maximum number of bytes to read from the BPF stream. + * @param opts optional options, can be NULL + * + * @return The number of bytes read, on success; negative error code, otherwise + * (errno is also set to the error code) + */ +LIBBPF_API int bpf_prog_stream_read(int prog_fd, __u32 stream_id, void *buf, __u32 buf_len, + struct bpf_prog_stream_read_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index a50773d4616e..80c028540656 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -215,6 +215,7 @@ enum libbpf_tristate { #define __arg_nonnull __attribute((btf_decl_tag("arg:nonnull"))) #define __arg_nullable __attribute((btf_decl_tag("arg:nullable"))) #define __arg_trusted __attribute((btf_decl_tag("arg:trusted"))) +#define __arg_untrusted __attribute((btf_decl_tag("arg:untrusted"))) #define __arg_arena __attribute((btf_decl_tag("arg:arena"))) #ifndef ___bpf_concat @@ -314,6 +315,22 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) +extern int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args, + __u32 len__sz, void *aux__prog) __weak __ksym; + +#define bpf_stream_printk(stream_id, fmt, args...) 
\ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param), NULL);\ +}) + /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args * Otherwise use __bpf_vprintk */ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index f1d495dc66bb..37682908cb0f 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1384,12 +1384,12 @@ static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) fd = open(path, O_RDONLY); if (fd < 0) - return libbpf_err_ptr(-errno); + return ERR_PTR(-errno); if (fstat(fd, &st) < 0) { err = -errno; close(fd); - return libbpf_err_ptr(err); + return ERR_PTR(err); } data = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); @@ -1397,7 +1397,7 @@ static struct btf *btf_parse_raw_mmap(const char *path, struct btf *base_btf) close(fd); if (data == MAP_FAILED) - return libbpf_err_ptr(err); + return ERR_PTR(err); btf = btf_new(data, st.st_size, base_btf, true); if (IS_ERR(btf)) diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 4392451d634b..ccfd905f03df 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -326,9 +326,10 @@ struct btf_dump_type_data_opts { bool compact; /* no newlines/indentation */ bool skip_names; /* skip member/type names */ bool emit_zeroes; /* show 0-valued fields */ + bool emit_strings; /* print char arrays as strings */ size_t :0; }; -#define btf_dump_type_data_opts__last_field emit_zeroes +#define btf_dump_type_data_opts__last_field emit_strings LIBBPF_API int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 460c3e57fadb..f09f25eccf3c 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -68,6 +68,7 @@ struct btf_dump_data { bool compact; bool skip_names; bool emit_zeroes; + bool emit_strings; __u8 indent_lvl; /* base indent level */ char indent_str[BTF_DATA_INDENT_STR_LEN]; /* below are used during iteration */ @@ -226,6 +227,9 @@ static void btf_dump_free_names(struct hashmap *map) size_t bkt; struct hashmap_entry *cur; + if (!map) + return; + hashmap__for_each_entry(map, cur, bkt) free((void *)cur->pkey); @@ -2028,6 +2032,52 @@ static int btf_dump_var_data(struct btf_dump *d, return btf_dump_dump_type_data(d, NULL, t, type_id, data, 0, 0); } +static int btf_dump_string_data(struct btf_dump *d, + const struct btf_type *t, + __u32 id, + const void *data) +{ + const struct btf_array *array = btf_array(t); + const char *chars = data; + __u32 i; + + /* Make sure it is a NUL-terminated string. */ + for (i = 0; i < array->nelems; i++) { + if ((void *)(chars + i) >= d->typed_dump->data_end) + return -E2BIG; + if (chars[i] == '\0') + break; + } + if (i == array->nelems) { + /* The caller will print this as a regular array. */ + return -EINVAL; + } + + btf_dump_data_pfx(d); + btf_dump_printf(d, "\""); + + for (i = 0; i < array->nelems; i++) { + char c = chars[i]; + + if (c == '\0') { + /* + * When printing character arrays as strings, NUL bytes + * are always treated as string terminators; they are + * never printed. 
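The new flag is opt-in; a short usage sketch, assuming d, id, data and data_sz were obtained the usual way (btf_dump__new() plus the type id and the raw bytes of the object to print):

	LIBBPF_OPTS(btf_dump_type_data_opts, opts,
		.compact = true,
		.emit_strings = true);	/* render NUL-terminated char arrays as "..." */
	int err;

	err = btf_dump__dump_type_data(d, id, data, data_sz, &opts);
	if (err < 0)
		fprintf(stderr, "dump failed: %d\n", err);

Arrays without a terminating NUL inside the dumped region still fall back to the regular per-element output.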
+ */ + break; + } + if (isprint(c)) + btf_dump_printf(d, "%c", c); + else + btf_dump_printf(d, "\\x%02x", (__u8)c); + } + + btf_dump_printf(d, "\""); + + return 0; +} + static int btf_dump_array_data(struct btf_dump *d, const struct btf_type *t, __u32 id, @@ -2055,8 +2105,13 @@ static int btf_dump_array_data(struct btf_dump *d, * char arrays, so if size is 1 and element is * printable as a char, we'll do that. */ - if (elem_size == 1) + if (elem_size == 1) { + if (d->typed_dump->emit_strings && + btf_dump_string_data(d, t, id, data) == 0) { + return 0; + } d->typed_dump->is_array_char = true; + } } /* note that we increment depth before calling btf_dump_print() below; @@ -2544,6 +2599,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id, d->typed_dump->compact = OPTS_GET(opts, compact, false); d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); d->typed_dump->emit_zeroes = OPTS_GET(opts, emit_zeroes, false); + d->typed_dump->emit_strings = OPTS_GET(opts, emit_strings, false); ret = btf_dump_dump_type_data(d, NULL, t, id, data, 0, 0); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e9c641a2fb20..e067cb5776bd 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -597,7 +597,7 @@ struct extern_desc { int sym_idx; int btf_id; int sec_btf_id; - const char *name; + char *name; char *essent_name; bool is_set; bool is_weak; @@ -735,7 +735,7 @@ struct bpf_object { struct usdt_manager *usdt_man; - struct bpf_map *arena_map; + int arena_map_idx; void *arena_data; size_t arena_data_sz; @@ -1517,6 +1517,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.obj_buf_sz = obj_buf_sz; obj->efile.btf_maps_shndx = -1; obj->kconfig_map_idx = -1; + obj->arena_map_idx = -1; obj->kern_version = get_kernel_version(); obj->state = OBJ_OPEN; @@ -2964,7 +2965,7 @@ static int init_arena_map_data(struct bpf_object *obj, struct bpf_map *map, const long page_sz = sysconf(_SC_PAGE_SIZE); size_t mmap_sz; - mmap_sz = bpf_map_mmap_sz(obj->arena_map); + mmap_sz = bpf_map_mmap_sz(map); if (roundup(data_sz, page_sz) > mmap_sz) { pr_warn("elf: sec '%s': declared ARENA map size (%zu) is too small to hold global __arena variables of size %zu\n", sec_name, mmap_sz, data_sz); @@ -3038,12 +3039,12 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, if (map->def.type != BPF_MAP_TYPE_ARENA) continue; - if (obj->arena_map) { + if (obj->arena_map_idx >= 0) { pr_warn("map '%s': only single ARENA map is supported (map '%s' is also ARENA)\n", - map->name, obj->arena_map->name); + map->name, obj->maps[obj->arena_map_idx].name); return -EINVAL; } - obj->arena_map = map; + obj->arena_map_idx = i; if (obj->efile.arena_data) { err = init_arena_map_data(obj, map, ARENA_SEC, obj->efile.arena_data_shndx, @@ -3053,7 +3054,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return err; } } - if (obj->efile.arena_data && !obj->arena_map) { + if (obj->efile.arena_data && obj->arena_map_idx < 0) { pr_warn("elf: sec '%s': to use global __arena variables the ARENA map should be explicitly declared in SEC(\".maps\")\n", ARENA_SEC); return -ENOENT; @@ -4259,7 +4260,9 @@ static int bpf_object__collect_externs(struct bpf_object *obj) return ext->btf_id; } t = btf__type_by_id(obj->btf, ext->btf_id); - ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->name = strdup(btf__name_by_offset(obj->btf, t->name_off)); + if (!ext->name) + return -ENOMEM; ext->sym_idx = i; ext->is_weak = ELF64_ST_BIND(sym->st_info) == 
STB_WEAK; @@ -4579,10 +4582,20 @@ static int bpf_program__record_reloc(struct bpf_program *prog, /* arena data relocation */ if (shdr_idx == obj->efile.arena_data_shndx) { + if (obj->arena_map_idx < 0) { + pr_warn("prog '%s': bad arena data relocation at insn %u, no arena maps defined\n", + prog->name, insn_idx); + return -LIBBPF_ERRNO__RELOC; + } reloc_desc->type = RELO_DATA; reloc_desc->insn_idx = insn_idx; - reloc_desc->map_idx = obj->arena_map - obj->maps; + reloc_desc->map_idx = obj->arena_map_idx; reloc_desc->sym_off = sym->st_value; + + map = &obj->maps[obj->arena_map_idx]; + pr_debug("prog '%s': found arena map %d (%s, sec %d, off %zu) for insn %u\n", + prog->name, obj->arena_map_idx, map->name, map->sec_idx, + map->sec_offset, insn_idx); return 0; } @@ -9138,8 +9151,10 @@ void bpf_object__close(struct bpf_object *obj) zfree(&obj->btf_custom_path); zfree(&obj->kconfig); - for (i = 0; i < obj->nr_extern; i++) + for (i = 0; i < obj->nr_extern; i++) { + zfree(&obj->externs[i].name); zfree(&obj->externs[i].essent_name); + } zfree(&obj->externs); obj->nr_extern = 0; @@ -9206,7 +9221,7 @@ int bpf_object__gen_loader(struct bpf_object *obj, struct gen_loader_opts *opts) return libbpf_err(-EFAULT); if (!OPTS_VALID(opts, gen_loader_opts)) return libbpf_err(-EINVAL); - gen = calloc(sizeof(*gen), 1); + gen = calloc(1, sizeof(*gen)); if (!gen) return libbpf_err(-ENOMEM); gen->opts = opts; @@ -12838,6 +12853,34 @@ struct bpf_link *bpf_program__attach_xdp(const struct bpf_program *prog, int ifi } struct bpf_link * +bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, + const struct bpf_cgroup_opts *opts) +{ + LIBBPF_OPTS(bpf_link_create_opts, link_create_opts); + __u32 relative_id; + int relative_fd; + + if (!OPTS_VALID(opts, bpf_cgroup_opts)) + return libbpf_err_ptr(-EINVAL); + + relative_id = OPTS_GET(opts, relative_id, 0); + relative_fd = OPTS_GET(opts, relative_fd, 0); + + if (relative_fd && relative_id) { + pr_warn("prog '%s': relative_fd and relative_id cannot be set at the same time\n", + prog->name); + return libbpf_err_ptr(-EINVAL); + } + + link_create_opts.cgroup.expected_revision = OPTS_GET(opts, expected_revision, 0); + link_create_opts.cgroup.relative_fd = relative_fd; + link_create_opts.cgroup.relative_id = relative_id; + link_create_opts.flags = OPTS_GET(opts, flags, 0); + + return bpf_program_attach_fd(prog, cgroup_fd, "cgroup", &link_create_opts); +} + +struct bpf_link * bpf_program__attach_tcx(const struct bpf_program *prog, int ifindex, const struct bpf_tcx_opts *opts) { diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 1137e7d2e1b5..d1cf813a057b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -877,6 +877,21 @@ LIBBPF_API struct bpf_link * bpf_program__attach_netkit(const struct bpf_program *prog, int ifindex, const struct bpf_netkit_opts *opts); +struct bpf_cgroup_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + __u32 flags; + __u32 relative_fd; + __u32 relative_id; + __u64 expected_revision; + size_t :0; +}; +#define bpf_cgroup_opts__last_field expected_revision + +LIBBPF_API struct bpf_link * +bpf_program__attach_cgroup_opts(const struct bpf_program *prog, int cgroup_fd, + const struct bpf_cgroup_opts *opts); + struct bpf_map; LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 1205f9a4fe04..d7bd463e7017 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ 
-437,6 +437,8 @@ LIBBPF_1.6.0 { bpf_linker__add_fd; bpf_linker__new_fd; bpf_object__prepare; + bpf_prog_stream_read; + bpf_program__attach_cgroup_opts; bpf_program__func_info; bpf_program__func_info_cnt; bpf_program__line_info; @@ -444,3 +446,6 @@ LIBBPF_1.6.0 { btf__add_decl_attr; btf__add_type_attr; } LIBBPF_1.5.0; + +LIBBPF_1.7.0 { +} LIBBPF_1.6.0; diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index 28c58fb17250..99331e317dee 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@ #define __LIBBPF_VERSION_H #define LIBBPF_MAJOR_VERSION 1 -#define LIBBPF_MINOR_VERSION 6 +#define LIBBPF_MINOR_VERSION 7 #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c index 4e4a52742b01..3373b9d45ac4 100644 --- a/tools/lib/bpf/usdt.c +++ b/tools/lib/bpf/usdt.c @@ -59,7 +59,7 @@ * * STAP_PROBE3(my_usdt_provider, my_usdt_probe_name, 123, x, &y); * - * USDT is identified by it's <provider-name>:<probe-name> pair of names. Each + * USDT is identified by its <provider-name>:<probe-name> pair of names. Each * individual USDT has a fixed number of arguments (3 in the above example) * and specifies values of each argument as if it was a function call. * @@ -81,7 +81,7 @@ * NOP instruction that kernel can replace with an interrupt instruction to * trigger instrumentation code (BPF program for all that we care about). * - * Semaphore above is and optional feature. It records an address of a 2-byte + * Semaphore above is an optional feature. It records an address of a 2-byte * refcount variable (normally in '.probes' ELF section) used for signaling if * there is anything that is attached to USDT. This is useful for user * applications if, for example, they need to prepare some arguments that are @@ -121,7 +121,7 @@ * a uprobe BPF program (which for kernel, at least currently, is just a kprobe * program, so BPF_PROG_TYPE_KPROBE program type). With the only difference * that uprobe is usually attached at the function entry, while USDT will - * normally will be somewhere inside the function. But it should always be + * normally be somewhere inside the function. But it should always be * pointing to NOP instruction, which makes such uprobes the fastest uprobe * kind. * @@ -151,7 +151,7 @@ * libbpf sets to spec ID during attach time, or, if kernel is too old to * support BPF cookie, through IP-to-spec-ID map that libbpf maintains in such * case. The latter means that some modes of operation can't be supported - * without BPF cookie. Such mode is attaching to shared library "generically", + * without BPF cookie. Such a mode is attaching to shared library "generically", * without specifying target process. In such case, it's impossible to * calculate absolute IP addresses for IP-to-spec-ID map, and thus such mode * is not supported without BPF cookie support. @@ -185,7 +185,7 @@ * as even if USDT spec string is the same, USDT cookie value can be * different. It was deemed excessive to try to deduplicate across independent * USDT attachments by taking into account USDT spec string *and* USDT cookie - * value, which would complicated spec ID accounting significantly for little + * value, which would complicate spec ID accounting significantly for little * gain. 
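For context, the freshly exported bpf_program__attach_cgroup_opts() pairs with the new struct bpf_cgroup_opts declared in libbpf.h above. A minimal usage sketch follows; the cgroup path and the program name "egress_filter" are hypothetical, and error handling is trimmed to the essentials:

    #include <fcntl.h>
    #include <unistd.h>
    #include <bpf/libbpf.h>

    /* Attach a cgroup program with the opts-based variant; all opt fields
     * are left at 0, i.e. no relative ordering and no revision check. */
    static struct bpf_link *attach_egress(struct bpf_object *obj)
    {
            LIBBPF_OPTS(bpf_cgroup_opts, opts);
            struct bpf_program *prog;
            struct bpf_link *link;
            int cgroup_fd;

            cgroup_fd = open("/sys/fs/cgroup/test", O_RDONLY);
            if (cgroup_fd < 0)
                    return NULL;

            prog = bpf_object__find_program_by_name(obj, "egress_filter");
            link = prog ? bpf_program__attach_cgroup_opts(prog, cgroup_fd, &opts) : NULL;

            close(cgroup_fd);
            return link;    /* NULL on failure, errno set by libbpf */
    }

Compared to bpf_program__attach_cgroup(), the opts variant lets callers pass mprog-style ordering (relative_fd or relative_id, which the new hunk rejects when both are set) and an expected_revision, forwarded through bpf_link_create_opts.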
*/ diff --git a/tools/net/ynl/pyynl/cli.py b/tools/net/ynl/pyynl/cli.py index 33ccc5c1843b..8c192e900bd3 100755 --- a/tools/net/ynl/pyynl/cli.py +++ b/tools/net/ynl/pyynl/cli.py @@ -113,6 +113,8 @@ def main(): spec = f"{spec_dir()}/{args.family}.yaml" if args.schema is None and spec.startswith(sys_schema_dir): args.schema = '' # disable schema validation when installed + if args.process_unknown is None: + args.process_unknown = True else: spec = args.spec if not os.path.isfile(spec): diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 55b59f6c79b8..8244a5f440b2 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -231,14 +231,7 @@ class NlMsg: self.extack['unknown'].append(extack) if attr_space: - # We don't have the ability to parse nests yet, so only do global - if 'miss-type' in self.extack and 'miss-nest' not in self.extack: - miss_type = self.extack['miss-type'] - if miss_type in attr_space.attrs_by_val: - spec = attr_space.attrs_by_val[miss_type] - self.extack['miss-type'] = spec['name'] - if 'doc' in spec: - self.extack['miss-type-doc'] = spec['doc'] + self.annotate_extack(attr_space) def _decode_policy(self, raw): policy = {} @@ -264,6 +257,18 @@ class NlMsg: policy['mask'] = attr.as_scalar('u64') return policy + def annotate_extack(self, attr_space): + """ Make extack more human friendly with attribute information """ + + # We don't have the ability to parse nests yet, so only do global + if 'miss-type' in self.extack and 'miss-nest' not in self.extack: + miss_type = self.extack['miss-type'] + if miss_type in attr_space.attrs_by_val: + spec = attr_space.attrs_by_val[miss_type] + self.extack['miss-type'] = spec['name'] + if 'doc' in spec: + self.extack['miss-type-doc'] = spec['doc'] + def cmd(self): return self.nl_type @@ -277,12 +282,12 @@ class NlMsg: class NlMsgs: - def __init__(self, data, attr_space=None): + def __init__(self, data): self.msgs = [] offset = 0 while offset < len(data): - msg = NlMsg(data, offset, attr_space=attr_space) + msg = NlMsg(data, offset) offset += msg.nl_len self.msgs.append(msg) @@ -570,7 +575,9 @@ class YnlFamily(SpecFamily): elif attr["type"] == 'string': attr_payload = str(value).encode('ascii') + b'\x00' elif attr["type"] == 'binary': - if isinstance(value, bytes): + if value is None: + attr_payload = b'' + elif isinstance(value, bytes): attr_payload = value elif isinstance(value, str): if attr.display_hint: @@ -579,6 +586,9 @@ class YnlFamily(SpecFamily): attr_payload = bytes.fromhex(value) elif isinstance(value, dict) and attr.struct_name: attr_payload = self._encode_struct(attr.struct_name, value) + elif isinstance(value, list) and attr.sub_type in NlAttr.type_formats: + format = NlAttr.get_format(attr.sub_type) + attr_payload = b''.join([format.pack(x) for x in value]) else: raise Exception(f'Unknown type for binary attribute, value: {value}') elif attr['type'] in NlAttr.type_formats or attr.is_auto_scalar: @@ -613,6 +623,16 @@ class YnlFamily(SpecFamily): pad = b'\x00' * ((4 - len(attr_payload) % 4) % 4) return struct.pack('HH', len(attr_payload) + 4, nl_type) + attr_payload + pad + def _get_enum_or_unknown(self, enum, raw): + try: + name = enum.entries_by_val[raw].name + except KeyError as error: + if self.process_unknown: + name = f"Unknown({raw})" + else: + raise error + return name + def _decode_enum(self, raw, attr_spec): enum = self.consts[attr_spec['enum']] if enum.type == 'flags' or attr_spec.get('enum-as-flags', False): @@ -620,11 +640,11 @@ class YnlFamily(SpecFamily): 
value = set() while raw: if raw & 1: - value.add(enum.entries_by_val[i].name) + value.add(self._get_enum_or_unknown(enum, i)) raw >>= 1 i += 1 else: - value = enum.entries_by_val[raw].name + value = self._get_enum_or_unknown(enum, raw) return value def _decode_binary(self, attr, attr_spec): @@ -757,6 +777,8 @@ class YnlFamily(SpecFamily): decoded = True elif attr_spec.is_auto_scalar: decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order) + if 'enum' in attr_spec: + decoded = self._decode_enum(decoded, attr_spec) elif attr_spec["type"] in NlAttr.type_formats: decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order) if 'enum' in attr_spec: @@ -1034,12 +1056,13 @@ class YnlFamily(SpecFamily): op_rsp = [] while not done: reply = self.sock.recv(self._recv_size) - nms = NlMsgs(reply, attr_space=op.attr_set) + nms = NlMsgs(reply) self._recv_dbg_print(reply, nms) for nl_msg in nms: if nl_msg.nl_seq in reqs_by_seq: (op, vals, req_msg, req_flags) = reqs_by_seq[nl_msg.nl_seq] if nl_msg.extack: + nl_msg.annotate_extack(op.attr_set) self._decode_extack(req_msg, op, nl_msg.extack, vals) else: op = None diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index 76032e01c2e7..ef032e17fec4 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -275,9 +275,8 @@ class Type(SpecAttr): def _setter_lines(self, ri, member, presence): raise Exception(f"Setter not implemented for class type {self.type}") - def setter(self, ri, space, direction, deref=False, ref=None): + def setter(self, ri, space, direction, deref=False, ref=None, var="req"): ref = (ref if ref else []) + [self.c_name] - var = "req" member = f"{var}->{'.'.join(ref)}" local_vars = [] @@ -332,7 +331,7 @@ class TypeUnused(Type): def attr_get(self, ri, var, first): pass - def setter(self, ri, space, direction, deref=False, ref=None): + def setter(self, ri, space, direction, deref=False, ref=None, var=None): pass @@ -355,7 +354,7 @@ class TypePad(Type): def attr_policy(self, cw): pass - def setter(self, ri, space, direction, deref=False, ref=None): + def setter(self, ri, space, direction, deref=False, ref=None, var=None): pass @@ -695,13 +694,14 @@ class TypeNest(Type): f"parg.data = &{var}->{self.c_name};"] return get_lines, init_lines, None - def setter(self, ri, space, direction, deref=False, ref=None): + def setter(self, ri, space, direction, deref=False, ref=None, var="req"): ref = (ref if ref else []) + [self.c_name] for _, attr in ri.family.pure_nested_structs[self.nested_attrs].member_list(): if attr.is_recursive(): continue - attr.setter(ri, self.nested_attrs, direction, deref=deref, ref=ref) + attr.setter(ri, self.nested_attrs, direction, deref=deref, ref=ref, + var=var) class TypeMultiAttr(Type): @@ -1879,7 +1879,9 @@ def rdir(direction): def op_prefix(ri, direction, deref=False): suffix = f"_{ri.type_name}" - if not ri.op_mode or ri.op_mode == 'do': + if not ri.op_mode: + pass + elif ri.op_mode == 'do': suffix += f"{direction_to_suffix[direction]}" else: if direction == 'request': @@ -2470,11 +2472,22 @@ def free_arg_name(direction): return 'obj' -def print_alloc_wrapper(ri, direction): +def print_alloc_wrapper(ri, direction, struct=None): name = op_prefix(ri, direction) - ri.cw.write_func_prot(f'static inline struct {name} *', f"{name}_alloc", [f"void"]) + struct_name = name + if ri.type_name_conflict: + struct_name += '_' + + args = ["void"] + cnt = "1" + if struct and struct.in_multi_val: + args = ["unsigned int n"] + cnt = "n" + + 
ri.cw.write_func_prot(f'static inline struct {struct_name} *', + f"{name}_alloc", args) ri.cw.block_start() - ri.cw.p(f'return calloc(1, sizeof(struct {name}));') + ri.cw.p(f'return calloc({cnt}, sizeof(struct {struct_name}));') ri.cw.block_end() @@ -2544,6 +2557,19 @@ def print_type(ri, direction): def print_type_full(ri, struct): _print_type(ri, "", struct) + if struct.request and struct.in_multi_val: + print_alloc_wrapper(ri, "", struct) + ri.cw.nl() + free_rsp_nested_prototype(ri) + ri.cw.nl() + + # Name conflicts are too hard to deal with with the current code base, + # they are very rare so don't bother printing setters in that case. + if ri.ku_space == 'user' and not ri.type_name_conflict: + for _, attr in struct.member_list(): + attr.setter(ri, ri.attr_set, "", var="obj") + ri.cw.nl() + def print_type_helpers(ri, direction, deref=False): print_free_prototype(ri, direction) @@ -3515,9 +3541,6 @@ def main(): for attr_set, struct in parsed.pure_nested_structs.items(): ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) print_type_full(ri, struct) - if struct.request and struct.in_multi_val: - free_rsp_nested_prototype(ri) - cw.nl() for op_name, op in parsed.ops.items(): cw.p(f"/* ============== {op.enum_name} ============== */") diff --git a/tools/objtool/check.c b/tools/objtool/check.c index f23bdda737aa..d14f20ef1db1 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -224,6 +224,7 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking14panic_explicit") || str_ends_with(func->name, "_4core9panicking14panic_nounwind") || str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || + str_ends_with(func->name, "_4core9panicking18panic_nounwind_fmt") || str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || @@ -1192,8 +1193,8 @@ static const char *uaccess_safe_builtin[] = { "__ubsan_handle_type_mismatch_v1", "__ubsan_handle_shift_out_of_bounds", "__ubsan_handle_load_invalid_value", - /* STACKLEAK */ - "stackleak_track_stack", + /* KSTACK_ERASE */ + "__sanitizer_cov_stack_depth", /* TRACE_BRANCH_PROFILING */ "ftrace_likely_update", /* STACKPROTECTOR */ @@ -2318,6 +2319,7 @@ static int read_annotate(struct objtool_file *file, for_each_reloc(sec->rsec, reloc) { type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4); + type = bswap_if_needed(file->elf, type); offset = reloc->sym->offset + reloc_addend(reloc); insn = find_insn(file, reloc->sym->sec, offset); diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index eacfe3b0a8d1..6a922d046b8e 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -38,6 +38,7 @@ NORETURN(mpt_halt_firmware) NORETURN(mwait_play_dead) NORETURN(nmi_panic_self_stop) NORETURN(panic) +NORETURN(vpanic) NORETURN(panic_smp_self_stop) NORETURN(rest_init) NORETURN(rewind_stack_and_make_dead) diff --git a/tools/perf/Documentation/perf-amd-ibs.txt b/tools/perf/Documentation/perf-amd-ibs.txt index 55f80beae037..548549935760 100644 --- a/tools/perf/Documentation/perf-amd-ibs.txt +++ b/tools/perf/Documentation/perf-amd-ibs.txt @@ -171,23 +171,48 @@ Below is a simple example of the perf mem tool. # perf mem report A normal perf mem report output will provide detailed memory access profile. -However, it can also be aggregated based on output fields. 
For example: - - # perf mem report -F mem,sample,snoop - Samples: 3M of event 'ibs_op//', Event count (approx.): 23524876 - Memory access Samples Snoop - N/A 1903343 N/A - L1 hit 1056754 N/A - L2 hit 75231 N/A - L3 hit 9496 HitM - L3 hit 2270 N/A - RAM hit 8710 N/A - Remote node, same socket RAM hit 3241 N/A - Remote core, same node Any cache hit 1572 HitM - Remote core, same node Any cache hit 514 N/A - Remote node, same socket Any cache hit 1216 HitM - Remote node, same socket Any cache hit 350 N/A - Uncached hit 18 N/A +New output fields will show related access info together. For example: + + # perf mem report -F overhead,cache,snoop,comm + ... + # Samples: 92K of event 'ibs_op//' + # Total weight : 531104 + # + # ---------- Cache ----------- --- Snoop ---- + # Overhead L1 L2 L1-buf Other HitM Other Command + # ........ ............................ .............. .......... + # + 76.07% 5.8% 35.7% 0.0% 34.6% 23.3% 52.8% cc1 + 5.79% 0.2% 0.0% 0.0% 5.6% 0.1% 5.7% make + 5.78% 0.1% 4.4% 0.0% 1.2% 0.5% 5.3% gcc + 5.33% 0.3% 3.9% 0.0% 1.1% 0.2% 5.2% as + 5.00% 0.1% 3.8% 0.0% 1.0% 0.3% 4.7% sh + 1.56% 0.1% 0.1% 0.0% 1.4% 0.6% 0.9% ld + 0.28% 0.1% 0.0% 0.0% 0.2% 0.1% 0.2% pkg-config + 0.09% 0.0% 0.0% 0.0% 0.1% 0.0% 0.1% git + 0.03% 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% rm + ... + +Also, it can be aggregated based on various memory access info using the +sort keys. For example: + + # perf mem report -s mem,snoop + ... + # Samples: 92K of event 'ibs_op//' + # Total weight : 531104 + # Sort order : mem,snoop + # + # Overhead Samples Memory access Snoop + # ........ ............ ....................................... ............ + # + 47.99% 1509 L2 hit N/A + 25.08% 338 core, same node Any cache hit HitM + 10.24% 54374 N/A N/A + 6.77% 35938 L1 hit N/A + 6.39% 101 core, same node Any cache hit N/A + 3.50% 69 RAM hit N/A + 0.03% 158 LFB/MAB hit N/A + 0.00% 2 Uncached hit N/A Please refer to their man page for more detail. diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt index 965e73d37772..4d164836d094 100644 --- a/tools/perf/Documentation/perf-mem.txt +++ b/tools/perf/Documentation/perf-mem.txt @@ -119,6 +119,22 @@ REPORT OPTIONS And the default sort keys are changed to local_weight, mem, sym, dso, symbol_daddr, dso_daddr, snoop, tlb, locked, blocked, local_ins_lat. +-F:: +--fields=:: + Specify output field - multiple keys can be specified in CSV format. + Please see linkperf:perf-report[1] for details. + + In addition to the default fields, 'perf mem report' will provide the + following fields to break down sample periods. + + - op: operation in the sample instruction (load, store, prefetch, ...) + - cache: location in CPU cache (L1, L2, ...) where the sample hit + - mem: location in memory or other places the sample hit + - dtlb: location in Data TLB (L1, L2) where the sample hit + - snoop: snoop result for the sampled data access + + Please take a look at the OUTPUT FIELD SELECTION section for caveats. + -T:: --type-profile:: Show data-type profile result instead of code symbols. This requires @@ -156,6 +172,40 @@ but one sample with weight 180 and the other with weight 20: 90% [k] memcpy 10% [.] strcmp +OUTPUT FIELD SELECTION +---------------------- +"perf mem report" adds a number of new output fields specific to data source +information in the sample. Some of them have the same name with the existing +sort keys ("mem" and "snoop"). So unlike other fields and sort keys, they'll +behave differently when it's used by -F/--fields or -s/--sort. 
+ +Using those two as output fields will aggregate samples altogether and show +breakdown. + + $ perf mem report -F mem,snoop + ... + # ------ Memory ------- --- Snoop ---- + # RAM Uncach Other HitM Other + # ..................... .............. + # + 3.5% 0.0% 96.5% 25.1% 74.9% + +But using the same name for sort keys will aggregate samples for each type +separately. + + $ perf mem report -s mem,snoop + # Overhead Samples Memory access Snoop + # ........ ............ ....................................... ............ + # + 47.99% 1509 L2 hit N/A + 25.08% 338 core, same node Any cache hit HitM + 10.24% 54374 N/A N/A + 6.77% 35938 L1 hit N/A + 6.39% 101 core, same node Any cache hit N/A + 3.50% 69 RAM hit N/A + 0.03% 158 LFB/MAB hit N/A + 0.00% 2 Uncached hit N/A + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-arm-spe[1] diff --git a/tools/perf/arch/riscv/util/kvm-stat.c b/tools/perf/arch/riscv/util/kvm-stat.c index 491aef449d1a..3ea7acb5e159 100644 --- a/tools/perf/arch/riscv/util/kvm-stat.c +++ b/tools/perf/arch/riscv/util/kvm-stat.c @@ -9,10 +9,10 @@ #include <memory.h> #include "../../../util/evsel.h" #include "../../../util/kvm-stat.h" -#include "riscv_exception_types.h" +#include "riscv_trap_types.h" #include "debug.h" -define_exit_reasons_table(riscv_exit_reasons, kvm_riscv_exception_class); +define_exit_reasons_table(riscv_exit_reasons, kvm_riscv_trap_class); const char *vcpu_id_str = "id"; const char *kvm_exit_reason = "scause"; @@ -30,7 +30,7 @@ static void event_get_key(struct evsel *evsel, struct event_key *key) { key->info = 0; - key->key = evsel__intval(evsel, sample, kvm_exit_reason); + key->key = evsel__intval(evsel, sample, kvm_exit_reason) & ~CAUSE_IRQ_FLAG; key->exit_reasons = riscv_exit_reasons; } diff --git a/tools/perf/arch/riscv/util/riscv_exception_types.h b/tools/perf/arch/riscv/util/riscv_exception_types.h deleted file mode 100644 index c49b8fa5e847..000000000000 --- a/tools/perf/arch/riscv/util/riscv_exception_types.h +++ /dev/null @@ -1,35 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#ifndef ARCH_PERF_RISCV_EXCEPTION_TYPES_H -#define ARCH_PERF_RISCV_EXCEPTION_TYPES_H - -#define EXC_INST_MISALIGNED 0 -#define EXC_INST_ACCESS 1 -#define EXC_INST_ILLEGAL 2 -#define EXC_BREAKPOINT 3 -#define EXC_LOAD_MISALIGNED 4 -#define EXC_LOAD_ACCESS 5 -#define EXC_STORE_MISALIGNED 6 -#define EXC_STORE_ACCESS 7 -#define EXC_SYSCALL 8 -#define EXC_HYPERVISOR_SYSCALL 9 -#define EXC_SUPERVISOR_SYSCALL 10 -#define EXC_INST_PAGE_FAULT 12 -#define EXC_LOAD_PAGE_FAULT 13 -#define EXC_STORE_PAGE_FAULT 15 -#define EXC_INST_GUEST_PAGE_FAULT 20 -#define EXC_LOAD_GUEST_PAGE_FAULT 21 -#define EXC_VIRTUAL_INST_FAULT 22 -#define EXC_STORE_GUEST_PAGE_FAULT 23 - -#define EXC(x) {EXC_##x, #x } - -#define kvm_riscv_exception_class \ - EXC(INST_MISALIGNED), EXC(INST_ACCESS), EXC(INST_ILLEGAL), \ - EXC(BREAKPOINT), EXC(LOAD_MISALIGNED), EXC(LOAD_ACCESS), \ - EXC(STORE_MISALIGNED), EXC(STORE_ACCESS), EXC(SYSCALL), \ - EXC(HYPERVISOR_SYSCALL), EXC(SUPERVISOR_SYSCALL), \ - EXC(INST_PAGE_FAULT), EXC(LOAD_PAGE_FAULT), EXC(STORE_PAGE_FAULT), \ - EXC(INST_GUEST_PAGE_FAULT), EXC(LOAD_GUEST_PAGE_FAULT), \ - EXC(VIRTUAL_INST_FAULT), EXC(STORE_GUEST_PAGE_FAULT) - -#endif /* ARCH_PERF_RISCV_EXCEPTION_TYPES_H */ diff --git a/tools/perf/arch/riscv/util/riscv_trap_types.h b/tools/perf/arch/riscv/util/riscv_trap_types.h new file mode 100644 index 000000000000..6cc71eb01fca --- /dev/null +++ b/tools/perf/arch/riscv/util/riscv_trap_types.h @@ -0,0 +1,57 @@ +// 
SPDX-License-Identifier: GPL-2.0 +#ifndef ARCH_PERF_RISCV_TRAP_TYPES_H +#define ARCH_PERF_RISCV_TRAP_TYPES_H + +/* Exception cause high bit - is an interrupt if set */ +#define CAUSE_IRQ_FLAG (_AC(1, UL) << (__riscv_xlen - 1)) + +/* Interrupt causes (minus the high bit) */ +#define IRQ_S_SOFT 1 +#define IRQ_VS_SOFT 2 +#define IRQ_M_SOFT 3 +#define IRQ_S_TIMER 5 +#define IRQ_VS_TIMER 6 +#define IRQ_M_TIMER 7 +#define IRQ_S_EXT 9 +#define IRQ_VS_EXT 10 +#define IRQ_M_EXT 11 +#define IRQ_S_GEXT 12 +#define IRQ_PMU_OVF 13 + +/* Exception causes */ +#define EXC_INST_MISALIGNED 0 +#define EXC_INST_ACCESS 1 +#define EXC_INST_ILLEGAL 2 +#define EXC_BREAKPOINT 3 +#define EXC_LOAD_MISALIGNED 4 +#define EXC_LOAD_ACCESS 5 +#define EXC_STORE_MISALIGNED 6 +#define EXC_STORE_ACCESS 7 +#define EXC_SYSCALL 8 +#define EXC_HYPERVISOR_SYSCALL 9 +#define EXC_SUPERVISOR_SYSCALL 10 +#define EXC_INST_PAGE_FAULT 12 +#define EXC_LOAD_PAGE_FAULT 13 +#define EXC_STORE_PAGE_FAULT 15 +#define EXC_INST_GUEST_PAGE_FAULT 20 +#define EXC_LOAD_GUEST_PAGE_FAULT 21 +#define EXC_VIRTUAL_INST_FAULT 22 +#define EXC_STORE_GUEST_PAGE_FAULT 23 + +#define TRAP(x) { x, #x } + +#define kvm_riscv_trap_class \ + TRAP(IRQ_S_SOFT), TRAP(IRQ_VS_SOFT), TRAP(IRQ_M_SOFT), \ + TRAP(IRQ_S_TIMER), TRAP(IRQ_VS_TIMER), TRAP(IRQ_M_TIMER), \ + TRAP(IRQ_S_EXT), TRAP(IRQ_VS_EXT), TRAP(IRQ_M_EXT), \ + TRAP(IRQ_S_GEXT), TRAP(IRQ_PMU_OVF), \ + TRAP(EXC_INST_MISALIGNED), TRAP(EXC_INST_ACCESS), TRAP(EXC_INST_ILLEGAL), \ + TRAP(EXC_BREAKPOINT), TRAP(EXC_LOAD_MISALIGNED), TRAP(EXC_LOAD_ACCESS), \ + TRAP(EXC_STORE_MISALIGNED), TRAP(EXC_STORE_ACCESS), TRAP(EXC_SYSCALL), \ + TRAP(EXC_HYPERVISOR_SYSCALL), TRAP(EXC_SUPERVISOR_SYSCALL), \ + TRAP(EXC_INST_PAGE_FAULT), TRAP(EXC_LOAD_PAGE_FAULT), \ + TRAP(EXC_STORE_PAGE_FAULT), TRAP(EXC_INST_GUEST_PAGE_FAULT), \ + TRAP(EXC_LOAD_GUEST_PAGE_FAULT), TRAP(EXC_VIRTUAL_INST_FAULT), \ + TRAP(EXC_STORE_GUEST_PAGE_FAULT) + +#endif /* ARCH_PERF_RISCV_TRAP_TYPES_H */ diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index fdf133c9520f..7e29f04da744 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -18,7 +18,6 @@ #include <stdlib.h> #include <linux/compiler.h> #include <linux/kernel.h> -#include <linux/prctl.h> #include <linux/zalloc.h> #include <sys/time.h> #include <sys/mman.h> @@ -57,7 +56,6 @@ static struct bench_futex_parameters params = { static const struct option options[] = { OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), - OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"), OPT_UINTEGER('f', "futexes", ¶ms.nfutexes, "Specify amount of futexes per threads"), diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 5144a158512c..40640b674427 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -47,7 +47,6 @@ static struct bench_futex_parameters params = { static const struct option options[] = { OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), - OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"), OPT_BOOLEAN( 'M', "multi", ¶ms.multi, "Use multiple futexes"), diff --git 
a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index a2f91ee1950b..0748b0fd689e 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -52,7 +52,6 @@ static struct bench_futex_parameters params = { static const struct option options[] = { OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), - OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('q', "nrequeue", ¶ms.nrequeue, "Specify amount of threads to requeue at once"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index ee66482c29fd..6aede7c46b33 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -63,7 +63,6 @@ static struct bench_futex_parameters params = { static const struct option options[] = { OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), - OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakers", ¶ms.nwakes, "Specify amount of waking threads"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 8d6107f7cd94..a31fc1563862 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -52,7 +52,6 @@ static struct bench_futex_parameters params = { static const struct option options[] = { OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), - OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakes", ¶ms.nwakes, "Specify amount of threads to wake at once"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c index 26382e4d8d4c..1481196a22f0 100644 --- a/tools/perf/bench/futex.c +++ b/tools/perf/bench/futex.c @@ -2,21 +2,24 @@ #include <err.h> #include <stdio.h> #include <stdlib.h> -#include <linux/prctl.h> #include <sys/prctl.h> #include "futex.h" +#ifndef PR_FUTEX_HASH +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define PR_FUTEX_HASH_GET_SLOTS 2 +#endif // PR_FUTEX_HASH + void futex_set_nbuckets_param(struct bench_futex_parameters *params) { - unsigned long flags; int ret; if (params->nbuckets < 0) return; - flags = params->buckets_immutable ? 
FH_FLAG_IMMUTABLE : 0; - ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, flags); + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, 0); if (ret) { printf("Requesting %d hash buckets failed: %d/%m\n", params->nbuckets, ret); @@ -40,18 +43,11 @@ void futex_print_nbuckets(struct bench_futex_parameters *params) printf("Requested: %d in usage: %d\n", params->nbuckets, ret); err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); } - if (params->nbuckets == 0) { + if (params->nbuckets == 0) ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); - } else { - ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE); - if (ret < 0) { - printf("Can't check if the hash is immutable: %m\n"); - err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); - } - ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets %s", - params->nbuckets, - ret == 1 ? "(immutable)" : ""); - } + else + ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets", + params->nbuckets); } else { if (ret <= 0) { ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index 9c9a73f9d865..dd295d27044a 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -26,7 +26,6 @@ struct bench_futex_parameters { unsigned int nwakes; unsigned int nrequeue; int nbuckets; - bool buckets_immutable; }; /** diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index e9fab20e9330..8085e4d1d8af 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -186,7 +186,7 @@ done # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' -check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' +check arch/x86/include/asm/amd/ibs.h '-I "^#include .*/msr-index.h"' check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' diff --git a/tools/perf/tests/shell/stat+event_uniquifying.sh b/tools/perf/tests/shell/stat+event_uniquifying.sh index 5ec35c52b7d9..bf54bd6c3e2e 100755 --- a/tools/perf/tests/shell/stat+event_uniquifying.sh +++ b/tools/perf/tests/shell/stat+event_uniquifying.sh @@ -9,7 +9,8 @@ perf_tool=perf err=0 test_event_uniquifying() { - # We use `clockticks` to verify the uniquify behavior. + # We use `clockticks` in `uncore_imc` to verify the uniquify behavior. + pmu="uncore_imc" event="clockticks" # If the `-A` option is added, the event should be uniquified. @@ -43,11 +44,18 @@ test_event_uniquifying() { echo "stat event uniquifying test" uniquified_event_array=() + # Skip if the machine does not have `uncore_imc` device. + if ! ${perf_tool} list pmu | grep -q ${pmu}; then + echo "Target does not support PMU ${pmu} [Skipped]" + err=2 + return + fi + # Check how many uniquified events. 
while IFS= read -r line; do uniquified_event=$(echo "$line" | awk '{print $1}') uniquified_event_array+=("${uniquified_event}") - done < <(${perf_tool} list -v ${event} | grep "\[Kernel PMU event\]") + done < <(${perf_tool} list -v ${event} | grep ${pmu}) perf_command="${perf_tool} stat -e $event -A -o ${stat_output} -- true" $perf_command diff --git a/tools/perf/tests/tests-scripts.c b/tools/perf/tests/tests-scripts.c index 1d5759d08141..3a2a8438f9af 100644 --- a/tools/perf/tests/tests-scripts.c +++ b/tools/perf/tests/tests-scripts.c @@ -260,6 +260,7 @@ static void append_scripts_in_dir(int dir_fd, continue; /* Skip scripts that have a separate driver. */ fd = openat(dir_fd, ent->d_name, O_PATH); append_scripts_in_dir(fd, result, result_sz); + close(fd); } for (i = 0; i < n_dirs; i++) /* Clean up */ zfree(&entlist[i]); diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index c3322eb3d686..3b262487ec06 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -168,7 +168,7 @@ static inline struct cmsghdr * cmsg_nxthdr (struct msghdr *__msg, struct cmsghdr return __cmsg_nxthdr(__msg->msg_control, __msg->msg_controllen, __cmsg); } -static inline size_t msg_data_left(struct msghdr *msg) +static inline size_t msg_data_left(const struct msghdr *msg) { return iov_iter_count(&msg->msg_iter); } diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index e762e1af650c..0098b0ce8ccb 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -361,6 +361,7 @@ typedef int __bitwise __kernel_rwf_t; #define PAGE_IS_PFNZERO (1 << 5) #define PAGE_IS_HUGE (1 << 6) #define PAGE_IS_SOFT_DIRTY (1 << 7) +#define PAGE_IS_GUARD (1 << 8) /* * struct page_region - Page region with flags diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 15c18ef4eb11..3b93fb906e3c 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -364,4 +364,9 @@ struct prctl_mm_map { # define PR_TIMER_CREATE_RESTORE_IDS_ON 1 # define PR_TIMER_CREATE_RESTORE_IDS_GET 2 +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define PR_FUTEX_HASH_GET_SLOTS 2 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index f78ee3670dd5..1686861aae20 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -182,8 +182,12 @@ struct statx { /* File offset alignment for direct I/O reads */ __u32 stx_dio_read_offset_align; - /* 0xb8 */ - __u64 __spare3[9]; /* Spare space for future expansion */ + /* Optimised max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max_opt; + __u32 __spare2[1]; + + /* 0xc0 */ + __u64 __spare3[8]; /* Spare space for future expansion */ /* 0x100 */ }; diff --git a/tools/perf/util/include/linux/linkage.h b/tools/perf/util/include/linux/linkage.h index 178b00205fe6..89979ca23c3f 100644 --- a/tools/perf/util/include/linux/linkage.h +++ b/tools/perf/util/include/linux/linkage.h @@ -132,4 +132,8 @@ SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) #endif +#ifndef SYM_PIC_ALIAS +#define SYM_PIC_ALIAS(sym) SYM_ALIAS(__pi_ ## sym, sym, SYM_T_FUNC, SYM_L_GLOBAL) +#endif + 
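The PR_FUTEX_HASH constants imported into the beauty copy of prctl.h above (and used by the futex benchmarks earlier in this section) can be exercised directly from user space. A small, hedged sketch — the bucket count of 16 is arbitrary, and the fallback defines are only needed when building against older uapi headers:

    #include <stdio.h>
    #include <sys/prctl.h>

    #ifndef PR_FUTEX_HASH
    #define PR_FUTEX_HASH                   78
    # define PR_FUTEX_HASH_SET_SLOTS        1
    # define PR_FUTEX_HASH_GET_SLOTS        2
    #endif

    int main(void)
    {
            int slots;

            /* The fourth (flags) argument must be 0 now that the
             * immutable-buckets variant has been removed. */
            if (prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, 16, 0))
                    perror("PR_FUTEX_HASH_SET_SLOTS");

            slots = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS);
            if (slots < 0)
                    perror("PR_FUTEX_HASH_GET_SLOTS");
            else
                    printf("futex hash buckets in use: %d\n", slots);
            return 0;
    }

This mirrors what futex_set_nbuckets_param() and futex_print_nbuckets() do in the benchmark code, minus the asprintf() bookkeeping.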
#endif /* PERF_LINUX_LINKAGE_H_ */ diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index a786cbfb0ff5..83aaf7cda635 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -268,6 +268,7 @@ bool is_event_supported(u8 type, u64 config) ret = evsel__open(evsel, NULL, tmap) >= 0; } + evsel__close(evsel); evsel__delete(evsel); } diff --git a/tools/power/cpupower/Makefile b/tools/power/cpupower/Makefile index be8dfac14076..c43db1c41205 100644 --- a/tools/power/cpupower/Makefile +++ b/tools/power/cpupower/Makefile @@ -73,6 +73,7 @@ sbindir ?= /usr/sbin mandir ?= /usr/man libdir ?= /usr/lib libexecdir ?= /usr/libexec +unitdir ?= /usr/lib/systemd/system includedir ?= /usr/include localedir ?= /usr/share/locale docdir ?= /usr/share/doc/packages/cpupower @@ -309,9 +310,9 @@ install-tools: $(OUTPUT)cpupower $(INSTALL_DATA) cpupower-service.conf '$(DESTDIR)${confdir}' $(INSTALL) -d $(DESTDIR)${libexecdir} $(INSTALL_PROGRAM) cpupower.sh '$(DESTDIR)${libexecdir}/cpupower' - $(INSTALL) -d $(DESTDIR)${libdir}/systemd/system - sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${libdir}/systemd/system/cpupower.service' - $(SETPERM_DATA) '$(DESTDIR)${libdir}/systemd/system/cpupower.service' + $(INSTALL) -d $(DESTDIR)${unitdir} + sed 's|___CDIR___|${confdir}|; s|___LDIR___|${libexecdir}|' cpupower.service.in > '$(DESTDIR)${unitdir}/cpupower.service' + $(SETPERM_DATA) '$(DESTDIR)${unitdir}/cpupower.service' install-man: $(INSTALL_DATA) -D man/cpupower.1 $(DESTDIR)${mandir}/man1/cpupower.1 @@ -348,7 +349,7 @@ uninstall: - rm -f $(DESTDIR)${bindir}/utils/cpupower - rm -f $(DESTDIR)${confdir}cpupower-service.conf - rm -f $(DESTDIR)${libexecdir}/cpupower - - rm -f $(DESTDIR)${libdir}/systemd/system/cpupower.service + - rm -f $(DESTDIR)${unitdir}/cpupower.service - rm -f $(DESTDIR)${mandir}/man1/cpupower.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-set.1 - rm -f $(DESTDIR)${mandir}/man1/cpupower-frequency-info.1 diff --git a/tools/power/cpupower/bindings/python/Makefile b/tools/power/cpupower/bindings/python/Makefile index 81db39a03efb..4527cd732b42 100644 --- a/tools/power/cpupower/bindings/python/Makefile +++ b/tools/power/cpupower/bindings/python/Makefile @@ -4,20 +4,22 @@ # This Makefile expects you have already run `make install-lib` in the lib # directory for the bindings to be created. 
-CC := gcc +CC ?= gcc +# CFLAGS ?= +LDFLAGS ?= -lcpupower HAVE_SWIG := $(shell if which swig >/dev/null 2>&1; then echo 1; else echo 0; fi) HAVE_PYCONFIG := $(shell if which python-config >/dev/null 2>&1; then echo 1; else echo 0; fi) -PY_INCLUDE = $(firstword $(shell python-config --includes)) -INSTALL_DIR = $(shell python3 -c "import site; print(site.getsitepackages()[0])") +PY_INCLUDE ?= $(firstword $(shell python-config --includes)) +INSTALL_DIR ?= $(shell python3 -c "import site; print(site.getsitepackages()[0])") all: _raw_pylibcpupower.so _raw_pylibcpupower.so: raw_pylibcpupower_wrap.o - $(CC) -shared -lcpupower raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so + $(CC) -shared $(LDFLAGS) raw_pylibcpupower_wrap.o -o _raw_pylibcpupower.so raw_pylibcpupower_wrap.o: raw_pylibcpupower_wrap.c - $(CC) -fPIC -c raw_pylibcpupower_wrap.c $(PY_INCLUDE) + $(CC) $(CFLAGS) $(PY_INCLUDE) -fPIC -c raw_pylibcpupower_wrap.c raw_pylibcpupower_wrap.c: raw_pylibcpupower.swg ifeq ($(HAVE_SWIG),0) diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c index ad493157f826..e8b3841d5c0f 100644 --- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c @@ -121,10 +121,8 @@ void print_header(int topology_depth) switch (topology_depth) { case TOPOLOGY_DEPTH_PKG: printf(" PKG|"); - break; case TOPOLOGY_DEPTH_CORE: printf("CORE|"); - break; case TOPOLOGY_DEPTH_CPU: printf(" CPU|"); break; @@ -167,10 +165,8 @@ void print_results(int topology_depth, int cpu) switch (topology_depth) { case TOPOLOGY_DEPTH_PKG: printf("%4d|", cpu_top.core_info[cpu].pkg); - break; case TOPOLOGY_DEPTH_CORE: printf("%4d|", cpu_top.core_info[cpu].core); - break; case TOPOLOGY_DEPTH_CPU: printf("%4d|", cpu_top.core_info[cpu].cpu); break; diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index 73b6b10cbdd2..5ae02c3d5b64 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -240,9 +240,9 @@ static int mperf_stop(void) int cpu; for (cpu = 0; cpu < cpu_count; cpu++) { - mperf_measure_stats(cpu); - mperf_get_tsc(&tsc_at_measure_end[cpu]); clock_gettime(CLOCK_REALTIME, &time_end[cpu]); + mperf_get_tsc(&tsc_at_measure_end[cpu]); + mperf_measure_stats(cpu); } return 0; diff --git a/tools/sched/dl_bw_dump.py b/tools/sched/dl_bw_dump.py new file mode 100644 index 000000000000..aae4e42b1769 --- /dev/null +++ b/tools/sched/dl_bw_dump.py @@ -0,0 +1,57 @@ +#!/usr/bin/env drgn +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 Juri Lelli <juri.lelli@redhat.com> +# Copyright (C) 2025 Red Hat, Inc. + +desc = """ +This is a drgn script to show dl_rq bandwidth accounting information. For more +info on drgn, visit https://github.com/osandov/drgn. + +Only online CPUs are reported. 
+""" + +import os +import argparse + +import drgn +from drgn import FaultError +from drgn.helpers.common import * +from drgn.helpers.linux import * + +def print_dl_bws_info(): + + print("Retrieving dl_rq bandwidth accounting information:") + + runqueues = prog['runqueues'] + + for cpu_id in for_each_possible_cpu(prog): + try: + rq = per_cpu(runqueues, cpu_id) + + if rq.online == 0: + continue + + dl_rq = rq.dl + + print(f" From CPU: {cpu_id}") + + # Access and print relevant fields from struct dl_rq + print(f" running_bw : {dl_rq.running_bw}") + print(f" this_bw : {dl_rq.this_bw}") + print(f" extra_bw : {dl_rq.extra_bw}") + print(f" max_bw : {dl_rq.max_bw}") + print(f" bw_ratio : {dl_rq.bw_ratio}") + + except drgn.FaultError as fe: + print(f" (CPU {cpu_id}: Fault accessing kernel memory: {fe})") + except AttributeError as ae: + print(f" (CPU {cpu_id}: Missing attribute for root_domain (kernel struct change?): {ae})") + except Exception as e: + print(f" (CPU {cpu_id}: An unexpected error occurred: {e})") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=desc, + formatter_class=argparse.RawTextHelpFormatter) + args = parser.parse_args() + + print_dl_bws_info() diff --git a/tools/sched/root_domains_dump.py b/tools/sched/root_domains_dump.py new file mode 100644 index 000000000000..56dc91f017b2 --- /dev/null +++ b/tools/sched/root_domains_dump.py @@ -0,0 +1,68 @@ +#!/usr/bin/env drgn +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 Juri Lelli <juri.lelli@redhat.com> +# Copyright (C) 2025 Red Hat, Inc. + +desc = """ +This is a drgn script to show the current root domains configuration. For more +info on drgn, visit https://github.com/osandov/drgn. + +Root domains are only printed once, as multiple CPUs might be attached to the +same root domain. 
+""" + +import os +import argparse + +import drgn +from drgn import FaultError +from drgn.helpers.common import * +from drgn.helpers.linux import * + +def print_root_domains_info(): + + # To store unique root domains found + seen_root_domains = set() + + print("Retrieving (unique) Root Domain Information:") + + runqueues = prog['runqueues'] + def_root_domain = prog['def_root_domain'] + + for cpu_id in for_each_possible_cpu(prog): + try: + rq = per_cpu(runqueues, cpu_id) + + root_domain = rq.rd + + # Check if we've already processed this root domain to avoid duplicates + # Use the memory address of the root_domain as a unique identifier + root_domain_cast = int(root_domain) + if root_domain_cast in seen_root_domains: + continue + seen_root_domains.add(root_domain_cast) + + if root_domain_cast == int(def_root_domain.address_): + print(f"\n--- Root Domain @ def_root_domain ---") + else: + print(f"\n--- Root Domain @ 0x{root_domain_cast:x} ---") + + print(f" From CPU: {cpu_id}") # This CPU belongs to this root domain + + # Access and print relevant fields from struct root_domain + print(f" Span : {cpumask_to_cpulist(root_domain.span[0])}") + print(f" Online : {cpumask_to_cpulist(root_domain.span[0])}") + + except drgn.FaultError as fe: + print(f" (CPU {cpu_id}: Fault accessing kernel memory: {fe})") + except AttributeError as ae: + print(f" (CPU {cpu_id}: Missing attribute for root_domain (kernel struct change?): {ae})") + except Exception as e: + print(f" (CPU {cpu_id}: An unexpected error occurred: {e})") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description=desc, + formatter_class=argparse.RawTextHelpFormatter) + args = parser.parse_args() + + print_root_domains_info() diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include index 5158250988ce..ded48263dd5e 100644 --- a/tools/scripts/Makefile.include +++ b/tools/scripts/Makefile.include @@ -101,7 +101,9 @@ else ifneq ($(CROSS_COMPILE),) # Allow userspace to override CLANG_CROSS_FLAGS to specify their own # sysroots and flags or to avoid the GCC call in pure Clang builds. 
ifeq ($(CLANG_CROSS_FLAGS),) -CLANG_CROSS_FLAGS := --target=$(notdir $(CROSS_COMPILE:%-=%)) +CLANG_TARGET := $(notdir $(CROSS_COMPILE:%-=%)) +CLANG_TARGET := $(subst s390-linux,s390x-linux,$(CLANG_TARGET)) +CLANG_CROSS_FLAGS := --target=$(CLANG_TARGET) GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)gcc 2>/dev/null)) ifneq ($(GCC_TOOLCHAIN_DIR),) CLANG_CROSS_FLAGS += --prefix=$(GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE)) diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index 0f1d91f57ba3..d533481672b7 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1828,27 +1828,10 @@ static ssize_t fw_buf_checksum_show(struct device *dev, { struct cxl_mockmem_data *mdata = dev_get_drvdata(dev); u8 hash[SHA256_DIGEST_SIZE]; - unsigned char *hstr, *hptr; - struct sha256_state sctx; - ssize_t written = 0; - int i; - - sha256_init(&sctx); - sha256_update(&sctx, mdata->fw, mdata->fw_size); - sha256_final(&sctx, hash); - - hstr = kzalloc((SHA256_DIGEST_SIZE * 2) + 1, GFP_KERNEL); - if (!hstr) - return -ENOMEM; - - hptr = hstr; - for (i = 0; i < SHA256_DIGEST_SIZE; i++) - hptr += sprintf(hptr, "%02x", hash[i]); - written = sysfs_emit(buf, "%s\n", hstr); + sha256(mdata->fw, mdata->fw_size, hash); - kfree(hstr); - return written; + return sysfs_emit(buf, "%*phN\n", SHA256_DIGEST_SIZE, hash); } static DEVICE_ATTR_RO(fw_buf_checksum); diff --git a/tools/testing/ktest/ktest.pl b/tools/testing/ktest/ktest.pl index a5f7fdd0c1fb..001c4df9f7df 100755 --- a/tools/testing/ktest/ktest.pl +++ b/tools/testing/ktest/ktest.pl @@ -21,6 +21,8 @@ my %opt; my %repeat_tests; my %repeats; my %evals; +my @command_vars; +my %command_tmp_vars; #default opts my %default = ( @@ -216,6 +218,7 @@ my $patchcheck_type; my $patchcheck_start; my $patchcheck_cherry; my $patchcheck_end; +my $patchcheck_skip; my $build_time; my $install_time; @@ -380,6 +383,7 @@ my %option_map = ( "PATCHCHECK_START" => \$patchcheck_start, "PATCHCHECK_CHERRY" => \$patchcheck_cherry, "PATCHCHECK_END" => \$patchcheck_end, + "PATCHCHECK_SKIP" => \$patchcheck_skip, ); # Options may be used by other options, record them. 
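Returning to the CXL mock-memory hunk a few files above: the hand-rolled SHA-256 (init/update/final plus a manual hex loop) collapses onto the one-shot sha256() helper and the "%*phN" vsprintf extension. A generic sketch of the same pattern — struct my_dev_data and the attribute name are made up for illustration:

    #include <crypto/sha2.h>
    #include <linux/device.h>
    #include <linux/sysfs.h>

    struct my_dev_data {            /* hypothetical driver data */
            const u8 *blob;
            size_t blob_size;
    };

    static ssize_t checksum_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
    {
            struct my_dev_data *data = dev_get_drvdata(dev);
            u8 hash[SHA256_DIGEST_SIZE];

            /* One-shot helper: hashes the whole buffer in a single call */
            sha256(data->blob, data->blob_size, hash);

            /* "%*phN" prints the buffer as contiguous lowercase hex,
             * with the field width giving the number of bytes */
            return sysfs_emit(buf, "%*phN\n", SHA256_DIGEST_SIZE, hash);
    }
    static DEVICE_ATTR_RO(checksum);

Because "%*phN" formats the digest directly into the sysfs buffer, the temporary string and its kzalloc()/kfree() pair in the old code become unnecessary.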
@@ -900,14 +904,22 @@ sub set_eval { } sub set_variable { - my ($lvalue, $rvalue) = @_; + my ($lvalue, $rvalue, $command) = @_; + # Command line variables override all others + if (defined($command_tmp_vars{$lvalue})) { + return; + } if ($rvalue =~ /^\s*$/) { delete $variable{$lvalue}; } else { $rvalue = process_variables($rvalue); $variable{$lvalue} = $rvalue; } + + if (defined($command)) { + $command_tmp_vars{$lvalue} = 1; + } } sub process_compare { @@ -1286,6 +1298,19 @@ sub read_config { $test_case = __read_config $config, \$test_num; + foreach my $val (@command_vars) { + chomp $val; + my %command_overrides; + if ($val =~ m/^\s*([A-Z_\[\]\d]+)\s*=\s*(.*?)\s*$/) { + my $lvalue = $1; + my $rvalue = $2; + + set_value($lvalue, $rvalue, 1, \%command_overrides, "COMMAND LINE"); + } else { + die "Invalid option definition '$val'\n"; + } + } + # make sure we have all mandatory configs get_mandatory_configs; @@ -1371,7 +1396,10 @@ sub __eval_option { # If a variable contains itself, use the default var if (($var eq $name) && defined($opt{$var})) { $o = $opt{$var}; - $retval = "$retval$o"; + # Only append if the default doesn't contain itself + if ($o !~ m/\$\{$var\}/) { + $retval = "$retval$o"; + } } elsif (defined($opt{$o})) { $o = $opt{$o}; $retval = "$retval$o"; @@ -3511,11 +3539,37 @@ sub patchcheck { @list = reverse @list; } + my %skip_list; + my $will_skip = 0; + + if (defined($patchcheck_skip)) { + foreach my $s (split /\s+/, $patchcheck_skip) { + $s = `git log --pretty=oneline $s~1..$s`; + $s =~ s/^(\S+).*/$1/; + chomp $s; + $skip_list{$s} = 1; + $will_skip++; + } + } + doprint("Going to test the following commits:\n"); foreach my $l (@list) { + my $sha1 = $l; + $sha1 =~ s/^([[:xdigit:]]+).*/$1/; + next if (defined($skip_list{$sha1})); doprint "$l\n"; } + if ($will_skip) { + doprint("\nSkipping the following commits:\n"); + foreach my $l (@list) { + my $sha1 = $l; + $sha1 =~ s/^([[:xdigit:]]+).*/$1/; + next if (!defined($skip_list{$sha1})); + doprint "$l\n"; + } + } + my $save_clean = $noclean; my %ignored_warnings; @@ -3530,6 +3584,11 @@ sub patchcheck { my $sha1 = $item; $sha1 =~ s/^([[:xdigit:]]+).*/$1/; + if (defined($skip_list{$sha1})) { + doprint "\nSkipping \"$item\"\n\n"; + next; + } + doprint "\nProcessing commit \"$item\"\n\n"; run_command "git checkout $sha1" or @@ -4242,8 +4301,55 @@ sub cancel_test { die "\nCaught Sig Int, test interrupted: $!\n" } -$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n"; +sub die_usage { + die << "EOF" +ktest.pl version: $VERSION + usage: ktest.pl [options] [config-file] + [options]: + -D value: Where value can act as an option override. + -D BUILD_NOCLEAN=1 + Sets global BUILD_NOCLEAN to 1 + -D TEST_TYPE[2]=build + Sets TEST_TYPE of test 2 to "build" + + It can also override all temp variables. + -D USE_TEMP_DIR:=1 + Will override all variables that use + "USE_TEMP_DIR=" + +EOF +; +} + +while ( $#ARGV >= 0 ) { + if ( $ARGV[0] eq "-D" ) { + shift; + die_usage if ($#ARGV < 1); + my $val = shift; + + if ($val =~ m/(.*?):=(.*)$/) { + set_variable($1, $2, 1); + } else { + $command_vars[$#command_vars + 1] = $val; + } + + } elsif ( $ARGV[0] =~ m/^-D(.*)/) { + my $val = $1; + shift; + + if ($val =~ m/(.*?):=(.*)$/) { + set_variable($1, $2, 1); + } else { + $command_vars[$#command_vars + 1] = $val; + } + } elsif ( $ARGV[0] eq "-h" ) { + die_usage; + } else { + last; + } +} +$#ARGV < 1 or die_usage; if ($#ARGV == 0) { $ktest_config = $ARGV[0]; if (! 
-f $ktest_config) { @@ -4466,6 +4572,10 @@ for (my $i = 1; $i <= $opt{"NUM_TESTS"}; $i++) { doprint "RUNNING TEST $i of $opt{NUM_TESTS}$name with option $test_type $run_type$installme\n\n"; + # Always show which build directory and output directory is being used + doprint "BUILD_DIR=$builddir\n"; + doprint "OUTPUT_DIR=$outputdir\n\n"; + if (defined($pre_test)) { my $ret = run_command $pre_test; if (!$ret && defined($pre_test_die) && diff --git a/tools/testing/ktest/sample.conf b/tools/testing/ktest/sample.conf index f43477a9b857..9c4c449a8f3e 100644 --- a/tools/testing/ktest/sample.conf +++ b/tools/testing/ktest/sample.conf @@ -1017,6 +1017,8 @@ # Note, PATCHCHECK_CHERRY requires PATCHCHECK_END to be defined. # (default 0) # +# PATCHCHECK_SKIP is an optional list of shas to skip testing +# # PATCHCHECK_TYPE is required and is the type of test to run: # build, boot, test. # diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 339b31e6a6b5..030da61dbff3 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -77,6 +77,7 @@ TARGETS += net/ovpn TARGETS += net/packetdrill TARGETS += net/rds TARGETS += net/tcp_ao +TARGETS += nolibc TARGETS += nsfs TARGETS += pci_endpoint TARGETS += pcie_bwctrl @@ -293,6 +294,14 @@ ifdef INSTALL_PATH $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \ -C $$TARGET emit_tests >> $(TEST_LIST); \ done; + @VERSION=$$(git describe HEAD 2>/dev/null); \ + if [ -n "$$VERSION" ]; then \ + echo "$$VERSION" > $(INSTALL_PATH)/VERSION; \ + printf "Version saved to $(INSTALL_PATH)/VERSION\n"; \ + else \ + printf "Unable to get version from git describe\n"; \ + fi + @echo "**Kselftest Installation is complete: $(INSTALL_PATH)**" else $(error Error: set INSTALL_PATH to use install) endif diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile index a6d30c620908..483488f8c2ad 100644 --- a/tools/testing/selftests/arm64/abi/Makefile +++ b/tools/testing/selftests/arm64/abi/Makefile @@ -12,4 +12,4 @@ $(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S $(OUTPUT)/tpidr2: tpidr2.c $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ -static -include ../../../../include/nolibc/nolibc.h \ - -ffreestanding -Wall $^ -o $@ -lgcc + -I../.. -ffreestanding -Wall $^ -o $@ -lgcc diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c index 35f521e5f41c..002ec38a8bbb 100644 --- a/tools/testing/selftests/arm64/abi/hwcap.c +++ b/tools/testing/selftests/arm64/abi/hwcap.c @@ -21,6 +21,10 @@ #define TESTS_PER_HWCAP 3 +#ifndef AT_HWCAP3 +#define AT_HWCAP3 29 +#endif + /* * Function expected to generate exception when the feature is not * supported and return when it is supported. 
If the specific exception @@ -1098,6 +1102,18 @@ static const struct hwcap_data { .sigill_fn = hbc_sigill, .sigill_reliable = true, }, + { + .name = "MTE_FAR", + .at_hwcap = AT_HWCAP3, + .hwcap_bit = HWCAP3_MTE_FAR, + .cpuinfo = "mtefar", + }, + { + .name = "MTE_STOREONLY", + .at_hwcap = AT_HWCAP3, + .hwcap_bit = HWCAP3_MTE_STORE_ONLY, + .cpuinfo = "mtestoreonly", + }, }; typedef void (*sighandler_fn)(int, siginfo_t *, void *); diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c index eb19dcc37a75..f58a9f89b952 100644 --- a/tools/testing/selftests/arm64/abi/tpidr2.c +++ b/tools/testing/selftests/arm64/abi/tpidr2.c @@ -3,31 +3,12 @@ #include <linux/sched.h> #include <linux/wait.h> +#include "kselftest.h" + #define SYS_TPIDR2 "S3_3_C13_C0_5" #define EXPECTED_TESTS 5 -static void putstr(const char *str) -{ - write(1, str, strlen(str)); -} - -static void putnum(unsigned int num) -{ - char c; - - if (num / 10) - putnum(num / 10); - - c = '0' + (num % 10); - write(1, &c, 1); -} - -static int tests_run; -static int tests_passed; -static int tests_failed; -static int tests_skipped; - static void set_tpidr2(uint64_t val) { asm volatile ( @@ -50,20 +31,6 @@ static uint64_t get_tpidr2(void) return val; } -static void print_summary(void) -{ - if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS) - putstr("# UNEXPECTED TEST COUNT: "); - - putstr("# Totals: pass:"); - putnum(tests_passed); - putstr(" fail:"); - putnum(tests_failed); - putstr(" xfail:0 xpass:0 skip:"); - putnum(tests_skipped); - putstr(" error:0\n"); -} - /* Processes should start with TPIDR2 == 0 */ static int default_value(void) { @@ -105,9 +72,8 @@ static int write_fork_read(void) if (newpid == 0) { /* In child */ if (get_tpidr2() != oldpid) { - putstr("# TPIDR2 changed in child: "); - putnum(get_tpidr2()); - putstr("\n"); + ksft_print_msg("TPIDR2 changed in child: %llx\n", + get_tpidr2()); exit(0); } @@ -115,14 +81,12 @@ static int write_fork_read(void) if (get_tpidr2() == getpid()) { exit(1); } else { - putstr("# Failed to set TPIDR2 in child\n"); + ksft_print_msg("Failed to set TPIDR2 in child\n"); exit(0); } } if (newpid < 0) { - putstr("# fork() failed: -"); - putnum(-newpid); - putstr("\n"); + ksft_print_msg("fork() failed: %d\n", newpid); return 0; } @@ -132,23 +96,22 @@ static int write_fork_read(void) if (waiting < 0) { if (errno == EINTR) continue; - putstr("# waitpid() failed: "); - putnum(errno); - putstr("\n"); + ksft_print_msg("waitpid() failed: %d\n", errno); return 0; } if (waiting != newpid) { - putstr("# waitpid() returned wrong PID\n"); + ksft_print_msg("waitpid() returned wrong PID: %d != %d\n", + waiting, newpid); return 0; } if (!WIFEXITED(status)) { - putstr("# child did not exit\n"); + ksft_print_msg("child did not exit\n"); return 0; } if (getpid() != get_tpidr2()) { - putstr("# TPIDR2 corrupted in parent\n"); + ksft_print_msg("TPIDR2 corrupted in parent\n"); return 0; } @@ -188,35 +151,32 @@ static int write_clone_read(void) stack = malloc(__STACK_SIZE); if (!stack) { - putstr("# malloc() failed\n"); + ksft_print_msg("malloc() failed\n"); return 0; } ret = sys_clone(CLONE_VM, (unsigned long)stack + __STACK_SIZE, &parent_tid, 0, &child_tid); if (ret == -1) { - putstr("# clone() failed\n"); - putnum(errno); - putstr("\n"); + ksft_print_msg("clone() failed: %d\n", errno); return 0; } if (ret == 0) { /* In child */ if (get_tpidr2() != 0) { - putstr("# TPIDR2 non-zero in child: "); - putnum(get_tpidr2()); - putstr("\n"); + ksft_print_msg("TPIDR2 
non-zero in child: %llx\n", + get_tpidr2()); exit(0); } if (gettid() == 0) - putstr("# Child TID==0\n"); + ksft_print_msg("Child TID==0\n"); set_tpidr2(gettid()); if (get_tpidr2() == gettid()) { exit(1); } else { - putstr("# Failed to set TPIDR2 in child\n"); + ksft_print_msg("Failed to set TPIDR2 in child\n"); exit(0); } } @@ -227,25 +187,22 @@ static int write_clone_read(void) if (waiting < 0) { if (errno == EINTR) continue; - putstr("# wait4() failed: "); - putnum(errno); - putstr("\n"); + ksft_print_msg("wait4() failed: %d\n", errno); return 0; } if (waiting != ret) { - putstr("# wait4() returned wrong PID "); - putnum(waiting); - putstr("\n"); + ksft_print_msg("wait4() returned wrong PID %d\n", + waiting); return 0; } if (!WIFEXITED(status)) { - putstr("# child did not exit\n"); + ksft_print_msg("child did not exit\n"); return 0; } if (parent != get_tpidr2()) { - putstr("# TPIDR2 corrupted in parent\n"); + ksft_print_msg("TPIDR2 corrupted in parent\n"); return 0; } @@ -253,35 +210,14 @@ static int write_clone_read(void) } } -#define run_test(name) \ - if (name()) { \ - tests_passed++; \ - } else { \ - tests_failed++; \ - putstr("not "); \ - } \ - putstr("ok "); \ - putnum(++tests_run); \ - putstr(" " #name "\n"); - -#define skip_test(name) \ - tests_skipped++; \ - putstr("ok "); \ - putnum(++tests_run); \ - putstr(" # SKIP " #name "\n"); - int main(int argc, char **argv) { int ret; - putstr("TAP version 13\n"); - putstr("1.."); - putnum(EXPECTED_TESTS); - putstr("\n"); + ksft_print_header(); + ksft_set_plan(5); - putstr("# PID: "); - putnum(getpid()); - putstr("\n"); + ksft_print_msg("PID: %d\n", getpid()); /* * This test is run with nolibc which doesn't support hwcap and @@ -290,23 +226,21 @@ int main(int argc, char **argv) */ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0); if (ret >= 0) { - run_test(default_value); - run_test(write_read); - run_test(write_sleep_read); - run_test(write_fork_read); - run_test(write_clone_read); + ksft_test_result(default_value(), "default_value\n"); + ksft_test_result(write_read, "write_read\n"); + ksft_test_result(write_sleep_read, "write_sleep_read\n"); + ksft_test_result(write_fork_read, "write_fork_read\n"); + ksft_test_result(write_clone_read, "write_clone_read\n"); } else { - putstr("# SME support not present\n"); + ksft_print_msg("SME support not present\n"); - skip_test(default_value); - skip_test(write_read); - skip_test(write_sleep_read); - skip_test(write_fork_read); - skip_test(write_clone_read); + ksft_test_result_skip("default_value\n"); + ksft_test_result_skip("write_read\n"); + ksft_test_result_skip("write_sleep_read\n"); + ksft_test_result_skip("write_fork_read\n"); + ksft_test_result_skip("write_clone_read\n"); } - print_summary(); - - return 0; + ksft_finished(); } diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c index 191c47ca0ed8..124bc883365e 100644 --- a/tools/testing/selftests/arm64/fp/fp-ptrace.c +++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c @@ -1061,11 +1061,31 @@ static bool sve_write_supported(struct test_config *config) if (config->sme_vl_in != config->sme_vl_expected) { return false; } + + if (!sve_supported()) + return false; } return true; } +static bool sve_write_fpsimd_supported(struct test_config *config) +{ + if (!sve_supported()) + return false; + + if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA)) + return false; + + if (config->svcr_expected & SVCR_SM) + return false; + + if (config->sme_vl_in != 
config->sme_vl_expected) + return false; + + return true; +} + static void fpsimd_write_expected(struct test_config *config) { int vl; @@ -1134,6 +1154,9 @@ static void sve_write_expected(struct test_config *config) int vl = vl_expected(config); int sme_vq = __sve_vq_from_vl(config->sme_vl_expected); + if (!vl) + return; + fill_random(z_expected, __SVE_ZREGS_SIZE(__sve_vq_from_vl(vl))); fill_random(p_expected, __SVE_PREGS_SIZE(__sve_vq_from_vl(vl))); @@ -1152,7 +1175,7 @@ static void sve_write_expected(struct test_config *config) } } -static void sve_write(pid_t child, struct test_config *config) +static void sve_write_sve(pid_t child, struct test_config *config) { struct user_sve_header *sve; struct iovec iov; @@ -1161,6 +1184,9 @@ static void sve_write(pid_t child, struct test_config *config) vl = vl_expected(config); vq = __sve_vq_from_vl(vl); + if (!vl) + return; + iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE); iov.iov_base = malloc(iov.iov_len); if (!iov.iov_base) { @@ -1195,6 +1221,45 @@ static void sve_write(pid_t child, struct test_config *config) free(iov.iov_base); } +static void sve_write_fpsimd(pid_t child, struct test_config *config) +{ + struct user_sve_header *sve; + struct user_fpsimd_state *fpsimd; + struct iovec iov; + int ret, vl, vq; + + vl = vl_expected(config); + vq = __sve_vq_from_vl(vl); + + if (!vl) + return; + + iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, + SVE_PT_REGS_FPSIMD); + iov.iov_base = malloc(iov.iov_len); + if (!iov.iov_base) { + ksft_print_msg("Failed allocating %lu byte SVE write buffer\n", + iov.iov_len); + return; + } + memset(iov.iov_base, 0, iov.iov_len); + + sve = iov.iov_base; + sve->size = iov.iov_len; + sve->flags = SVE_PT_REGS_FPSIMD; + sve->vl = vl; + + fpsimd = iov.iov_base + SVE_PT_REGS_OFFSET; + memcpy(&fpsimd->vregs, v_expected, sizeof(v_expected)); + + ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_SVE, &iov); + if (ret != 0) + ksft_print_msg("Failed to write SVE: %s (%d)\n", + strerror(errno), errno); + + free(iov.iov_base); +} + static bool za_write_supported(struct test_config *config) { if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM)) @@ -1386,7 +1451,13 @@ static struct test_definition sve_test_defs[] = { .name = "SVE write", .supported = sve_write_supported, .set_expected_values = sve_write_expected, - .modify_values = sve_write, + .modify_values = sve_write_sve, + }, + { + .name = "SVE write FPSIMD format", + .supported = sve_write_fpsimd_supported, + .set_expected_values = fpsimd_write_expected, + .modify_values = sve_write_fpsimd, }, }; @@ -1607,7 +1678,7 @@ int main(void) * Run the test set if there is no SVE or SME, with those we * have to pick a VL for each run. 
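The new sve_write_fpsimd() above exercises the kernel's acceptance of NT_ARM_SVE data in FPSIMD layout. On the read side a tracer has to check the header flags before interpreting the payload; a minimal helper, assuming the arm64 uapi asm/ptrace.h definitions, might be:

	#include <stdbool.h>
	#include <asm/ptrace.h>		/* struct user_sve_header, SVE_PT_* */

	/* True if an NT_ARM_SVE payload uses the FPSIMD (V0-V31 only)
	 * layout rather than full Z/P/FFR contents. */
	static bool sve_payload_is_fpsimd(const void *buf)
	{
		const struct user_sve_header *sve = buf;

		return (sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD;
	}
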
*/ - if (!sve_supported()) { + if (!sve_supported() && !sme_supported()) { test_config.sve_vl_in = 0; test_config.sve_vl_expected = 0; test_config.sme_vl_in = 0; diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c index 577b6e05e860..b22303778fb0 100644 --- a/tools/testing/selftests/arm64/fp/sve-ptrace.c +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -170,7 +170,7 @@ static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type) memset(&sve, 0, sizeof(sve)); sve.size = sizeof(sve); sve.vl = sve_vl_from_vq(SVE_VQ_MIN); - sve.flags = SVE_PT_VL_INHERIT; + sve.flags = SVE_PT_VL_INHERIT | SVE_PT_REGS_SVE; ret = set_sve(child, type, &sve); if (ret != 0) { ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n", @@ -235,6 +235,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, /* Set the VL by doing a set with no register payload */ memset(&sve, 0, sizeof(sve)); sve.size = sizeof(sve); + sve.flags = SVE_PT_REGS_SVE; sve.vl = vl; ret = set_sve(child, type, &sve); if (ret != 0) { @@ -253,7 +254,7 @@ static void ptrace_set_get_vl(pid_t child, const struct vec_type *type, return; } - ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n", + ksft_test_result(new_sve->vl == prctl_vl, "Set %s VL %u\n", type->name, vl); free(new_sve); @@ -301,8 +302,10 @@ static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type) p[j] = j; } + /* This should only succeed for SVE */ ret = set_sve(child, type, sve); - ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n", + ksft_test_result((type->regset == NT_ARM_SVE) == (ret == 0), + "%s FPSIMD set via SVE: %d\n", type->name, ret); if (ret) goto out; @@ -750,9 +753,6 @@ int main(void) ksft_print_header(); ksft_set_plan(EXPECTED_TESTS); - if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) - ksft_exit_skip("SVE not available\n"); - child = fork(); if (!child) return do_child(); diff --git a/tools/testing/selftests/arm64/gcs/basic-gcs.c b/tools/testing/selftests/arm64/gcs/basic-gcs.c index 3fb9742342a3..54f9c888249d 100644 --- a/tools/testing/selftests/arm64/gcs/basic-gcs.c +++ b/tools/testing/selftests/arm64/gcs/basic-gcs.c @@ -298,6 +298,68 @@ out: return pass; } +/* A vfork()ed process can run and exit */ +static bool test_vfork(void) +{ + unsigned long child_mode; + int ret, status; + pid_t pid; + bool pass = true; + + pid = vfork(); + if (pid == -1) { + ksft_print_msg("vfork() failed: %d\n", errno); + pass = false; + goto out; + } + if (pid == 0) { + /* + * In child, make sure we can call a function, read + * the GCS pointer and status and then exit. + */ + valid_gcs_function(); + get_gcspr(); + + ret = my_syscall5(__NR_prctl, PR_GET_SHADOW_STACK_STATUS, + &child_mode, 0, 0, 0); + if (ret == 0 && !(child_mode & PR_SHADOW_STACK_ENABLE)) { + ksft_print_msg("GCS not enabled in child\n"); + ret = EXIT_FAILURE; + } + + _exit(ret); + } + + /* + * In parent, check we can still do function calls then check + * on the child. 
+ */ + valid_gcs_function(); + + ksft_print_msg("Waiting for child %d\n", pid); + + ret = waitpid(pid, &status, 0); + if (ret == -1) { + ksft_print_msg("Failed to wait for child: %d\n", + errno); + return false; + } + + if (!WIFEXITED(status)) { + ksft_print_msg("Child exited due to signal %d\n", + WTERMSIG(status)); + pass = false; + } else if (WEXITSTATUS(status)) { + ksft_print_msg("Child exited with status %d\n", + WEXITSTATUS(status)); + pass = false; + } + +out: + + return pass; +} + typedef bool (*gcs_test)(void); static struct { @@ -314,6 +376,7 @@ static struct { { "enable_invalid", enable_invalid, true }, { "map_guarded_stack", map_guarded_stack }, { "fork", test_fork }, + { "vfork", test_vfork }, }; int main(void) diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c index 2ee7f114d7fa..ff4e07503349 100644 --- a/tools/testing/selftests/arm64/mte/check_buffer_fill.c +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -31,7 +31,7 @@ static int check_buffer_by_byte(int mem_type, int mode) int i, j, item; bool err; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { @@ -68,7 +68,7 @@ static int check_buffer_underflow_by_byte(int mem_type, int mode, bool err; char *und_ptr = NULL; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, @@ -164,7 +164,7 @@ static int check_buffer_overflow_by_byte(int mem_type, int mode, size_t tagged_size, overflow_size; char *over_ptr = NULL; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); for (i = 0; i < item; i++) { ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, @@ -337,7 +337,7 @@ static int check_buffer_by_block(int mem_type, int mode) { int i, item, result = KSFT_PASS; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); item = ARRAY_SIZE(sizes); cur_mte_cxt.fault_valid = false; for (i = 0; i < item; i++) { @@ -368,7 +368,7 @@ static int check_memory_initial_tags(int mem_type, int mode, int mapping) int run, fd; int total = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < total; run++) { /* check initial tags for anonymous mmap */ ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false); @@ -415,7 +415,7 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(20); diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c index 7597fc632cad..5e97ee792e4d 100644 --- a/tools/testing/selftests/arm64/mte/check_child_memory.c +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -88,7 +88,7 @@ static int check_child_memory_mapping(int mem_type, int mode, int mapping) int item = ARRAY_SIZE(sizes); item = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < item; run++) { ptr = (char 
*)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, UNDERFLOW, OVERFLOW); @@ -109,7 +109,7 @@ static int check_child_file_mapping(int mem_type, int mode, int mapping) int run, fd, map_size, result = KSFT_PASS; int total = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < total; run++) { fd = create_temp_file(); if (fd == -1) @@ -160,8 +160,8 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); - mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); + mte_register_signal(SIGBUS, mte_default_handler, false); /* Set test plan */ ksft_set_plan(12); diff --git a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c index 3bfcd3848432..aad1234c7e0f 100644 --- a/tools/testing/selftests/arm64/mte/check_hugetlb_options.c +++ b/tools/testing/selftests/arm64/mte/check_hugetlb_options.c @@ -151,7 +151,7 @@ static int check_hugetlb_memory_mapping(int mem_type, int mode, int mapping, int map_size = default_huge_page_size(); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) return KSFT_FAIL; @@ -180,7 +180,7 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) unsigned long map_size; prot_flag = PROT_READ | PROT_WRITE; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); map_size = default_huge_page_size(); map_ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, 0, 0); @@ -210,7 +210,7 @@ static int check_child_hugetlb_memory_mapping(int mem_type, int mode, int mappin map_size = default_huge_page_size(); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); ptr = (char *)mte_allocate_memory_tag_range(map_size, mem_type, mapping, 0, 0); if (check_allocated_memory_range(ptr, map_size, mem_type, @@ -235,8 +235,8 @@ int main(int argc, char *argv[]) return err; /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler, false); + mte_register_signal(SIGSEGV, mte_default_handler, false); allocate_hugetlb(); diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c index 88c74bc46d4f..0cf5faef1724 100644 --- a/tools/testing/selftests/arm64/mte/check_ksm_options.c +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -106,7 +106,7 @@ static int check_madvise_options(int mem_type, int mode, int mapping) return err; } - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true); if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS) return KSFT_FAIL; @@ -141,8 +141,8 @@ int main(int argc, char *argv[]) return KSFT_FAIL; } /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler, false); + 
mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(4); diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c index 17694caaff53..c100af3012cb 100644 --- a/tools/testing/selftests/arm64/mte/check_mmap_options.c +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -3,6 +3,7 @@ #define _GNU_SOURCE +#include <assert.h> #include <errno.h> #include <fcntl.h> #include <signal.h> @@ -23,6 +24,35 @@ #define OVERFLOW MT_GRANULE_SIZE #define TAG_CHECK_ON 0 #define TAG_CHECK_OFF 1 +#define ATAG_CHECK_ON 1 +#define ATAG_CHECK_OFF 0 + +#define TEST_NAME_MAX 256 + +enum mte_mem_check_type { + CHECK_ANON_MEM = 0, + CHECK_FILE_MEM = 1, + CHECK_CLEAR_PROT_MTE = 2, +}; + +enum mte_tag_op_type { + TAG_OP_ALL = 0, + TAG_OP_STONLY = 1, +}; + +struct check_mmap_testcase { + int check_type; + int mem_type; + int mte_sync; + int mapping; + int tag_check; + int atag_check; + int tag_op; + bool enable_tco; +}; + +#define TAG_OP_ALL 0 +#define TAG_OP_STONLY 1 static size_t page_size; static int sizes[] = { @@ -30,8 +60,17 @@ static int sizes[] = { /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 }; -static int check_mte_memory(char *ptr, int size, int mode, int tag_check) +static int check_mte_memory(char *ptr, int size, int mode, + int tag_check,int atag_check, int tag_op) { + char buf[MT_GRANULE_SIZE]; + + if (!mtefar_support && atag_check == ATAG_CHECK_ON) + return KSFT_SKIP; + + if (atag_check == ATAG_CHECK_ON) + ptr = mte_insert_atag(ptr); + mte_initialize_current_context(mode, (uintptr_t)ptr, size); memset(ptr, '1', size); mte_wait_after_trig(); @@ -54,16 +93,34 @@ static int check_mte_memory(char *ptr, int size, int mode, int tag_check) if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF) return KSFT_FAIL; + if (tag_op == TAG_OP_STONLY) { + mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW); + memcpy(buf, ptr - UNDERFLOW, MT_GRANULE_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW); + memcpy(buf, ptr + size, MT_GRANULE_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; + } + return KSFT_PASS; } -static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, + int tag_check, int atag_check, int tag_op) { char *ptr, *map_ptr; int run, result, map_size; int item = ARRAY_SIZE(sizes); - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + if (tag_op == TAG_OP_STONLY && !mtestonly_support) + return KSFT_SKIP; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op); for (run = 0; run < item; run++) { map_size = sizes[run] + OVERFLOW + UNDERFLOW; map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); @@ -79,23 +136,27 @@ static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, i munmap((void *)map_ptr, map_size); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, tag_check); + result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op); mte_clear_tags((void *)ptr, sizes[run]); mte_free_memory((void *)map_ptr, map_size, mem_type, false); - if (result == KSFT_FAIL) - return KSFT_FAIL; + if (result != KSFT_PASS) + return result; } return KSFT_PASS; } -static int check_file_memory_mapping(int mem_type, int mode, int 
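The store-only branch added to check_mte_memory() further down relies on the semantics of the new mode: with store-only checking enabled, a load through a mismatching tag is not reported while a store is. A sketch of that expectation, reusing helper names from this test suite as extended in this series (not a standalone program):

	/* Guard granules before/after the tagged range keep tag zero, so
	 * accesses through the tagged pointer mismatch there. */
	char *p = (char *)mte_allocate_memory_tag_range(4096, USE_MMAP, MAP_PRIVATE,
							UNDERFLOW, OVERFLOW);

	mte_switch_mode(MTE_SYNC_ERR, MTE_ALLOW_NON_ZERO_TAG, true);	/* store-only */

	volatile char c = p[-1];	/* mismatching load: passes unchecked */
	p[-1] = c;			/* mismatching store: raises SEGV_MTESERR */
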
mapping, int tag_check) +static int check_file_memory_mapping(int mem_type, int mode, int mapping, + int tag_check, int atag_check, int tag_op) { char *ptr, *map_ptr; int run, fd, map_size; int total = ARRAY_SIZE(sizes); int result = KSFT_PASS; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + if (tag_op == TAG_OP_STONLY && !mtestonly_support) + return KSFT_SKIP; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, tag_op); for (run = 0; run < total; run++) { fd = create_temp_file(); if (fd == -1) @@ -117,24 +178,24 @@ static int check_file_memory_mapping(int mem_type, int mode, int mapping, int ta close(fd); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, tag_check); + result = check_mte_memory(ptr, sizes[run], mode, tag_check, atag_check, tag_op); mte_clear_tags((void *)ptr, sizes[run]); munmap((void *)map_ptr, map_size); close(fd); - if (result == KSFT_FAIL) - break; + if (result != KSFT_PASS) + return result; } - return result; + return KSFT_PASS; } -static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) +static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping, int atag_check) { char *ptr, *map_ptr; int run, prot_flag, result, fd, map_size; int total = ARRAY_SIZE(sizes); prot_flag = PROT_READ | PROT_WRITE; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); for (run = 0; run < total; run++) { map_size = sizes[run] + OVERFLOW + UNDERFLOW; ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, @@ -150,10 +211,10 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL); mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); if (result != KSFT_PASS) - return KSFT_FAIL; + return result; fd = create_temp_file(); if (fd == -1) @@ -174,19 +235,715 @@ static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) close(fd); return KSFT_FAIL; } - result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON, atag_check, TAG_OP_ALL); mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); close(fd); if (result != KSFT_PASS) - return KSFT_FAIL; + return result; } return KSFT_PASS; } +const char *format_test_name(struct check_mmap_testcase *tc) +{ + static char test_name[TEST_NAME_MAX]; + const char *check_type_str; + const char *mem_type_str; + const char *sync_str; + const char *mapping_str; + const char *tag_check_str; + const char *atag_check_str; + const char *tag_op_str; + + switch (tc->check_type) { + case CHECK_ANON_MEM: + check_type_str = "anonymous memory"; + break; + case CHECK_FILE_MEM: + check_type_str = "file memory"; + break; + case CHECK_CLEAR_PROT_MTE: + check_type_str = "clear PROT_MTE flags"; + break; + default: + assert(0); + break; + } + + switch (tc->mem_type) { + case USE_MMAP: + mem_type_str = "mmap"; + break; + case USE_MPROTECT: + mem_type_str = "mmap/mprotect"; + break; + default: + assert(0); + break; + } + + switch (tc->mte_sync) { + case MTE_NONE_ERR: + sync_str = "no error"; + break; + case MTE_SYNC_ERR: + sync_str = "sync error"; + break; + case MTE_ASYNC_ERR: + sync_str = "async error"; + break; + default: + assert(0); + break; + } + + switch 
(tc->mapping) { + case MAP_SHARED: + mapping_str = "shared"; + break; + case MAP_PRIVATE: + mapping_str = "private"; + break; + default: + assert(0); + break; + } + + switch (tc->tag_check) { + case TAG_CHECK_ON: + tag_check_str = "tag check on"; + break; + case TAG_CHECK_OFF: + tag_check_str = "tag check off"; + break; + default: + assert(0); + break; + } + + switch (tc->atag_check) { + case ATAG_CHECK_ON: + atag_check_str = "with address tag [63:60]"; + break; + case ATAG_CHECK_OFF: + atag_check_str = "without address tag [63:60]"; + break; + default: + assert(0); + break; + } + + snprintf(test_name, sizeof(test_name), + "Check %s with %s mapping, %s mode, %s memory and %s (%s)\n", + check_type_str, mapping_str, sync_str, mem_type_str, + tag_check_str, atag_check_str); + + switch (tc->tag_op) { + case TAG_OP_ALL: + tag_op_str = ""; + break; + case TAG_OP_STONLY: + tag_op_str = " / store-only"; + break; + default: + assert(0); + break; + } + + snprintf(test_name, TEST_NAME_MAX, + "Check %s with %s mapping, %s mode, %s memory and %s (%s%s)\n", + check_type_str, mapping_str, sync_str, mem_type_str, + tag_check_str, atag_check_str, tag_op_str); + + return test_name; +} + int main(int argc, char *argv[]) { - int err; + int err, i; int item = ARRAY_SIZE(sizes); + struct check_mmap_testcase test_cases[]= { + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = true, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = true, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_NONE_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_NONE_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_OFF, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = 
ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + 
.mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = 
TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_OFF, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_ANON_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_SHARED, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = false, + }, + { + .check_type = CHECK_FILE_MEM, + .mem_type = USE_MMAP, + .mte_sync = MTE_ASYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_STONLY, + .enable_tco = 
false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MMAP, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + { + .check_type = CHECK_CLEAR_PROT_MTE, + .mem_type = USE_MPROTECT, + .mte_sync = MTE_SYNC_ERR, + .mapping = MAP_PRIVATE, + .tag_check = TAG_CHECK_ON, + .atag_check = ATAG_CHECK_ON, + .tag_op = TAG_OP_ALL, + .enable_tco = false, + }, + }; err = mte_default_setup(); if (err) @@ -200,64 +957,51 @@ int main(int argc, char *argv[]) sizes[item - 2] = page_size; sizes[item - 1] = page_size + 1; - /* Register signal handlers */ - mte_register_signal(SIGBUS, mte_default_handler); - mte_register_signal(SIGSEGV, mte_default_handler); - /* Set test plan */ - ksft_set_plan(22); - - mte_enable_pstate_tco(); - - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n"); - - mte_disable_pstate_tco(); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), - "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n"); - - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); - - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n"); - 
evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), - "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n"); - evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), - "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); - - evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), - "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n"); - evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), - "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n"); + ksft_set_plan(ARRAY_SIZE(test_cases)); + + for (i = 0 ; i < ARRAY_SIZE(test_cases); i++) { + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler, + test_cases[i].atag_check == ATAG_CHECK_ON); + mte_register_signal(SIGSEGV, mte_default_handler, + test_cases[i].atag_check == ATAG_CHECK_ON); + + if (test_cases[i].enable_tco) + mte_enable_pstate_tco(); + else + mte_disable_pstate_tco(); + + switch (test_cases[i].check_type) { + case CHECK_ANON_MEM: + evaluate_test(check_anonymous_memory_mapping(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping, + test_cases[i].tag_check, + test_cases[i].atag_check, + test_cases[i].tag_op), + format_test_name(&test_cases[i])); + break; + case CHECK_FILE_MEM: + evaluate_test(check_file_memory_mapping(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping, + test_cases[i].tag_check, + test_cases[i].atag_check, + test_cases[i].tag_op), + format_test_name(&test_cases[i])); + break; + case CHECK_CLEAR_PROT_MTE: + evaluate_test(check_clear_prot_mte_flag(test_cases[i].mem_type, + test_cases[i].mte_sync, + test_cases[i].mapping, + test_cases[i].atag_check), + format_test_name(&test_cases[i])); + break; + default: + exit(KSFT_FAIL); + } + } mte_restore_setup(); ksft_print_cnts(); diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c index 4c89e9538ca0..f7f320defa7b 100644 --- a/tools/testing/selftests/arm64/mte/check_prctl.c +++ b/tools/testing/selftests/arm64/mte/check_prctl.c @@ -12,6 +12,10 @@ #include "kselftest.h" +#ifndef AT_HWCAP3 +#define AT_HWCAP3 29 +#endif + static int set_tagged_addr_ctrl(int val) { int ret; @@ -60,7 +64,7 @@ void check_basic_read(void) /* * Attempt to set a specified combination of modes. 
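check_prctl.c now gates the new modes on AT_HWCAP3 as well as AT_HWCAP2. A userspace probe for the new bits, assuming the AT_HWCAP3 value of 29 used above and the HWCAP3_MTE_STORE_ONLY bit position from the arm64 uapi hwcap header (an assumption where older headers lack it), can be as small as:

	#include <sys/auxv.h>

	#ifndef AT_HWCAP3
	#define AT_HWCAP3 29				/* as defined above for older libcs */
	#endif
	#ifndef HWCAP3_MTE_STORE_ONLY
	#define HWCAP3_MTE_STORE_ONLY	(1UL << 1)	/* assumed bit position */
	#endif

	static int have_mte_store_only(void)
	{
		/* Old kernels return 0 for unknown auxv entries, so this
		 * degrades to "not supported" rather than failing. */
		return !!(getauxval(AT_HWCAP3) & HWCAP3_MTE_STORE_ONLY);
	}
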
*/ -void set_mode_test(const char *name, int hwcap2, int mask) +void set_mode_test(const char *name, int hwcap2, int hwcap3, int mask) { int ret; @@ -69,6 +73,11 @@ void set_mode_test(const char *name, int hwcap2, int mask) return; } + if ((getauxval(AT_HWCAP3) & hwcap3) != hwcap3) { + ksft_test_result_skip("%s\n", name); + return; + } + ret = set_tagged_addr_ctrl(mask); if (ret < 0) { ksft_test_result_fail("%s\n", name); @@ -81,7 +90,7 @@ void set_mode_test(const char *name, int hwcap2, int mask) return; } - if ((ret & PR_MTE_TCF_MASK) == mask) { + if ((ret & (PR_MTE_TCF_MASK | PR_MTE_STORE_ONLY)) == mask) { ksft_test_result_pass("%s\n", name); } else { ksft_print_msg("Got %x, expected %x\n", @@ -93,12 +102,16 @@ void set_mode_test(const char *name, int hwcap2, int mask) struct mte_mode { int mask; int hwcap2; + int hwcap3; const char *name; } mte_modes[] = { - { PR_MTE_TCF_NONE, 0, "NONE" }, - { PR_MTE_TCF_SYNC, HWCAP2_MTE, "SYNC" }, - { PR_MTE_TCF_ASYNC, HWCAP2_MTE, "ASYNC" }, - { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, "SYNC+ASYNC" }, + { PR_MTE_TCF_NONE, 0, 0, "NONE" }, + { PR_MTE_TCF_SYNC, HWCAP2_MTE, 0, "SYNC" }, + { PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "ASYNC" }, + { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, 0, "SYNC+ASYNC" }, + { PR_MTE_TCF_SYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+STONLY" }, + { PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "ASYNC+STONLY" }, + { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC | PR_MTE_STORE_ONLY, HWCAP2_MTE, HWCAP3_MTE_STORE_ONLY, "SYNC+ASYNC+STONLY" }, }; int main(void) @@ -106,11 +119,11 @@ int main(void) int i; ksft_print_header(); - ksft_set_plan(5); + ksft_set_plan(ARRAY_SIZE(mte_modes)); check_basic_read(); for (i = 0; i < ARRAY_SIZE(mte_modes); i++) - set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2, + set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2, mte_modes[i].hwcap3, mte_modes[i].mask); ksft_print_cnts(); diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c index a3d1e23fe02a..4b764f2a8185 100644 --- a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -57,7 +57,7 @@ static int check_single_included_tags(int mem_type, int mode) return KSFT_FAIL; for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) { - ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag)); + ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag), false); if (ret != 0) result = KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. */ @@ -91,7 +91,7 @@ static int check_multiple_included_tags(int mem_type, int mode) for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) { excl_mask |= 1 << tag; - mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask)); + mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask), false); /* Try to catch a excluded tag by a number of tries. */ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { ptr = mte_insert_tags(ptr, BUFFER_SIZE); @@ -120,7 +120,7 @@ static int check_all_included_tags(int mem_type, int mode) mem_type, false) != KSFT_PASS) return KSFT_FAIL; - ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK); + ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK, false); if (ret != 0) return KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. 
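The extended mte_modes[] table above combines the PR_MTE_TCF_* modes with PR_MTE_STORE_ONLY in a single PR_SET_TAGGED_ADDR_CTRL mask. A hedged sketch of enabling synchronous, store-only checking directly (the PR_MTE_STORE_ONLY value is an assumption where libc headers do not yet carry it):

	#include <sys/prctl.h>
	#include <linux/prctl.h>

	#ifndef PR_MTE_STORE_ONLY
	#define PR_MTE_STORE_ONLY	(1UL << 19)	/* assumed, next bit after the tag mask */
	#endif

	static int enable_sync_store_only_mte(void)
	{
		unsigned long ctrl = PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC |
				     PR_MTE_STORE_ONLY |
				     (0xfffeUL << PR_MTE_TAG_SHIFT);	/* allow non-zero tags */

		return prctl(PR_SET_TAGGED_ADDR_CTRL, ctrl, 0, 0, 0);
	}
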
*/ @@ -145,7 +145,7 @@ static int check_none_included_tags(int mem_type, int mode) if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS) return KSFT_FAIL; - ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK); + ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK, false); if (ret != 0) return KSFT_FAIL; /* Try to catch a excluded tag by a number of tries. */ @@ -180,7 +180,7 @@ int main(int argc, char *argv[]) return err; /* Register SIGSEGV handler */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(4); diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c index f4ae5f87a3b7..fb7936c4e097 100644 --- a/tools/testing/selftests/arm64/mte/check_user_mem.c +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -44,7 +44,7 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping, err = KSFT_PASS; len = 2 * page_sz; - mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG, false); fd = create_temp_file(); if (fd == -1) return KSFT_FAIL; @@ -211,7 +211,7 @@ int main(int argc, char *argv[]) return err; /* Register signal handlers */ - mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler, false); /* Set test plan */ ksft_set_plan(64); diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c index a1dc2fe5285b..397e57dd946a 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.c +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -6,6 +6,7 @@ #include <signal.h> #include <stdio.h> #include <stdlib.h> +#include <time.h> #include <unistd.h> #include <linux/auxvec.h> @@ -19,20 +20,40 @@ #include "mte_common_util.h" #include "mte_def.h" +#ifndef SA_EXPOSE_TAGBITS +#define SA_EXPOSE_TAGBITS 0x00000800 +#endif + #define INIT_BUFFER_SIZE 256 struct mte_fault_cxt cur_mte_cxt; +bool mtefar_support; +bool mtestonly_support; static unsigned int mte_cur_mode; static unsigned int mte_cur_pstate_tco; +static bool mte_cur_stonly; void mte_default_handler(int signum, siginfo_t *si, void *uc) { + struct sigaction sa; unsigned long addr = (unsigned long)si->si_addr; + unsigned char si_tag, si_atag; + + sigaction(signum, NULL, &sa); + + if (sa.sa_flags & SA_EXPOSE_TAGBITS) { + si_tag = MT_FETCH_TAG(addr); + si_atag = MT_FETCH_ATAG(addr); + addr = MT_CLEAR_TAGS(addr); + } else { + si_tag = 0; + si_atag = 0; + } if (signum == SIGSEGV) { #ifdef DEBUG - ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n", - ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); + ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx, si_tag=%x, si_atag=%x\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code, si_tag, si_atag); #endif if (si->si_code == SEGV_MTEAERR) { if (cur_mte_cxt.trig_si_code == si->si_code) @@ -45,13 +66,18 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) } /* Compare the context for precise error */ else if (si->si_code == SEGV_MTESERR) { + if ((!mtefar_support && si_atag) || (si_atag != MT_FETCH_ATAG(cur_mte_cxt.trig_addr))) { + ksft_print_msg("Invalid MTE synchronous exception caught for address tag! 
si_tag=%x, si_atag: %x\n", si_tag, si_atag); + exit(KSFT_FAIL); + } + if (cur_mte_cxt.trig_si_code == si->si_code && ((cur_mte_cxt.trig_range >= 0 && - addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || (cur_mte_cxt.trig_range < 0 && - addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { + addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { cur_mte_cxt.fault_valid = true; /* Adjust the pc by 4 */ ((ucontext_t *)uc)->uc_mcontext.pc += 4; @@ -67,11 +93,11 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) ksft_print_msg("INFO: SIGBUS signal at pc=%llx, fault addr=%lx, si_code=%x\n", ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); if ((cur_mte_cxt.trig_range >= 0 && - addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + addr >= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || (cur_mte_cxt.trig_range < 0 && - addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && - addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { + addr <= MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAGS(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { cur_mte_cxt.fault_valid = true; /* Adjust the pc by 4 */ ((ucontext_t *)uc)->uc_mcontext.pc += 4; @@ -79,12 +105,17 @@ void mte_default_handler(int signum, siginfo_t *si, void *uc) } } -void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)) +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *), + bool export_tags) { struct sigaction sa; sa.sa_sigaction = handler; sa.sa_flags = SA_SIGINFO; + + if (export_tags && signal == SIGSEGV) + sa.sa_flags |= SA_EXPOSE_TAGBITS; + sigemptyset(&sa.sa_mask); sigaction(signal, &sa, NULL); } @@ -120,6 +151,19 @@ void mte_clear_tags(void *ptr, size_t size) mte_clear_tag_address_range(ptr, size); } +void *mte_insert_atag(void *ptr) +{ + unsigned char atag; + + atag = mtefar_support ? (random() % MT_ATAG_MASK) + 1 : 0; + return (void *)MT_SET_ATAG((unsigned long)ptr, atag); +} + +void *mte_clear_atag(void *ptr) +{ + return (void *)MT_CLEAR_ATAG((unsigned long)ptr); +} + static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, size_t range_before, size_t range_after, bool tags, int fd) @@ -272,7 +316,7 @@ void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range) cur_mte_cxt.trig_si_code = 0; } -int mte_switch_mode(int mte_option, unsigned long incl_mask) +int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly) { unsigned long en = 0; @@ -304,6 +348,9 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) break; } + if (mtestonly_support && stonly) + en |= PR_MTE_STORE_ONLY; + en |= (incl_mask << PR_MTE_TAG_SHIFT); /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. 
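mte_register_signal() above opts into SA_EXPOSE_TAGBITS so that si_addr keeps the tag bits instead of having them cleared by the kernel. Outside the test harness the same pattern, with the fallback define used above, looks like this (a sketch; the handler simply exits rather than recovering):

	#include <signal.h>
	#include <unistd.h>

	#ifndef SA_EXPOSE_TAGBITS
	#define SA_EXPOSE_TAGBITS 0x00000800
	#endif

	static void tag_fault_handler(int sig, siginfo_t *si, void *uc)
	{
		unsigned long addr = (unsigned long)si->si_addr;
		unsigned char tag  = (addr >> 56) & 0xf;	/* MTE logical tag, bits [59:56] */
		unsigned char atag = (addr >> 60) & 0xf;	/* address tag, bits [63:60], only with MTE_FAR */

		(void)sig; (void)uc; (void)tag; (void)atag;
		_exit(1);
	}

	static void install_tag_fault_handler(void)
	{
		struct sigaction sa;

		sa.sa_sigaction = tag_fault_handler;
		sa.sa_flags = SA_SIGINFO | SA_EXPOSE_TAGBITS;	/* keep tag bits in si_addr */
		sigemptyset(&sa.sa_mask);
		sigaction(SIGSEGV, &sa, NULL);
	}
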
*/ if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) { @@ -316,12 +363,21 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask) int mte_default_setup(void) { unsigned long hwcaps2 = getauxval(AT_HWCAP2); + unsigned long hwcaps3 = getauxval(AT_HWCAP3); unsigned long en = 0; int ret; + /* To generate random address tag */ + srandom(time(NULL)); + if (!(hwcaps2 & HWCAP2_MTE)) ksft_exit_skip("MTE features unavailable\n"); + mtefar_support = !!(hwcaps3 & HWCAP3_MTE_FAR); + + if (hwcaps3 & HWCAP3_MTE_STORE_ONLY) + mtestonly_support = true; + /* Get current mte mode */ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); if (ret < 0) { @@ -335,6 +391,8 @@ int mte_default_setup(void) else if (ret & PR_MTE_TCF_NONE) mte_cur_mode = MTE_NONE_ERR; + mte_cur_stonly = (ret & PR_MTE_STORE_ONLY) ? true : false; + mte_cur_pstate_tco = mte_get_pstate_tco(); /* Disable PSTATE.TCO */ mte_disable_pstate_tco(); @@ -343,7 +401,7 @@ int mte_default_setup(void) void mte_restore_setup(void) { - mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG); + mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG, mte_cur_stonly); if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN) mte_enable_pstate_tco(); else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS) diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h index a0017a303beb..250d671329a5 100644 --- a/tools/testing/selftests/arm64/mte/mte_common_util.h +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -37,10 +37,13 @@ struct mte_fault_cxt { }; extern struct mte_fault_cxt cur_mte_cxt; +extern bool mtefar_support; +extern bool mtestonly_support; /* MTE utility functions */ void mte_default_handler(int signum, siginfo_t *si, void *uc); -void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)); +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *), + bool export_tags); void mte_wait_after_trig(void); void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags); void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping, @@ -54,9 +57,11 @@ void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, size_t range_before, size_t range_after); void *mte_insert_tags(void *ptr, size_t size); void mte_clear_tags(void *ptr, size_t size); +void *mte_insert_atag(void *ptr); +void *mte_clear_atag(void *ptr); int mte_default_setup(void); void mte_restore_setup(void); -int mte_switch_mode(int mte_option, unsigned long incl_mask); +int mte_switch_mode(int mte_option, unsigned long incl_mask, bool stonly); void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range); /* Common utility functions */ diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h index 9b188254b61a..6ad22f07c9b8 100644 --- a/tools/testing/selftests/arm64/mte/mte_def.h +++ b/tools/testing/selftests/arm64/mte/mte_def.h @@ -42,6 +42,8 @@ #define MT_TAG_COUNT 16 #define MT_INCLUDE_TAG_MASK 0xFFFF #define MT_EXCLUDE_TAG_MASK 0x0 +#define MT_ATAG_SHIFT 60 +#define MT_ATAG_MASK 0xFUL #define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1) #define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT)) @@ -49,6 +51,12 @@ #define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK)) #define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE)) +#define MT_CLEAR_ATAG(x) ((x) & ~(MT_TAG_MASK << MT_ATAG_SHIFT)) +#define MT_SET_ATAG(x, y) ((x) | (((y) & MT_ATAG_MASK) << MT_ATAG_SHIFT)) +#define 
MT_FETCH_ATAG(x) ((x >> MT_ATAG_SHIFT) & (MT_ATAG_MASK)) + +#define MT_CLEAR_TAGS(x) (MT_CLEAR_ATAG(MT_CLEAR_TAG(x))) + #define MT_PSTATE_TCO_SHIFT 25 #define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT) #define MT_PSTATE_TCO_EN 1 diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index e2a2c46c008b..3d8378972d26 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -21,7 +21,6 @@ test_lirc_mode2_user flow_dissector_load test_tcpnotify_user test_libbpf -test_sysctl xdping test_cpp *.d diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST index 1789a61d0a9b..f748f2c33b22 100644 --- a/tools/testing/selftests/bpf/DENYLIST +++ b/tools/testing/selftests/bpf/DENYLIST @@ -1,6 +1,5 @@ # TEMPORARY # Alphabetical order -dynptr/test_probe_read_user_str_dynptr # disabled until https://patchwork.kernel.org/project/linux-mm/patch/20250422131449.57177-1-mykyta.yatsenko5@gmail.com/ makes it into the bpf-next get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge stacktrace_build_id stacktrace_build_id_nmi diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64 deleted file mode 100644 index 12e99c0277a8..000000000000 --- a/tools/testing/selftests/bpf/DENYLIST.aarch64 +++ /dev/null @@ -1 +0,0 @@ -tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524 diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index cf5ed3bee573..4863106034df 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -73,7 +73,7 @@ endif # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \ test_sockmap \ - test_tcpnotify_user test_sysctl \ + test_tcpnotify_user \ test_progs-no_alu32 TEST_INST_SUBDIRS := no_alu32 @@ -109,6 +109,7 @@ TEST_PROGS := test_kmod.sh \ test_xdping.sh \ test_bpftool_build.sh \ test_bpftool.sh \ + test_bpftool_map.sh \ test_bpftool_metadata.sh \ test_doc_build.sh \ test_xsk.sh \ @@ -220,7 +221,7 @@ ifeq ($(VMLINUX_BTF),) $(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)") endif -# Define simple and short `make test_progs`, `make test_sysctl`, etc targets +# Define simple and short `make test_progs`, `make test_maps`, etc targets # to build individual tests. # NOTE: Semicolon at the end is critical to override lib.mk's default static # rule for binaries. 
@@ -329,7 +330,6 @@ NETWORK_HELPERS := $(OUTPUT)/network_helpers.o $(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS) $(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS) -$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(OUTPUT)/test_tag: $(TESTING_HELPERS) $(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS) $(OUTPUT)/xdping: $(TESTING_HELPERS) @@ -841,6 +841,11 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \ $(call msg,BINARY,,$@) $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@ +# This works around GCC warning about snprintf truncating strings like: +# +# char a[PATH_MAX], b[PATH_MAX]; +# snprintf(a, "%s/foo", b); // triggers -Wformat-truncation +$(OUTPUT)/veristat.o: CFLAGS += -Wno-format-truncation $(OUTPUT)/veristat.o: $(BPFOBJ) $(OUTPUT)/veristat: $(OUTPUT)/veristat.o $(call msg,BINARY,,$@) diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h index 68a51dcc0669..16f8ce832004 100644 --- a/tools/testing/selftests/bpf/bpf_arena_common.h +++ b/tools/testing/selftests/bpf/bpf_arena_common.h @@ -46,8 +46,11 @@ void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt, int node_id, __u64 flags) __ksym __weak; +int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak; void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak; +#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start) + #else /* when compiled as user space code */ #define __arena diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h index a9674e544322..c550e5711967 100644 --- a/tools/testing/selftests/bpf/bpf_atomic.h +++ b/tools/testing/selftests/bpf/bpf_atomic.h @@ -61,7 +61,7 @@ extern bool CONFIG_X86_64 __kconfig __weak; #define smp_mb() \ ({ \ - unsigned long __val; \ + volatile unsigned long __val; \ __sync_fetch_and_add(&__val, 0); \ }) diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index 5e512a1d09d1..da7e230f2781 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -596,4 +596,7 @@ extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym; extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym; extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym; +extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str, + struct bpf_dynptr *value_p) __weak __ksym; + #endif diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 8215c9b3115e..9386dfe8b884 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -69,7 +69,7 @@ extern int bpf_get_file_xattr(struct file *file, const char *name, struct bpf_dynptr *value_ptr) __ksym; extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; extern void bpf_key_put(struct bpf_key *key) __ksym; extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, diff --git 
a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index e4535451322e..15f626014872 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -4,6 +4,7 @@ #include <sys/mount.h> #include <sys/stat.h> #include <sys/types.h> +#include <sys/xattr.h> #include <linux/limits.h> #include <stdio.h> #include <stdlib.h> @@ -319,6 +320,26 @@ int join_parent_cgroup(const char *relative_path) } /** + * set_cgroup_xattr() - Set xattr on a cgroup dir + * @relative_path: The cgroup path, relative to the workdir, to set xattr + * @name: xattr name + * @value: xattr value + * + * This function set xattr on cgroup dir. + * + * On success, it returns 0, otherwise on failure it returns -1. + */ +int set_cgroup_xattr(const char *relative_path, + const char *name, + const char *value) +{ + char cgroup_path[PATH_MAX + 1]; + + format_cgroup_path(cgroup_path, relative_path); + return setxattr(cgroup_path, name, value, strlen(value) + 1, 0); +} + +/** * __cleanup_cgroup_environment() - Delete temporary cgroups * * This is a helper for cleanup_cgroup_environment() that is responsible for diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index 502845160d88..182e1ac36c95 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -26,6 +26,10 @@ int join_cgroup(const char *relative_path); int join_root_cgroup(void); int join_parent_cgroup(const char *relative_path); +int set_cgroup_xattr(const char *relative_path, + const char *name, + const char *value); + int setup_cgroup_environment(void); void cleanup_cgroup_environment(void); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index f74e1ea0ad3b..8916ab814a3e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -97,6 +97,9 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NF_TABLES_IPV4=y CONFIG_NF_TABLES_IPV6=y CONFIG_NETFILTER_INGRESS=y +CONFIG_IP_NF_IPTABLES_LEGACY=y +CONFIG_IP6_NF_IPTABLES_LEGACY=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NF_FLOW_TABLE=y CONFIG_NF_FLOW_TABLE_INET=y CONFIG_NETFILTER_NETLINK=y @@ -105,6 +108,7 @@ CONFIG_IP_NF_IPTABLES=y CONFIG_IP6_NF_IPTABLES=y CONFIG_IP6_NF_FILTER=y CONFIG_NF_NAT=y +CONFIG_PACKET=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el new file mode 100644 index 000000000000..9acf389dc4ce --- /dev/null +++ b/tools/testing/selftests/bpf/config.ppc64el @@ -0,0 +1,93 @@ +CONFIG_ALTIVEC=y +CONFIG_AUDIT=y +CONFIG_BLK_CGROUP=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_BLK_DEV_RAM=y +CONFIG_BONDING=y +CONFIG_BPF_JIT_ALWAYS_ON=y +CONFIG_BPF_PRELOAD_UMD=y +CONFIG_BPF_PRELOAD=y +CONFIG_CGROUP_CPUACCT=y +CONFIG_CGROUP_DEVICE=y +CONFIG_CGROUP_FREEZER=y +CONFIG_CGROUP_HUGETLB=y +CONFIG_CGROUP_NET_CLASSID=y +CONFIG_CGROUP_PERF=y +CONFIG_CGROUP_PIDS=y +CONFIG_CGROUP_SCHED=y +CONFIG_CGROUPS=y +CONFIG_CMDLINE_BOOL=y +CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1" +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_CPUSETS=y +CONFIG_DEBUG_ATOMIC_SLEEP=y +CONFIG_DEBUG_FS=y +CONFIG_DETECT_HUNG_TASK=y +CONFIG_DEVTMPFS_MOUNT=y +CONFIG_DEVTMPFS=y +CONFIG_EXPERT=y +CONFIG_EXT4_FS_POSIX_ACL=y +CONFIG_EXT4_FS_SECURITY=y +CONFIG_EXT4_FS=y +CONFIG_FRAME_POINTER=y +CONFIG_FRAME_WARN=1280 +CONFIG_HARDLOCKUP_DETECTOR=y +CONFIG_HIGH_RES_TIMERS=y +CONFIG_HUGETLBFS=y +CONFIG_HVC_CONSOLE=y 
+CONFIG_INET=y +CONFIG_IP_ADVANCED_ROUTER=y +CONFIG_IP_MULTICAST=y +CONFIG_IP_MULTIPLE_TABLES=y +CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_JUMP_LABEL=y +CONFIG_KALLSYMS_ALL=y +CONFIG_KPROBES=y +CONFIG_MEMCG=y +CONFIG_NAMESPACES=y +CONFIG_NET_ACT_BPF=y +CONFIG_NETDEVICES=y +CONFIG_NETFILTER_XT_MATCH_BPF=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_VRF=y +CONFIG_NET=y +CONFIG_NO_HZ_IDLE=y +CONFIG_NONPORTABLE=y +CONFIG_NR_CPUS=256 +CONFIG_PACKET=y +CONFIG_PANIC_ON_OOPS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_PCI_HOST_GENERIC=y +CONFIG_PCI=y +CONFIG_POSIX_MQUEUE=y +CONFIG_PPC64=y +CONFIG_PPC_OF_BOOT_TRAMPOLINE=y +CONFIG_PPC_PSERIES=y +CONFIG_PPC_RADIX_MMU=y +CONFIG_PRINTK_TIME=y +CONFIG_PROC_KCORE=y +CONFIG_PROFILING=y +CONFIG_RCU_CPU_STALL_TIMEOUT=60 +CONFIG_RT_GROUP_SCHED=y +CONFIG_SECTION_MISMATCH_WARN_ONLY=y +CONFIG_SECURITY_NETWORK=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_SMP=y +CONFIG_SOC_VIRT=y +CONFIG_SYSVIPC=y +CONFIG_TCP_CONG_ADVANCED=y +CONFIG_THREAD_SHIFT=14 +CONFIG_TLS=y +CONFIG_TMPFS_POSIX_ACL=y +CONFIG_TMPFS=y +CONFIG_TUN=y +CONFIG_UNIX=y +CONFIG_UPROBES=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VLAN_8021Q=y +CONFIG_VSOCKETS_LOOPBACK=y +CONFIG_VSX=y +CONFIG_XFRM_USER=y diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c index 67557cda2208..42b49870e520 100644 --- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c +++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c @@ -13,7 +13,7 @@ static void test_fail_cases(void) { LIBBPF_OPTS(bpf_map_create_opts, opts); - __u32 value; + __u32 value = 0; int fd, err; /* Invalid key size */ diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c index 6befa870434b..4a0670c056ba 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c @@ -489,10 +489,28 @@ cleanup: bpf_link__destroy(link); } +static int verify_tracing_link_info(int fd, u64 cookie) +{ + struct bpf_link_info info; + int err; + u32 len = sizeof(info); + + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_TRACING, "link_type")) + return -1; + + ASSERT_EQ(info.tracing.cookie, cookie, "tracing_cookie"); + + return 0; +} + static void tracing_subtest(struct test_bpf_cookie *skel) { __u64 cookie; - int prog_fd; + int prog_fd, err; int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1; LIBBPF_OPTS(bpf_test_run_opts, opts); LIBBPF_OPTS(bpf_link_create_opts, link_opts); @@ -507,6 +525,10 @@ static void tracing_subtest(struct test_bpf_cookie *skel) if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create")) goto cleanup; + err = verify_tracing_link_info(fentry_fd, cookie); + if (!ASSERT_OK(err, "verify_tracing_link_info")) + goto cleanup; + cookie = 0x20000000000000L; prog_fd = bpf_program__fd(skel->progs.fexit_test1); link_opts.tracing.cookie = cookie; @@ -635,10 +657,29 @@ cleanup: bpf_link__destroy(link); } +static int verify_raw_tp_link_info(int fd, u64 cookie) +{ + struct bpf_link_info info; + int err; + u32 len = sizeof(info); + + memset(&info, 0, sizeof(info)); + err = bpf_link_get_info_by_fd(fd, &info, &len); + if (!ASSERT_OK(err, "get_link_info")) + return -1; + + if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type")) + return -1; + + ASSERT_EQ(info.raw_tracepoint.cookie, cookie, "raw_tp_cookie"); + + return 0; +} + 
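The verify_tracing_link_info() and verify_raw_tp_link_info() helpers nearby each check one specific link type. As a usage note, here is a minimal standalone sketch (not part of the selftest) of reading the cookie back from an arbitrary link fd; it assumes a libbpf that provides bpf_link_get_info_by_fd() and the tracing/raw_tracepoint cookie fields exercised by this series:

#include <string.h>
#include <bpf/bpf.h>
#include <linux/bpf.h>

/* Return 0 and fill *cookie if the link type carries a cookie we handle. */
int link_cookie(int link_fd, __u64 *cookie)
{
	struct bpf_link_info info;
	__u32 len = sizeof(info);

	memset(&info, 0, sizeof(info));
	if (bpf_link_get_info_by_fd(link_fd, &info, &len))
		return -1;

	switch (info.type) {
	case BPF_LINK_TYPE_TRACING:
		*cookie = info.tracing.cookie;
		return 0;
	case BPF_LINK_TYPE_RAW_TRACEPOINT:
		*cookie = info.raw_tracepoint.cookie;
		return 0;
	default:
		return -1;	/* other link types not handled in this sketch */
	}
}

A caller would pass the fd returned by bpf_link__fd() (or the fd from link_create) and compare the result against the cookie supplied at attach time, much as the subtests below do.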
static void raw_tp_subtest(struct test_bpf_cookie *skel) { __u64 cookie; - int prog_fd, link_fd = -1; + int err, prog_fd, link_fd = -1; struct bpf_link *link = NULL; LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts); LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts); @@ -656,6 +697,11 @@ static void raw_tp_subtest(struct test_bpf_cookie *skel) goto cleanup; usleep(1); /* trigger */ + + err = verify_raw_tp_link_info(link_fd, cookie); + if (!ASSERT_OK(err, "verify_raw_tp_link_info")) + goto cleanup; + close(link_fd); /* detach */ link_fd = -1; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index add4a18c33bd..5225d69bf79b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -323,7 +323,7 @@ static void test_task_pidfd(void) static void test_task_sleepable(void) { struct bpf_iter_tasks *skel; - int pid, status, err, data_pipe[2], finish_pipe[2], c; + int pid, status, err, data_pipe[2], finish_pipe[2], c = 0; char *test_data = NULL; char *test_data_long = NULL; char *data[2]; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c index fe2c502e5089..ecc3d47919ad 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c @@ -78,7 +78,7 @@ static int test_setup_uffd(void *fault_addr) } uffd_register.range.start = (unsigned long)fault_addr; - uffd_register.range.len = 4096; + uffd_register.range.len = getpagesize(); uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) { close(uffd); diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index c0a776feec23..82903585c870 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -879,6 +879,122 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d, "static int bpf_cgrp_storage_busy = (int)2", 2); } +struct btf_dump_string_ctx { + struct btf *btf; + struct btf_dump *d; + char *str; + struct btf_dump_type_data_opts *opts; + int array_id; +}; + +static int btf_dump_one_string(struct btf_dump_string_ctx *ctx, + char *ptr, size_t ptr_sz, + const char *expected_val) +{ + size_t type_sz; + int ret; + + ctx->str[0] = '\0'; + type_sz = btf__resolve_size(ctx->btf, ctx->array_id); + ret = btf_dump__dump_type_data(ctx->d, ctx->array_id, ptr, ptr_sz, ctx->opts); + if (type_sz <= ptr_sz) { + if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz")) + return -EINVAL; + } + if (!ASSERT_STREQ(ctx->str, expected_val, "ensure expected/actual match")) + return -EFAULT; + return 0; +} + +static void btf_dump_strings(struct btf_dump_string_ctx *ctx) +{ + struct btf_dump_type_data_opts *opts = ctx->opts; + + opts->emit_strings = true; + + opts->compact = true; + opts->emit_zeroes = false; + + opts->skip_names = false; + btf_dump_one_string(ctx, "foo", 4, "(char[4])\"foo\""); + + opts->skip_names = true; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. */ + opts->emit_zeroes = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* This should have no effect. */ + opts->compact = false; + btf_dump_one_string(ctx, "foo", 4, "\"foo\""); + + /* Non-printable characters come out as hex. 
*/ + btf_dump_one_string(ctx, "fo\xff", 4, "\"fo\\xff\""); + btf_dump_one_string(ctx, "fo\x7", 4, "\"fo\\x07\""); + + /* + * Strings that are too long for the specified type ("char[4]") + * should fall back to the current behavior. + */ + opts->compact = true; + btf_dump_one_string(ctx, "abcde", 6, "['a','b','c','d',]"); + + /* + * Strings that are too short for the specified type ("char[4]") + * should work normally. + */ + btf_dump_one_string(ctx, "ab", 3, "\"ab\""); + + /* Non-NUL-terminated arrays don't get printed as strings. */ + char food[4] = { 'f', 'o', 'o', 'd' }; + char bye[3] = { 'b', 'y', 'e' }; + + btf_dump_one_string(ctx, food, 4, "['f','o','o','d',]"); + btf_dump_one_string(ctx, bye, 3, "['b','y','e',]"); + + /* The embedded NUL should terminate the string. */ + char embed[4] = { 'f', 'o', '\0', 'd' }; + + btf_dump_one_string(ctx, embed, 4, "\"fo\""); +} + +static void test_btf_dump_string_data(void) +{ + struct test_ctx t = {}; + char str[STRSIZE]; + struct btf_dump *d; + DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts); + struct btf_dump_string_ctx ctx; + int char_id, int_id, array_id; + + if (test_ctx__init(&t)) + return; + + d = btf_dump__new(t.btf, btf_dump_snprintf, str, NULL); + if (!ASSERT_OK_PTR(d, "could not create BTF dump")) + return; + + /* Generate BTF for a four-element char array. */ + char_id = btf__add_int(t.btf, "char", 1, BTF_INT_CHAR); + ASSERT_EQ(char_id, 1, "char_id"); + int_id = btf__add_int(t.btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(int_id, 2, "int_id"); + array_id = btf__add_array(t.btf, int_id, char_id, 4); + ASSERT_EQ(array_id, 3, "array_id"); + + ctx.btf = t.btf; + ctx.d = d; + ctx.str = str; + ctx.opts = &opts; + ctx.array_id = array_id; + + btf_dump_strings(&ctx); + + btf_dump__free(d); + test_ctx__free(&t); +} + static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str, const char *name, const char *expected_val, void *data, size_t data_sz) @@ -970,6 +1086,8 @@ void test_btf_dump() { test_btf_dump_struct_data(btf, d, str); if (test__start_subtest("btf_dump: var_data")) test_btf_dump_var_data(btf, d, str); + if (test__start_subtest("btf_dump: string_data")) + test_btf_dump_string_data(); btf_dump__free(d); btf__free(btf); diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c new file mode 100644 index 000000000000..bb60704a3ef9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c @@ -0,0 +1,617 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "cgroup_mprog.skel.h" + +static void assert_mprog_count(int cg, int atype, int expected) +{ + __u32 count = 0, attach_flags = 0; + int err; + + err = bpf_prog_query(cg, atype, 0, &attach_flags, + NULL, &count); + ASSERT_EQ(count, expected, "count"); + ASSERT_EQ(err, 0, "prog_query"); +} + +static void test_prog_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct cgroup_mprog *skel; + __u32 prog_ids[10]; + int cg, err; + + cg = test__join_cgroup("/prog_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /prog_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE, + .expected_revision = 2, + ); + + /* ordering: [fd2, fd1] */ + err = bpf_prog_attach_opts(fd2, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + /* ordering: [fd2, fd3, fd1] */ + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + err = bpf_prog_attach_opts(fd4, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + /* retrieve optq.prog_cnt */ + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + /* optq.prog_cnt will be used in below query */ + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + +cleanup4: + optd.expected_revision = 5; + err = bpf_prog_detach_opts(fd4, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 3); + +cleanup3: + LIBBPF_OPTS_RESET(optd); + err = bpf_prog_detach_opts(fd3, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 2); + + /* Check revision after two detach operations */ + err = bpf_prog_query_opts(cg, 
atype, &optq); + ASSERT_OK(err, "prog_query"); + ASSERT_EQ(optq.revision, 7, "revision"); + +cleanup2: + err = bpf_prog_detach_opts(fd2, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_link_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_cgroup_opts, opta); + LIBBPF_OPTS(bpf_cgroup_opts, optd); + LIBBPF_OPTS(bpf_prog_query_opts, optq); + struct bpf_link *link1, *link2, *link3, *link4; + __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4; + struct cgroup_mprog *skel; + __u32 prog_ids[10]; + int cg, err; + + cg = test__join_cgroup("/link_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /link_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + id1 = id_from_prog_fd(fd1); + id2 = id_from_prog_fd(fd2); + id3 = id_from_prog_fd(fd3); + id4 = id_from_prog_fd(fd4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta); + if (!ASSERT_OK_PTR(link1, "link_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_BEFORE | BPF_F_LINK, + .relative_id = id_from_link_fd(bpf_link__fd(link1)), + .expected_revision = 2, + ); + + /* ordering: [fd2, fd1] */ + link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta); + if (!ASSERT_OK_PTR(link2, "link_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER | BPF_F_LINK, + .relative_fd = bpf_link__fd(link2), + .expected_revision = 3, + ); + + /* ordering: [fd2, fd3, fd1] */ + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_OK_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta); + if (!ASSERT_OK_PTR(link4, "link_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + /* retrieve optq.prog_cnt */ + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + /* optq.prog_cnt will be used in below query */ + memset(prog_ids, 0, sizeof(prog_ids)); + optq.prog_ids = prog_ids; + err = bpf_prog_query_opts(cg, atype, &optq); + if (!ASSERT_OK(err, "prog_query")) + goto cleanup4; + + ASSERT_EQ(optq.count, 4, "count"); + ASSERT_EQ(optq.revision, 5, "revision"); + ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]"); + ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]"); + ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]"); + ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]"); + ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]"); + ASSERT_EQ(optq.link_ids, NULL, "link_ids"); + +cleanup4: + bpf_link__destroy(link4); + assert_mprog_count(cg, atype, 3); + +cleanup3: + bpf_link__destroy(link3); + assert_mprog_count(cg, atype, 2); + + /* Check revision after two detach operations */ + err = 
bpf_prog_query_opts(cg, atype, &optq); + ASSERT_OK(err, "prog_query"); + ASSERT_EQ(optq.revision, 7, "revision"); + +cleanup2: + bpf_link__destroy(link2); + assert_mprog_count(cg, atype, 1); + +cleanup1: + bpf_link__destroy(link1); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_preorder_prog_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + LIBBPF_OPTS(bpf_prog_detach_opts, optd); + __u32 fd1, fd2, fd3, fd4; + struct cgroup_mprog *skel; + int cg, err; + + cg = test__join_cgroup("/preorder_prog_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_prog_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + fd3 = bpf_program__fd(skel->progs.getsockopt_3); + fd4 = bpf_program__fd(skel->progs.getsockopt_4); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER, + .expected_revision = 2, + ); + + /* ordering: [fd1, fd2] */ + err = bpf_prog_attach_opts(fd2, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, -EINVAL, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_PREORDER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + /* ordering: [fd1, fd2, fd3] */ + err = bpf_prog_attach_opts(fd3, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + err = bpf_prog_attach_opts(fd4, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + err = bpf_prog_detach_opts(fd4, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 3); + +cleanup3: + err = bpf_prog_detach_opts(fd3, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 2); + +cleanup2: + err = bpf_prog_detach_opts(fd2, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 1); + +cleanup1: + err = bpf_prog_detach_opts(fd1, cg, atype, &optd); + ASSERT_OK(err, "prog_detach"); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_preorder_link_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_cgroup_opts, opta); + struct bpf_link *link1, *link2, *link3, *link4; + struct cgroup_mprog *skel; + __u32 fd2; + int cg; + + cg = test__join_cgroup("/preorder_link_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_link_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + + 
assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 1, + ); + + /* ordering: [fd1] */ + link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta); + if (!ASSERT_OK_PTR(link1, "link_attach")) + goto cleanup; + + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_PREORDER, + .expected_revision = 2, + ); + + /* ordering: [fd1, fd2] */ + link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta); + if (!ASSERT_OK_PTR(link2, "link_attach")) + goto cleanup1; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER, + .relative_fd = fd2, + .expected_revision = 3, + ); + + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_ERR_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 2); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_AFTER | BPF_F_PREORDER | BPF_F_LINK, + .relative_fd = bpf_link__fd(link2), + .expected_revision = 3, + ); + + /* ordering: [fd1, fd2, fd3] */ + link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta); + if (!ASSERT_OK_PTR(link3, "link_attach")) + goto cleanup2; + + assert_mprog_count(cg, atype, 3); + + LIBBPF_OPTS_RESET(opta, + .expected_revision = 4, + ); + + /* ordering: [fd2, fd3, fd1, fd4] */ + link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta); + if (!ASSERT_OK_PTR(link4, "prog_attach")) + goto cleanup3; + + assert_mprog_count(cg, atype, 4); + + bpf_link__destroy(link4); + assert_mprog_count(cg, atype, 3); + +cleanup3: + bpf_link__destroy(link3); + assert_mprog_count(cg, atype, 2); + +cleanup2: + bpf_link__destroy(link2); + assert_mprog_count(cg, atype, 1); + +cleanup1: + bpf_link__destroy(link1); + assert_mprog_count(cg, atype, 0); + +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +static void test_invalid_attach_detach(int atype) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opta); + __u32 fd1, fd2, id2; + struct cgroup_mprog *skel; + int cg, err; + + cg = test__join_cgroup("/invalid_attach_detach"); + if (!ASSERT_GE(cg, 0, "join_cgroup /invalid_attach_detach")) + return; + + skel = cgroup_mprog__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + fd1 = bpf_program__fd(skel->progs.getsockopt_1); + fd2 = bpf_program__fd(skel->progs.getsockopt_2); + + id2 = id_from_prog_fd(fd2); + + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_ID, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_LINK, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, 
-EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + .relative_id = id2, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + .relative_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -ENOENT, "prog_attach"); + assert_mprog_count(cg, atype, 0); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + if (!ASSERT_EQ(err, 0, "prog_attach")) + goto cleanup; + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 1); + + LIBBPF_OPTS_RESET(opta, + .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE | BPF_F_AFTER, + .replace_prog_fd = fd1, + ); + + err = bpf_prog_attach_opts(fd1, cg, atype, &opta); + ASSERT_EQ(err, -EINVAL, "prog_attach"); + assert_mprog_count(cg, atype, 1); +cleanup: + cgroup_mprog__destroy(skel); + close(cg); +} + +void test_cgroup_mprog_opts(void) +{ + if (test__start_subtest("prog_attach_detach")) + test_prog_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("link_attach_detach")) + test_link_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("preorder_prog_attach_detach")) + test_preorder_prog_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("preorder_link_attach_detach")) + test_preorder_link_attach_detach(BPF_CGROUP_GETSOCKOPT); + if (test__start_subtest("invalid_attach_detach")) + test_invalid_attach_detach(BPF_CGROUP_GETSOCKOPT); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c new file mode 100644 index 000000000000..a36d2e968bc5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ +#include <test_progs.h> +#include "cgroup_helpers.h" +#include "cgroup_preorder.skel.h" + +static int run_getsockopt_test(int cg_parent, int sock_fd, bool has_relative_fd) +{ + LIBBPF_OPTS(bpf_prog_attach_opts, opts); + enum bpf_attach_type prog_p_atype, prog_p2_atype; + int prog_p_fd, prog_p2_fd; + struct cgroup_preorder *skel = NULL; + struct bpf_program *prog; + __u8 *result, buf; + socklen_t optlen = 1; + int err = 0; + + skel = cgroup_preorder__open_and_load(); + if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load")) + return 0; + + LIBBPF_OPTS_RESET(opts); + opts.flags = BPF_F_ALLOW_MULTI; + prog = skel->progs.parent; + prog_p_fd = bpf_program__fd(prog); + prog_p_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent")) + goto close_skel; + + opts.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE; + if (has_relative_fd) + opts.relative_fd = prog_p_fd; + prog = skel->progs.parent_2; + prog_p2_fd = bpf_program__fd(prog); + prog_p2_atype = bpf_program__expected_attach_type(prog); + err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts); + if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2")) + goto detach_parent; + + err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen); + if (!ASSERT_OK(err, "getsockopt")) + goto detach_parent_2; + + result = skel->bss->result; + ASSERT_TRUE(result[0] == 4 && result[1] == 3, "result values"); + +detach_parent_2: + ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype), + "bpf_prog_detach2-parent_2"); +detach_parent: + ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype), + "bpf_prog_detach2-parent"); +close_skel: + cgroup_preorder__destroy(skel); + return err; +} + +void test_cgroup_mprog_ordering(void) +{ + int cg_parent = -1, sock_fd = -1; + + cg_parent = test__join_cgroup("/parent"); + if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent")) + goto out; + + sock_fd = socket(AF_INET, SOCK_STREAM, 0); + if (!ASSERT_GE(sock_fd, 0, "socket")) + goto out; + + ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, false), "getsockopt_test_1"); + ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, true), "getsockopt_test_2"); + +out: + close(sock_fd); + close(cg_parent); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c new file mode 100644 index 000000000000..e0dd966e4a3e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ + +#include <errno.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <string.h> +#include <unistd.h> +#include <sys/socket.h> +#include <test_progs.h> +#include "cgroup_helpers.h" + +#include "read_cgroupfs_xattr.skel.h" +#include "cgroup_read_xattr.skel.h" + +#define CGROUP_FS_PARENT "foo/" +#define CGROUP_FS_CHILD CGROUP_FS_PARENT "bar/" +#define TMP_FILE "/tmp/selftests_cgroup_xattr" + +static const char xattr_value_a[] = "bpf_selftest_value_a"; +static const char xattr_value_b[] = "bpf_selftest_value_b"; +static const char xattr_name[] = "user.bpf_test"; + +static void test_read_cgroup_xattr(void) +{ + int tmp_fd, parent_cgroup_fd = -1, child_cgroup_fd = -1; + struct read_cgroupfs_xattr *skel = NULL; + + parent_cgroup_fd = test__join_cgroup(CGROUP_FS_PARENT); + if (!ASSERT_OK_FD(parent_cgroup_fd, "create parent cgroup")) + return; + if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a), + "set parent xattr")) + goto out; + + child_cgroup_fd = test__join_cgroup(CGROUP_FS_CHILD); + if (!ASSERT_OK_FD(child_cgroup_fd, "create child cgroup")) + goto out; + if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b), + "set child xattr")) + goto out; + + skel = read_cgroupfs_xattr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load")) + goto out; + + skel->bss->target_pid = gettid(); + + if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach")) + goto out; + + tmp_fd = open(TMP_FILE, O_RDONLY | O_CREAT); + ASSERT_OK_FD(tmp_fd, "open tmp file"); + close(tmp_fd); + + ASSERT_TRUE(skel->bss->found_value_a, "found_value_a"); + ASSERT_TRUE(skel->bss->found_value_b, "found_value_b"); + +out: + close(child_cgroup_fd); + close(parent_cgroup_fd); + read_cgroupfs_xattr__destroy(skel); + unlink(TMP_FILE); +} + +void test_cgroup_xattr(void) +{ + RUN_TESTS(cgroup_read_xattr); + + if (test__start_subtest("read_cgroupfs_xattr")) + test_read_cgroup_xattr(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 62e7ec775f24..9b2d9ceda210 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -21,6 +21,14 @@ static struct { {"test_dynptr_data", SETUP_SYSCALL_SLEEP}, {"test_dynptr_copy", SETUP_SYSCALL_SLEEP}, {"test_dynptr_copy_xdp", SETUP_XDP_PROG}, + {"test_dynptr_memset_zero", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_notzero", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_zero_offset", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_zero_adjusted", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_overflow", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_overflow_offset", SETUP_SYSCALL_SLEEP}, + {"test_dynptr_memset_readonly", SETUP_SKB_PROG}, + {"test_dynptr_memset_xdp_chunks", SETUP_XDP_PROG}, {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, @@ -43,6 +51,8 @@ static struct { {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP}, }; +#define PAGE_SIZE_64K 65536 + static void verify_success(const char *prog_name, enum test_setup_type setup_type) { char user_data[384] = {[0 ... 
382] = 'a', '\0'}; @@ -138,14 +148,18 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ } case SETUP_XDP_PROG: { - char data[5000]; + char data[90000]; int err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = sizeof(data), .repeat = 1, ); + if (getpagesize() == PAGE_SIZE_64K) + opts.data_size_in = sizeof(data); + else + opts.data_size_in = 5000; + prog_fd = bpf_program__fd(prog); err = bpf_prog_test_run_opts(prog_fd, &opts); diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c index 9add890c2d37..241b2c8c6e0f 100644 --- a/tools/testing/selftests/bpf/prog_tests/fd_array.c +++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c @@ -312,7 +312,7 @@ static void check_fd_array_cnt__referenced_btfs(void) /* btf should still exist when original file descriptor is closed */ err = get_btf_id_by_fd(extra_fds[0], &btf_id); - if (!ASSERT_GE(err, 0, "get_btf_id_by_fd")) + if (!ASSERT_EQ(err, 0, "get_btf_id_by_fd")) goto cleanup; Close(extra_fds[0]); diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c b/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c deleted file mode 100644 index 568d3aa48a78..000000000000 --- a/tools/testing/selftests/bpf/prog_tests/fexit_noreturns.c +++ /dev/null @@ -1,9 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <test_progs.h> -#include "fexit_noreturns.skel.h" - -void test_fexit_noreturns(void) -{ - RUN_TESTS(fexit_noreturns); -} diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c index 5266c7022863..14c5a7ef0e87 100644 --- a/tools/testing/selftests/bpf/prog_tests/linked_list.c +++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c @@ -72,7 +72,7 @@ static struct { { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" }, { "obj_new_acq", "Unreleased reference id=" }, { "use_after_drop", "invalid mem access 'scalar'" }, - { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" }, + { "ptr_walk_scalar", "type=rdonly_untrusted_mem expected=percpu_ptr_" }, { "direct_read_lock", "direct access to bpf_spin_lock is disallowed" }, { "direct_write_lock", "direct access to bpf_spin_lock is disallowed" }, { "direct_read_head", "direct access to bpf_list_head is disallowed" }, diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c index 169ce689b97c..d6f14a232002 100644 --- a/tools/testing/selftests/bpf/prog_tests/log_buf.c +++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c @@ -7,6 +7,10 @@ #include "test_log_buf.skel.h" #include "bpf_util.h" +#if !defined(__clang__) +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif + static size_t libbpf_log_pos; static char libbpf_log_buf[1024 * 1024]; static bool libbpf_log_error; diff --git a/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c new file mode 100644 index 000000000000..40d4f687bd9c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <test_progs.h> +#include "mem_rdonly_untrusted.skel.h" + +void test_mem_rdonly_untrusted(void) +{ + RUN_TESTS(mem_rdonly_untrusted); +} diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c index 8100509e561b..0ffa01d54ce2 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c +++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c @@ -149,3 +149,70 @@ close_prog: fentry_recursive_target__destroy(target_skel); fentry_recursive__destroy(tracing_skel); } + +static void *fentry_target_test_run(void *arg) +{ + for (;;) { + int prog_fd = __atomic_load_n((int *)arg, __ATOMIC_SEQ_CST); + LIBBPF_OPTS(bpf_test_run_opts, topts); + int err; + + if (prog_fd == -1) + break; + err = bpf_prog_test_run_opts(prog_fd, &topts); + if (!ASSERT_OK(err, "fentry_target test_run")) + break; + } + + return NULL; +} + +void test_fentry_attach_stress(void) +{ + struct fentry_recursive_target *target_skel = NULL; + struct fentry_recursive *tracing_skel = NULL; + struct bpf_program *prog; + int err, i, tgt_prog_fd; + pthread_t thread; + + target_skel = fentry_recursive_target__open_and_load(); + if (!ASSERT_OK_PTR(target_skel, + "fentry_recursive_target__open_and_load")) + goto close_prog; + tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target); + err = pthread_create(&thread, NULL, + fentry_target_test_run, &tgt_prog_fd); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto close_prog; + + for (i = 0; i < 1000; i++) { + tracing_skel = fentry_recursive__open(); + if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open")) + goto stop_thread; + + prog = tracing_skel->progs.recursive_attach; + err = bpf_program__set_attach_target(prog, tgt_prog_fd, + "fentry_target"); + if (!ASSERT_OK(err, "bpf_program__set_attach_target")) + goto stop_thread; + + err = fentry_recursive__load(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__load")) + goto stop_thread; + + err = fentry_recursive__attach(tracing_skel); + if (!ASSERT_OK(err, "fentry_recursive__attach")) + goto stop_thread; + + fentry_recursive__destroy(tracing_skel); + tracing_skel = NULL; + } + +stop_thread: + __atomic_store_n(&tgt_prog_fd, -1, __ATOMIC_SEQ_CST); + err = pthread_join(thread, NULL); + ASSERT_OK(err, "pthread_join"); +close_prog: + fentry_recursive__destroy(tracing_skel); + fentry_recursive_target__destroy(target_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 39d42271cc46..e261b0e872db 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -465,6 +465,20 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, return range_improve(x_t, x, x_swap); } + if (!t_is_32(x_t) && !t_is_32(y_t) && x_t != y_t) { + if (x_t == S64 && x.a > x.b) { + if (x.b < y.a && x.a <= y.b) + return range(x_t, x.a, y.b); + if (x.a > y.b && x.b >= y.a) + return range(x_t, y.a, x.b); + } else if (x_t == U64 && y.a > y.b) { + if (y.b < x.a && y.a <= x.b) + return range(x_t, y.a, x.b); + if (y.a > x.b && y.b >= x.a) + return range(x_t, x.a, y.b); + } + } + /* otherwise, plain range cast and intersection works */ return range_improve(x_t, x, y_cast); } diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index da430df45aa4..d1e4cb28a72c 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -97,7 +97,7 @@ static void ringbuf_write_subtest(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; - skel->maps.ringbuf.max_entries = 0x4000; + skel->maps.ringbuf.max_entries = 0x40000; err = test_ringbuf_write_lskel__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -108,7 
+108,7 @@ static void ringbuf_write_subtest(void) mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0); if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos")) goto cleanup; - *mmap_ptr = 0x3000; + *mmap_ptr = 0x30000; ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw"); skel->bss->pid = getpid(); diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c index 4be6fdb78c6a..594441acb707 100644 --- a/tools/testing/selftests/bpf/prog_tests/snprintf.c +++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c @@ -116,6 +116,8 @@ static void test_snprintf_negative(void) ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7"); ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character"); ASSERT_ERR(load_single_snprintf("\x1"), "non printable character"); + ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8"); + ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9"); } void test_snprintf(void) diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c index a4517bee34d5..27781df8f2fb 100644 --- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c @@ -1,11 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2024 Meta +#include <poll.h> #include <test_progs.h> #include "network_helpers.h" #include "sock_iter_batch.skel.h" #define TEST_NS "sock_iter_batch_netns" +#define TEST_CHILD_NS "sock_iter_batch_child_netns" static const int init_batch_size = 16; static const int nr_soreuse = 4; @@ -118,6 +120,45 @@ done: return nth_sock_idx; } +static void destroy(int fd) +{ + struct sock_iter_batch *skel = NULL; + __u64 cookie = socket_cookie(fd); + struct bpf_link *link = NULL; + int iter_fd = -1; + int nread; + __u64 out; + + skel = sock_iter_batch__open(); + if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open")) + goto done; + + skel->rodata->destroy_cookie = cookie; + + if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load")) + goto done; + + link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL); + if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter")) + goto done; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create")) + goto done; + + /* Delete matching socket. 
*/ + nread = read(iter_fd, &out, sizeof(out)); + ASSERT_GE(nread, 0, "nread"); + if (nread) + ASSERT_EQ(out, cookie, "cookie matches"); +done: + if (iter_fd >= 0) + close(iter_fd); + bpf_link__destroy(link); + sock_iter_batch__destroy(skel); + close(fd); +} + static int get_seen_count(int fd, struct sock_count counts[], int n) { __u64 cookie = socket_cookie(fd); @@ -152,8 +193,71 @@ static void check_n_were_seen_once(int *fds, int fds_len, int n, ASSERT_EQ(seen_once, n, "seen_once"); } +static int accept_from_one(struct pollfd *server_poll_fds, + int server_poll_fds_len) +{ + static const int poll_timeout_ms = 5000; /* 5s */ + int ret; + int i; + + ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms); + if (!ASSERT_EQ(ret, 1, "poll")) + return -1; + + for (i = 0; i < server_poll_fds_len; i++) + if (server_poll_fds[i].revents & POLLIN) + return accept(server_poll_fds[i].fd, NULL, NULL); + + return -1; +} + +static int *connect_to_server(int family, int sock_type, const char *addr, + __u16 port, int nr_connects, int *server_fds, + int server_fds_len) +{ + struct pollfd *server_poll_fds = NULL; + int *established_socks = NULL; + int i; + + server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds)); + if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds")) + return NULL; + + for (i = 0; i < server_fds_len; i++) { + server_poll_fds[i].fd = server_fds[i]; + server_poll_fds[i].events = POLLIN; + } + + i = 0; + + established_socks = malloc(sizeof(*established_socks) * nr_connects*2); + if (!ASSERT_OK_PTR(established_socks, "established_socks")) + goto error; + + while (nr_connects--) { + established_socks[i] = connect_to_addr_str(family, sock_type, + addr, port, NULL); + if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str")) + goto error; + i++; + established_socks[i] = accept_from_one(server_poll_fds, + server_fds_len); + if (!ASSERT_OK_FD(established_socks[i], "accept_from_one")) + goto error; + i++; + } + + free(server_poll_fds); + return established_socks; +error: + free_fds(established_socks, i); + free(server_poll_fds); + return NULL; +} + static void remove_seen(int family, int sock_type, const char *addr, __u16 port, - int *socks, int socks_len, struct sock_count *counts, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { int close_idx; @@ -182,8 +286,46 @@ static void remove_seen(int family, int sock_type, const char *addr, __u16 port, counts_len); } +static void remove_seen_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Leave one established socket. */ + read_n(iter_fd, established_socks_len - 1, counts, counts_len); + + /* Close a socket we've already seen to remove it from the bucket. 
*/ + close_idx = get_nth_socket(established_socks, established_socks_len, + link, listen_socks_len + 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + destroy(established_socks[close_idx]); + established_socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the last socket wasn't skipped and that there were no + * repeats. + */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); +} + static void remove_unseen(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { @@ -214,8 +356,54 @@ static void remove_unseen(int family, int sock_type, const char *addr, counts_len); } +static void remove_unseen_established(int family, int sock_type, + const char *addr, __u16 port, + int *listen_socks, int listen_socks_len, + int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int close_idx; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established socket. */ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw one established socks. */ + check_n_were_seen_once(established_socks, established_socks_len, 1, + counts, counts_len); + + /* Close what would be the next socket in the bucket to exercise the + * condition where we need to skip past the first cookie we remembered. + */ + close_idx = get_nth_socket(established_socks, established_socks_len, + link, listen_socks_len + 1); + if (!ASSERT_GE(close_idx, 0, "close_idx")) + return; + + destroy(established_socks[close_idx]); + established_socks[close_idx] = -1; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure the remaining sockets were seen exactly once and that we + * didn't repeat the socket that was already seen. + */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); +} + static void remove_all(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { @@ -242,8 +430,57 @@ static void remove_all(int family, int sock_type, const char *addr, ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); } +static void remove_all_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + int *close_idx = NULL; + int i; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established socket. 
*/ + read_n(iter_fd, 1, counts, counts_len); + + /* Make sure we saw one established socks. */ + check_n_were_seen_once(established_socks, established_socks_len, 1, + counts, counts_len); + + /* Close all remaining sockets to exhaust the list of saved cookies and + * exit without putting any sockets into the batch on the next read. + */ + close_idx = malloc(sizeof(int) * (established_socks_len - 1)); + if (!ASSERT_OK_PTR(close_idx, "close_idx malloc")) + return; + for (i = 0; i < established_socks_len - 1; i++) { + close_idx[i] = get_nth_socket(established_socks, + established_socks_len, link, + listen_socks_len + i); + if (!ASSERT_GE(close_idx[i], 0, "close_idx")) + return; + } + + for (i = 0; i < established_socks_len - 1; i++) { + destroy(established_socks[close_idx[i]]); + established_socks[close_idx[i]] = -1; + } + + /* Make sure there are no more sockets returned */ + ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n"); + free(close_idx); +} + static void add_some(int family, int sock_type, const char *addr, __u16 port, - int *socks, int socks_len, struct sock_count *counts, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { int *new_socks = NULL; @@ -271,8 +508,52 @@ done: free_fds(new_socks, socks_len); } +static void add_some_established(int family, int sock_type, const char *addr, + __u16 port, int *listen_socks, + int listen_socks_len, int *established_socks, + int established_socks_len, + struct sock_count *counts, + int counts_len, struct bpf_link *link, + int iter_fd) +{ + int *new_socks = NULL; + + /* Iterate through all listening sockets. */ + read_n(iter_fd, listen_socks_len, counts, counts_len); + + /* Make sure we saw all listening sockets exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + + /* Iterate through the first established_socks_len - 1 sockets. */ + read_n(iter_fd, established_socks_len - 1, counts, counts_len); + + /* Make sure we saw established_socks_len - 1 sockets exactly once. */ + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len - 1, counts, counts_len); + + /* Double the number of established sockets in the bucket. */ + new_socks = connect_to_server(family, sock_type, addr, port, + established_socks_len / 2, listen_socks, + listen_socks_len); + if (!ASSERT_OK_PTR(new_socks, "connect_to_server")) + goto done; + + /* Iterate through the rest of the sockets. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each of the original sockets was seen exactly once. 
*/ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len, counts, counts_len); +done: + free_fds(new_socks, established_socks_len); +} + static void force_realloc(int family, int sock_type, const char *addr, __u16 port, int *socks, int socks_len, + int *established_socks, int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd) { @@ -299,11 +580,32 @@ done: free_fds(new_socks, socks_len); } +static void force_realloc_established(int family, int sock_type, + const char *addr, __u16 port, + int *listen_socks, int listen_socks_len, + int *established_socks, + int established_socks_len, + struct sock_count *counts, int counts_len, + struct bpf_link *link, int iter_fd) +{ + /* Iterate through all sockets to trigger a realloc. */ + read_n(iter_fd, -1, counts, counts_len); + + /* Make sure each socket was seen exactly once. */ + check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len, + counts, counts_len); + check_n_were_seen_once(established_socks, established_socks_len, + established_socks_len, counts, counts_len); +} + struct test_case { void (*test)(int family, int sock_type, const char *addr, __u16 port, - int *socks, int socks_len, struct sock_count *counts, + int *socks, int socks_len, int *established_socks, + int established_socks_len, struct sock_count *counts, int counts_len, struct bpf_link *link, int iter_fd); const char *description; + int ehash_buckets; + int connections; int init_socks; int max_socks; int sock_type; @@ -358,18 +660,140 @@ static struct test_case resume_tests[] = { .family = AF_INET6, .test = force_realloc, }, + { + .description = "tcp: resume after removing a seen socket (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_seen, + }, + { + .description = "tcp: resume after removing one unseen socket (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_unseen, + }, + { + .description = "tcp: resume after removing all unseen sockets (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_all, + }, + { + .description = "tcp: resume after adding a few sockets (listening)", + .init_socks = nr_soreuse, + .max_socks = nr_soreuse, + .sock_type = SOCK_STREAM, + /* Use AF_INET so that new sockets are added to the head of the + * bucket's list. + */ + .family = AF_INET, + .test = add_some, + }, + { + .description = "tcp: force a realloc to occur (listening)", + .init_socks = init_batch_size, + .max_socks = init_batch_size * 2, + .sock_type = SOCK_STREAM, + /* Use AF_INET6 so that new sockets are added to the tail of the + * bucket's list, needing to be added to the next batch to force + * a realloc. 
+ */ + .family = AF_INET6, + .test = force_realloc, + }, + { + .description = "tcp: resume after removing a seen socket (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_seen_established, + }, + { + .description = "tcp: resume after removing one unseen socket (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_unseen_established, + }, + { + .description = "tcp: resume after removing all unseen sockets (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = remove_all_established, + }, + { + .description = "tcp: resume after adding a few sockets (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + .connections = nr_soreuse, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse * 3, + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = add_some_established, + }, + { + .description = "tcp: force a realloc to occur (established)", + /* Force all established sockets into one bucket */ + .ehash_buckets = 1, + /* Bucket size will need to double when going from listening to + * established sockets. 
+ */ + .connections = init_batch_size, + .init_socks = nr_soreuse, + /* Room for connect()ed and accept()ed sockets */ + .max_socks = nr_soreuse + (init_batch_size * 2), + .sock_type = SOCK_STREAM, + .family = AF_INET6, + .test = force_realloc_established, + }, }; static void do_resume_test(struct test_case *tc) { struct sock_iter_batch *skel = NULL; + struct sock_count *counts = NULL; static const __u16 port = 10001; + struct nstoken *nstoken = NULL; struct bpf_link *link = NULL; - struct sock_count *counts; + int *established_fds = NULL; int err, iter_fd = -1; const char *addr; int *fds = NULL; - int local_port; + + if (tc->ehash_buckets) { + SYS_NOFAIL("ip netns del " TEST_CHILD_NS); + SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d", + tc->ehash_buckets); + SYS(done, "ip netns add %s", TEST_CHILD_NS); + SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS); + nstoken = open_netns(TEST_CHILD_NS); + if (!ASSERT_OK_PTR(nstoken, "open_child_netns")) + goto done; + } counts = calloc(tc->max_socks, sizeof(*counts)); if (!ASSERT_OK_PTR(counts, "counts")) @@ -384,11 +808,18 @@ static void do_resume_test(struct test_case *tc) tc->init_socks); if (!ASSERT_OK_PTR(fds, "start_reuseport_server")) goto done; - local_port = get_socket_local_port(*fds); - if (!ASSERT_GE(local_port, 0, "get_socket_local_port")) - goto done; - skel->rodata->ports[0] = ntohs(local_port); + if (tc->connections) { + established_fds = connect_to_server(tc->family, tc->sock_type, + addr, port, + tc->connections, fds, + tc->init_socks); + if (!ASSERT_OK_PTR(established_fds, "connect_to_server")) + goto done; + } + skel->rodata->ports[0] = 0; + skel->rodata->ports[1] = 0; skel->rodata->sf = tc->family; + skel->rodata->ss = 0; err = sock_iter_batch__load(skel); if (!ASSERT_OK(err, "sock_iter_batch__load")) @@ -406,10 +837,15 @@ static void do_resume_test(struct test_case *tc) goto done; tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks, - counts, tc->max_socks, link, iter_fd); + established_fds, tc->connections*2, counts, tc->max_socks, + link, iter_fd); done: + close_netns(nstoken); + SYS_NOFAIL("ip netns del " TEST_CHILD_NS); + SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0"); free(counts); free_fds(fds, tc->init_socks); + free_fds(established_fds, tc->connections*2); if (iter_fd >= 0) close(iter_fd); bpf_link__destroy(link); @@ -454,6 +890,8 @@ static void do_test(int sock_type, bool onebyone) skel->rodata->ports[i] = ntohs(local_port); } skel->rodata->sf = AF_INET6; + if (sock_type == SOCK_STREAM) + skel->rodata->ss = TCP_LISTEN; err = sock_iter_batch__load(skel); if (!ASSERT_OK(err, "sock_iter_batch__load")) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index b6c471da5c28..b87e7f39e15a 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -314,6 +314,95 @@ out: test_sockmap_ktls__destroy(skel); } +static void test_sockmap_ktls_tx_pop(int family, int sotype) +{ + char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0"; + int c = 0, p = 0, one = 1, sent, recvd; + struct test_sockmap_ktls *skel; + int prog_fd, map_fd; + char rcv[50] = {0}; + int err; + int i, m, r; + + skel = test_sockmap_ktls__open_and_load(); + if (!ASSERT_TRUE(skel, "open ktls skel")) + return; + + err = create_pair(family, sotype, &c, &p); + if (!ASSERT_OK(err, "create_pair()")) + goto out; + + prog_fd = bpf_program__fd(skel->progs.prog_sk_policy); + map_fd = 
bpf_map__fd(skel->maps.sock_map); + + err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach sk msg")) + goto out; + + err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST); + if (!ASSERT_OK(err, "bpf_map_update_elem(c)")) + goto out; + + err = init_ktls_pairs(c, p); + if (!ASSERT_OK(err, "init_ktls_pairs(c, p)")) + goto out; + + struct { + int pop_start; + int pop_len; + } pop_policy[] = { + /* trim the start */ + {0, 2}, + {0, 10}, + {1, 2}, + {1, 10}, + /* trim the end */ + {35, 2}, + /* New entries should be added before this line */ + {-1, -1}, + }; + + i = 0; + while (pop_policy[i].pop_start >= 0) { + skel->bss->pop_start = pop_policy[i].pop_start; + skel->bss->pop_end = pop_policy[i].pop_len; + + sent = send(c, msg, sizeof(msg), 0); + if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)")) + goto out; + + recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1); + if (!ASSERT_EQ(recvd, sizeof(msg) - pop_policy[i].pop_len, "pop len mismatch")) + goto out; + + /* verify the data + * msg: 0123456789a bcdefghij klmnopqrstuvwxyz + * | | + * popped data + */ + for (m = 0, r = 0; m < sizeof(msg);) { + /* skip checking the data that has been popped */ + if (m >= pop_policy[i].pop_start && + m <= pop_policy[i].pop_start + pop_policy[i].pop_len - 1) { + m++; + continue; + } + + if (!ASSERT_EQ(msg[m], rcv[r], "data mismatch")) + goto out; + m++; + r++; + } + i++; + } +out: + if (c) + close(c); + if (p) + close(p); + test_sockmap_ktls__destroy(skel); +} + static void run_tests(int family, enum bpf_map_type map_type) { int map; @@ -338,6 +427,8 @@ static void run_ktls_test(int family, int sotype) test_sockmap_ktls_tx_cork(family, sotype, true); if (test__start_subtest("tls tx egress with no buf")) test_sockmap_ktls_tx_no_buf(family, sotype, true); + if (test__start_subtest("tls tx with pop")) + test_sockmap_ktls_tx_pop(family, sotype); } void test_sockmap_ktls(void) diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c index 1d98eee7a2c3..f1bdccc7e4e7 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c @@ -924,6 +924,8 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map) goto close; n = xsend(c1, buf, sizeof(buf), 0); + if (n == -1) + goto close; if (n < sizeof(buf)) FAIL("incomplete write"); diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c new file mode 100644 index 000000000000..d9f0185dca61 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stream.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ +#include <test_progs.h> +#include <sys/mman.h> +#include <regex.h> + +#include "stream.skel.h" +#include "stream_fail.skel.h" + +void test_stream_failure(void) +{ + RUN_TESTS(stream_fail); +} + +void test_stream_success(void) +{ + RUN_TESTS(stream); + return; +} + +struct { + int prog_off; + const char *errstr; +} stream_error_arr[] = { + { + offsetof(struct stream, progs.stream_cond_break), + "ERROR: Timeout detected for may_goto instruction\n" + "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" + "Call trace:\n" + "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" + "|[ \t]+[^\n]+\n)*", + }, + { + offsetof(struct stream, progs.stream_deadlock), + "ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock\n" + "Attempted lock = (0x[0-9a-fA-F]+)\n" + "Total held locks = 1\n" + "Held lock\\[ 0\\] = \\1\n" // Lock address must match + "CPU: [0-9]+ UID: 0 PID: [0-9]+ Comm: .*\n" + "Call trace:\n" + "([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n" + "|[ \t]+[^\n]+\n)*", + }, +}; + +static int match_regex(const char *pattern, const char *string) +{ + int err, rc; + regex_t re; + + err = regcomp(&re, pattern, REG_EXTENDED | REG_NEWLINE); + if (err) + return -1; + rc = regexec(&re, string, 0, NULL, 0); + regfree(&re); + return rc == 0 ? 1 : 0; +} + +void test_stream_errors(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + struct stream *skel; + int ret, prog_fd; + char buf[1024]; + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + for (int i = 0; i < ARRAY_SIZE(stream_error_arr); i++) { + struct bpf_program **prog; + + prog = (struct bpf_program **)(((char *)skel) + stream_error_arr[i].prog_off); + prog_fd = bpf_program__fd(*prog); + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + +#if !defined(__x86_64__) + ASSERT_TRUE(1, "Timed may_goto unsupported, skip."); + if (i == 0) { + ret = bpf_prog_stream_read(prog_fd, 2, buf, sizeof(buf), &ropts); + ASSERT_EQ(ret, 0, "stream read"); + continue; + } +#endif + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts); + ASSERT_GT(ret, 0, "stream read"); + ASSERT_LE(ret, 1023, "len for buf"); + buf[ret] = '\0'; + + ret = match_regex(stream_error_arr[i].errstr, buf); + if (!ASSERT_TRUE(ret == 1, "regex match")) + fprintf(stderr, "Output from stream:\n%s\n", buf); + } + + stream__destroy(skel); +} + +void test_stream_syscall(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts); + struct stream *skel; + int ret, prog_fd; + char buf[64]; + + skel = stream__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stream__open_and_load")) + return; + + prog_fd = bpf_program__fd(skel->progs.stream_syscall); + ret = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_OK(ret, "ret"); + ASSERT_OK(opts.retval, "retval"); + + ASSERT_LT(bpf_prog_stream_read(0, BPF_STREAM_STDOUT, buf, sizeof(buf), &ropts), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -EINVAL, "bad prog_fd"); + + ASSERT_LT(bpf_prog_stream_read(prog_fd, 0, buf, sizeof(buf), &ropts), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -ENOENT, "bad stream id"); + + ASSERT_LT(bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, NULL, sizeof(buf), NULL), 0, "error"); + ret = -errno; + ASSERT_EQ(ret, -EFAULT, "bad stream buf"); + + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL); + ASSERT_EQ(ret, 2, "bytes"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, 
buf, 2, NULL); + ASSERT_EQ(ret, 1, "bytes"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 1, &ropts); + ASSERT_EQ(ret, 0, "no bytes stdout"); + ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, 1, &ropts); + ASSERT_EQ(ret, 0, "no bytes stderr"); + + stream__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c new file mode 100644 index 000000000000..35af8044d059 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include <test_progs.h> +#include "string_kfuncs_success.skel.h" +#include "string_kfuncs_failure1.skel.h" +#include "string_kfuncs_failure2.skel.h" +#include <sys/mman.h> + +static const char * const test_cases[] = { + "strcmp", + "strchr", + "strchrnul", + "strnchr", + "strrchr", + "strlen", + "strnlen", + "strspn_str", + "strspn_accept", + "strcspn_str", + "strcspn_reject", + "strstr", + "strnstr", +}; + +void run_too_long_tests(void) +{ + struct string_kfuncs_failure2 *skel; + struct bpf_program *prog; + char test_name[256]; + int err, i; + + skel = string_kfuncs_failure2__open_and_load(); + if (!ASSERT_OK_PTR(skel, "string_kfuncs_failure2__open_and_load")) + return; + + memset(skel->bss->long_str, 'a', sizeof(skel->bss->long_str)); + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + sprintf(test_name, "test_%s_too_long", test_cases[i]); + if (!test__start_subtest(test_name)) + continue; + + prog = bpf_object__find_program_by_name(skel->obj, test_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto cleanup; + + LIBBPF_OPTS(bpf_test_run_opts, topts); + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts); + if (!ASSERT_OK(err, "bpf_prog_test_run")) + goto cleanup; + + ASSERT_EQ(topts.retval, -E2BIG, "reading too long string fails with -E2BIG"); + } + +cleanup: + string_kfuncs_failure2__destroy(skel); +} + +void test_string_kfuncs(void) +{ + RUN_TESTS(string_kfuncs_success); + RUN_TESTS(string_kfuncs_failure1); + + run_too_long_tests(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index 66a900327f91..0ab36503c3b2 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1195,7 +1195,7 @@ static void test_tailcall_hierarchy_count(const char *which, bool test_fentry, bool test_fexit, bool test_fentry_entry) { - int err, map_fd, prog_fd, main_data_fd, fentry_data_fd, fexit_data_fd, i, val; + int err, map_fd, prog_fd, main_data_fd, fentry_data_fd = 0, fexit_data_fd = 0, i, val; struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL; struct bpf_link *fentry_link = NULL, *fexit_link = NULL; struct bpf_program *prog, *fentry_prog; diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h index 924d0e25320c..d52a62af77bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h +++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h @@ -8,34 +8,6 @@ # define loopback 1 #endif -static inline __u32 id_from_prog_fd(int fd) -{ - struct bpf_prog_info prog_info = {}; - __u32 prog_info_len = sizeof(prog_info); - int err; - - err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len); - if (!ASSERT_OK(err, "id_from_prog_fd")) - return 0; - - ASSERT_NEQ(prog_info.id, 0, "prog_info.id"); - return prog_info.id; 
-} - -static inline __u32 id_from_link_fd(int fd) -{ - struct bpf_link_info link_info = {}; - __u32 link_info_len = sizeof(link_info); - int err; - - err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); - if (!ASSERT_OK(err, "id_from_link_fd")) - return 0; - - ASSERT_NEQ(link_info.id, 0, "link_info.id"); - return link_info.id; -} - static inline __u32 ifindex_from_link_fd(int fd) { struct bpf_link_info link_info = {}; diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c index bcdbd27f22f0..273dd41ca09e 100644 --- a/tools/testing/selftests/bpf/test_sysctl.c +++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c @@ -1,22 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) 2019 Facebook -#include <fcntl.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> - -#include <linux/filter.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include <bpf/bpf_endian.h> -#include "bpf_util.h" +#include "test_progs.h" #include "cgroup_helpers.h" -#include "testing_helpers.h" #define CG_PATH "/foo" #define MAX_INSNS 512 @@ -1608,26 +1594,19 @@ static int run_tests(int cgfd) return fails ? -1 : 0; } -int main(int argc, char **argv) +void test_sysctl(void) { - int cgfd = -1; - int err = 0; + int cgfd; cgfd = cgroup_setup_and_join(CG_PATH); - if (cgfd < 0) - goto err; + if (!ASSERT_OK_FD(cgfd < 0, "create_cgroup")) + goto out; - /* Use libbpf 1.0 API mode */ - libbpf_set_strict_mode(LIBBPF_STRICT_ALL); + if (!ASSERT_OK(run_tests(cgfd), "run_tests")) + goto out; - if (run_tests(cgfd)) - goto err; - - goto out; -err: - err = -1; out: close(cgfd); cleanup_cgroup_environment(); - return err; + return; } diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c index 47b56c258f3f..367f47e4a936 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_veristat.c +++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c @@ -60,13 +60,19 @@ static void test_set_global_vars_succeeds(void) " -G \"var_s8 = -128\" "\ " -G \"var_u8 = 255\" "\ " -G \"var_ea = EA2\" "\ - " -G \"var_eb = EB2\" "\ - " -G \"var_ec = EC2\" "\ + " -G \"var_eb = EB2\" "\ + " -G \"var_ec=EC2\" "\ " -G \"var_b = 1\" "\ - " -G \"struct1.struct2.u.var_u8 = 170\" "\ + " -G \"struct1[2].struct2[1][2].u.var_u8[2]=170\" "\ " -G \"union1.struct3.var_u8_l = 0xaa\" "\ " -G \"union1.struct3.var_u8_h = 0xaa\" "\ - "-vl2 > %s", fix->veristat, fix->tmpfile); + " -G \"arr[3]= 171\" " \ + " -G \"arr[EA2] =172\" " \ + " -G \"enum_arr[EC2]=EA3\" " \ + " -G \"three_d[31][7][EA2]=173\"" \ + " -G \"struct1[2].struct2[1][2].u.mat[5][3]=174\" " \ + " -G \"struct11 [ 7 ] [ 5 ] .struct2[0][1].u.mat[3][0] = 175\" " \ + " -vl2 > %s", fix->veristat, fix->tmpfile); read(fix->fd, fix->output, fix->sz); __CHECK_STR("_w=0xf000000000000001 ", "var_s64 = 0xf000000000000001"); @@ -81,8 +87,14 @@ static void test_set_global_vars_succeeds(void) __CHECK_STR("_w=12 ", "var_eb = EB2"); __CHECK_STR("_w=13 ", "var_ec = EC2"); __CHECK_STR("_w=1 ", "var_b = 1"); - __CHECK_STR("_w=170 ", "struct1.struct2.u.var_u8 = 170"); + __CHECK_STR("_w=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170"); __CHECK_STR("_w=0xaaaa ", "union1.var_u16 = 0xaaaa"); + __CHECK_STR("_w=171 ", "arr[3]= 171"); + __CHECK_STR("_w=172 ", "arr[EA2] =172"); + __CHECK_STR("_w=10 ", "enum_arr[EC2]=EA3"); + __CHECK_STR("_w=173 ", "matrix[31][7][11]=173"); + __CHECK_STR("_w=174 ", 
"struct1[2].struct2[1][2].u.mat[5][3]=174"); + __CHECK_STR("_w=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175"); out: teardown_fixture(fix); @@ -129,6 +141,95 @@ out: teardown_fixture(fix); } +static void test_unsupported_ptr_array_type(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"ptr_arr[0] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Can't set ptr_arr[0]. Only ints and enums are supported", "ptr_arr"); + +out: + teardown_fixture(fix); +} + +static void test_array_out_of_bounds(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr[99] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Array index 99 is out of bounds", "arr[99]"); + +out: + teardown_fixture(fix); +} + +static void test_array_index_not_found(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr[EG2] = 0\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + read(fix->fd, fix->output, fix->sz); + __CHECK_STR("Can't resolve enum value EG2", "arr[EG2]"); + +out: + teardown_fixture(fix); +} + +static void test_array_index_for_non_array(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"var_b[0] = 1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Array index is not expected for var_b", "var_b[0] = 1"); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"union1.struct3[0].var_u8_l=1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Array index is not expected for struct3", "union1.struct3[0].var_u8_l=1"); + +out: + teardown_fixture(fix); +} + +static void test_no_array_index_for_array(void) +{ + struct fixture *fix = init_fixture(); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"arr = 1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Can't set arr. 
Only ints and enums are supported", "arr = 1"); + + SYS_FAIL(out, + "%s set_global_vars.bpf.o -G \"struct1[0].struct2.u.var_u8[2]=1\" -vl2 2> %s", + fix->veristat, fix->tmpfile); + + pread(fix->fd, fix->output, fix->sz, 0); + __CHECK_STR("Can't resolve field u for non-composite type", "struct1[0].struct2.u.var_u8[2]=1"); + +out: + teardown_fixture(fix); +} + void test_veristat(void) { if (test__start_subtest("set_global_vars_succeeds")) @@ -139,6 +240,22 @@ void test_veristat(void) if (test__start_subtest("set_global_vars_from_file_succeeds")) test_set_global_vars_from_file_succeeds(); + + if (test__start_subtest("test_unsupported_ptr_array_type")) + test_unsupported_ptr_array_type(); + + if (test__start_subtest("test_array_out_of_bounds")) + test_array_out_of_bounds(); + + if (test__start_subtest("test_array_index_not_found")) + test_array_index_not_found(); + + if (test__start_subtest("test_array_index_for_non_array")) + test_array_index_for_non_array(); + + if (test__start_subtest("test_no_array_index_for_array")) + test_no_array_index_for_array(); + } #undef __CHECK_STR diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c index f9392df23f8a..b81dde283052 100644 --- a/tools/testing/selftests/bpf/prog_tests/token.c +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -115,7 +115,7 @@ static int create_bpffs_fd(void) static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) { - int mnt_fd, err; + int err; /* set up token delegation mount options */ err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); @@ -136,12 +136,7 @@ static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) if (err < 0) return -errno; - /* create O_PATH fd for detached mount */ - mnt_fd = sys_fsmount(fs_fd, 0, 0); - if (err < 0) - return -errno; - - return mnt_fd; + return 0; } /* send FD over Unix domain (AF_UNIX) socket */ @@ -287,6 +282,7 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba { int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1; struct token_lsm *lsm_skel = NULL; + char one; /* load and attach LSM "policy" before we go into unpriv userns */ lsm_skel = token_lsm__open_and_load(); @@ -333,13 +329,19 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba err = sendfd(sock_fd, fs_fd); if (!ASSERT_OK(err, "send_fs_fd")) goto cleanup; - zclose(fs_fd); + + /* wait that the parent reads the fd, does the fsconfig() calls + * and send us a signal that it is done + */ + err = read(sock_fd, &one, sizeof(one)); + if (!ASSERT_GE(err, 0, "read_one")) + goto cleanup; /* avoid mucking around with mount namespaces and mounting at - * well-known path, just get detach-mounted BPF FS fd back from parent + * well-known path, just create O_PATH fd for detached mount */ - err = recvfd(sock_fd, &mnt_fd); - if (!ASSERT_OK(err, "recv_mnt_fd")) + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (!ASSERT_OK_FD(mnt_fd, "mnt_fd")) goto cleanup; /* try to fspick() BPF FS and try to add some delegation options */ @@ -429,24 +431,24 @@ again: static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) { - int fs_fd = -1, mnt_fd = -1, token_fd = -1, err; + int fs_fd = -1, token_fd = -1, err; + char one = 1; err = recvfd(sock_fd, &fs_fd); if (!ASSERT_OK(err, "recv_bpffs_fd")) goto cleanup; - mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); - if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { + err = materialize_bpffs_fd(fs_fd, bpffs_opts); + if 
(!ASSERT_GE(err, 0, "materialize_bpffs_fd")) { err = -EINVAL; goto cleanup; } - zclose(fs_fd); - /* pass BPF FS context object to parent */ - err = sendfd(sock_fd, mnt_fd); - if (!ASSERT_OK(err, "send_mnt_fd")) + /* notify the child that we did the fsconfig() calls and it can proceed. */ + err = write(sock_fd, &one, sizeof(one)); + if (!ASSERT_EQ(err, sizeof(one), "send_one")) goto cleanup; - zclose(mnt_fd); + zclose(fs_fd); /* receive BPF token FD back from child for some extra tests */ err = recvfd(sock_fd, &token_fd); @@ -459,7 +461,6 @@ static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) cleanup: zclose(sock_fd); zclose(fs_fd); - zclose(mnt_fd); zclose(token_fd); if (child_pid > 0) @@ -1046,6 +1047,41 @@ err_out: #define bit(n) (1ULL << (n)) +static int userns_bpf_token_info(int mnt_fd, struct token_lsm *lsm_skel) +{ + int err, token_fd = -1; + struct bpf_token_info info; + u32 len = sizeof(struct bpf_token_info); + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + memset(&info, 0, len); + err = bpf_obj_get_info_by_fd(token_fd, &info, &len); + if (!ASSERT_ERR(err, "bpf_obj_get_token_info")) + goto cleanup; + if (!ASSERT_EQ(info.allowed_cmds, bit(BPF_MAP_CREATE), "token_info_cmds_map_create")) { + err = -EINVAL; + goto cleanup; + } + if (!ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_XDP), "token_info_progs_xdp")) { + err = -EINVAL; + goto cleanup; + } + + /* The BPF_PROG_TYPE_EXT is not set in token */ + if (ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_EXT), "token_info_progs_ext")) + err = -EINVAL; + +cleanup: + zclose(token_fd); + return err; +} + void test_token(void) { if (test__start_subtest("map_token")) { @@ -1149,4 +1185,13 @@ void test_token(void) subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); } + if (test__start_subtest("bpf_token_info")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_MAP_CREATE), + .progs = bit(BPF_PROG_TYPE_XDP), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_bpf_token_info); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c index a222df765bc3..10e231965589 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c @@ -28,10 +28,62 @@ out: tracing_failure__destroy(skel); } +static void test_tracing_fail_prog(const char *prog_name, const char *exp_msg) +{ + struct tracing_failure *skel; + struct bpf_program *prog; + char log_buf[256]; + int err; + + skel = tracing_failure__open(); + if (!ASSERT_OK_PTR(skel, "tracing_failure__open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name")) + goto out; + + bpf_program__set_autoload(prog, true); + bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf)); + + err = tracing_failure__load(skel); + if (!ASSERT_ERR(err, "tracing_failure__load")) + goto out; + + ASSERT_HAS_SUBSTR(log_buf, exp_msg, "log_buf"); +out: + tracing_failure__destroy(skel); +} + +static void test_tracing_deny(void) +{ + int btf_id; + + /* __rcu_read_lock depends on CONFIG_PREEMPT_RCU */ + btf_id = libbpf_find_vmlinux_btf_id("__rcu_read_lock", BPF_TRACE_FENTRY); + if (btf_id <= 0) { + test__skip(); + return; + } + + test_tracing_fail_prog("tracing_deny", + "Attaching tracing programs to function '__rcu_read_lock' is 
rejected."); +} + +static void test_fexit_noreturns(void) +{ + test_tracing_fail_prog("fexit_noreturns", + "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected."); +} + void test_tracing_failure(void) { if (test__start_subtest("bpf_spin_lock")) test_bpf_spin_lock(true); if (test__start_subtest("bpf_spin_unlock")) test_bpf_spin_lock(false); + if (test__start_subtest("tracing_deny")) + test_tracing_deny(); + if (test__start_subtest("fexit_noreturns")) + test_fexit_noreturns(); } diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c index c397336fe1ed..b17dc39a23db 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c @@ -251,7 +251,7 @@ static void test_uretprobe_syscall_call(void) .retprobe = true, ); struct uprobe_syscall_executed *skel; - int pid, status, err, go[2], c; + int pid, status, err, go[2], c = 0; if (!ASSERT_OK(pipe(go), "pipe")) return; diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c index 495d66414b57..9057e983cc54 100644 --- a/tools/testing/selftests/bpf/prog_tests/usdt.c +++ b/tools/testing/selftests/bpf/prog_tests/usdt.c @@ -270,8 +270,16 @@ static void subtest_multispec_usdt(void) */ trigger_300_usdts(); - /* we'll reuse usdt_100 BPF program for usdt_300 test */ bpf_link__destroy(skel->links.usdt_100); + + bss->usdt_100_called = 0; + bss->usdt_100_sum = 0; + + /* If built with arm64/clang, there will be much less number of specs + * for usdt_300 call sites. + */ +#if !defined(__aarch64__) || !defined(__clang__) + /* we'll reuse usdt_100 BPF program for usdt_300 test */ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe", "test", "usdt_300", NULL); err = -errno; @@ -282,13 +290,11 @@ static void subtest_multispec_usdt(void) /* let's check that there are no "dangling" BPF programs attached due * to partial success of the above test:usdt_300 attachment */ - bss->usdt_100_called = 0; - bss->usdt_100_sum = 0; - f300(777); /* this is 301st instance of usdt_300 */ ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called"); ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum"); +#endif /* This time we have USDT with 400 inlined invocations, but arg specs * should be the same across all sites, so libbpf will only need to diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c index d424e7ecbd12..9fd3ae987321 100644 --- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c @@ -21,8 +21,7 @@ #include "../progs/test_user_ringbuf.h" static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ; -static const long c_ringbuf_size = 1 << 12; /* 1 small page */ -static const long c_max_entries = c_ringbuf_size / c_sample_size; +static long c_ringbuf_size, c_max_entries; static void drain_current_samples(void) { @@ -424,7 +423,9 @@ static void test_user_ringbuf_loop(void) uint32_t remaining_samples = total_samples; int err; - BUILD_BUG_ON(total_samples <= c_max_entries); + if (!ASSERT_LT(c_max_entries, total_samples, "compare_c_max_entries")) + return; + err = load_skel_create_user_ringbuf(&skel, &ringbuf); if (err) return; @@ -686,6 +687,9 @@ void test_user_ringbuf(void) { int i; + c_ringbuf_size = getpagesize(); /* 1 page */ + c_max_entries = c_ringbuf_size / c_sample_size; + for (i = 0; i < 
ARRAY_SIZE(success_tests); i++) { if (!test__start_subtest(success_tests[i].test_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index c9da06741104..77ec95d4ffaa 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -85,6 +85,7 @@ #include "verifier_store_release.skel.h" #include "verifier_subprog_precision.skel.h" #include "verifier_subreg.skel.h" +#include "verifier_tailcall.skel.h" #include "verifier_tailcall_jit.skel.h" #include "verifier_typedef.skel.h" #include "verifier_uninit.skel.h" @@ -219,6 +220,7 @@ void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); } void test_verifier_store_release(void) { RUN(verifier_store_release); } void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); } void test_verifier_subreg(void) { RUN(verifier_subreg); } +void test_verifier_tailcall(void) { RUN(verifier_tailcall); } void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); } void test_verifier_typedef(void) { RUN(verifier_typedef); } void test_verifier_uninit(void) { RUN(verifier_uninit); } diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c index ab0f02faa80c..4d69d9d55e17 100644 --- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -268,7 +268,7 @@ static void test_verify_pkcs7_sig_from_map(void) char *tmp_dir; struct test_verify_pkcs7_sig *skel = NULL; struct bpf_map *map; - struct data data; + struct data data = {}; int ret, zero = 0; /* Trigger creation of session keyring. */ diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c index b2b2d85dbb1b..43264347e7d7 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c @@ -37,21 +37,26 @@ static void test_xdp_adjust_tail_shrink(void) bpf_object__close(obj); } -static void test_xdp_adjust_tail_grow(void) +static void test_xdp_adjust_tail_grow(bool is_64k_pagesize) { const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; struct bpf_object *obj; - char buf[4096]; /* avoid segfault: large buf to hold grow results */ + char buf[8192]; /* avoid segfault: large buf to hold grow results */ __u32 expect_sz; int err, prog_fd; LIBBPF_OPTS(bpf_test_run_opts, topts, .data_in = &pkt_v4, - .data_size_in = sizeof(pkt_v4), .data_out = buf, .data_size_out = sizeof(buf), .repeat = 1, ); + /* topts.data_size_in as a special signal to bpf prog */ + if (is_64k_pagesize) + topts.data_size_in = sizeof(pkt_v4) - 1; + else + topts.data_size_in = sizeof(pkt_v4); + err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow")) return; @@ -208,7 +213,7 @@ out: bpf_object__close(obj); } -static void test_xdp_adjust_frags_tail_grow(void) +static void test_xdp_adjust_frags_tail_grow_4k(void) { const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; __u32 exp_size; @@ -246,14 +251,20 @@ static void test_xdp_adjust_frags_tail_grow(void) ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval"); ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size"); - for (i = 0; i < 9000; i++) - ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + for (i = 0; i < 9000; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "9Kb+10b-old"); + } - for (i = 9000; i < 9010; 
i++) - ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + for (i = 9000; i < 9010; i++) { + if (buf[i] != 0) + ASSERT_EQ(buf[i], 0, "9Kb+10b-new"); + } - for (i = 9010; i < 16384; i++) - ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); + for (i = 9010; i < 16384; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched"); + } /* Test a too large grow */ memset(buf, 1, 16384); @@ -273,16 +284,93 @@ out: bpf_object__close(obj); } +static void test_xdp_adjust_frags_tail_grow_64k(void) +{ + const char *file = "./test_xdp_adjust_tail_grow.bpf.o"; + __u32 exp_size; + struct bpf_program *prog; + struct bpf_object *obj; + int err, i, prog_fd; + __u8 *buf; + LIBBPF_OPTS(bpf_test_run_opts, topts); + + obj = bpf_object__open(file); + if (libbpf_get_error(obj)) + return; + + prog = bpf_object__next_program(obj, NULL); + if (bpf_object__load(obj)) + goto out; + + prog_fd = bpf_program__fd(prog); + + buf = malloc(262144); + if (!ASSERT_OK_PTR(buf, "alloc buf 256Kb")) + goto out; + + /* Test case add 10 bytes to last frag */ + memset(buf, 1, 262144); + exp_size = 90000 + 10; + + topts.data_in = buf; + topts.data_out = buf; + topts.data_size_in = 90000; + topts.data_size_out = 262144; + err = bpf_prog_test_run_opts(prog_fd, &topts); + + ASSERT_OK(err, "90Kb+10b"); + ASSERT_EQ(topts.retval, XDP_TX, "90Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size"); + + for (i = 0; i < 90000; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "90Kb+10b-old"); + } + + for (i = 90000; i < 90010; i++) { + if (buf[i] != 0) + ASSERT_EQ(buf[i], 0, "90Kb+10b-new"); + } + + for (i = 90010; i < 262144; i++) { + if (buf[i] != 1) + ASSERT_EQ(buf[i], 1, "90Kb+10b-untouched"); + } + + /* Test a too large grow */ + memset(buf, 1, 262144); + exp_size = 90001; + + topts.data_in = topts.data_out = buf; + topts.data_size_in = 90001; + topts.data_size_out = 262144; + err = bpf_prog_test_run_opts(prog_fd, &topts); + + ASSERT_OK(err, "90Kb+10b"); + ASSERT_EQ(topts.retval, XDP_DROP, "90Kb+10b retval"); + ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size"); + + free(buf); +out: + bpf_object__close(obj); +} + void test_xdp_adjust_tail(void) { + int page_size = getpagesize(); + if (test__start_subtest("xdp_adjust_tail_shrink")) test_xdp_adjust_tail_shrink(); if (test__start_subtest("xdp_adjust_tail_grow")) - test_xdp_adjust_tail_grow(); + test_xdp_adjust_tail_grow(page_size == 65536); if (test__start_subtest("xdp_adjust_tail_grow2")) test_xdp_adjust_tail_grow2(); if (test__start_subtest("xdp_adjust_frags_tail_shrink")) test_xdp_adjust_frags_tail_shrink(); - if (test__start_subtest("xdp_adjust_frags_tail_grow")) - test_xdp_adjust_frags_tail_grow(); + if (test__start_subtest("xdp_adjust_frags_tail_grow")) { + if (page_size == 65536) + test_xdp_adjust_frags_tail_grow_64k(); + else + test_xdp_adjust_frags_tail_grow_4k(); + } } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c index 7dac044664ac..dd34b0cc4b4e 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c @@ -66,16 +66,25 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd) #else #define MAX_PKT_SIZE 3408 #endif + +#define PAGE_SIZE_4K 4096 +#define PAGE_SIZE_64K 65536 + static void test_max_pkt_size(int fd) { - char data[MAX_PKT_SIZE + 1] = {}; + char data[PAGE_SIZE_64K + 1] = {}; int err; DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = &data, - .data_size_in = MAX_PKT_SIZE, .flags = 
BPF_F_TEST_XDP_LIVE_FRAMES, .repeat = 1, ); + + if (getpagesize() == PAGE_SIZE_64K) + opts.data_size_in = MAX_PKT_SIZE + PAGE_SIZE_64K - PAGE_SIZE_4K; + else + opts.data_size_in = MAX_PKT_SIZE; + err = bpf_prog_test_run_opts(fd, &opts); ASSERT_OK(err, "prog_run_max_size"); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c new file mode 100644 index 000000000000..2f20485e0de3 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +__u32 value_sum = 0; + +SEC("iter/bpf_map_elem") +int dump_bpf_map_values(struct bpf_iter__bpf_map_elem *ctx) +{ + __u32 value = 0; + + if (ctx->value == (void *)0) + return 0; + + bpf_probe_read_kernel(&value, sizeof(value), ctx->value); + value_sum += value; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 6e208e24ba3b..530752ddde8e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -83,9 +83,11 @@ * expect return value to match passed parameter: * - a decimal number * - a hexadecimal number, when starts from 0x - * - literal INT_MIN - * - literal POINTER_VALUE (see definition below) - * - literal TEST_DATA_LEN (see definition below) + * - a macro which expands to one of the above + * - literal _INT_MIN (expands to INT_MIN) + * In addition, two special macros are defined below: + * - POINTER_VALUE + * - TEST_DATA_LEN * __retval_unpriv Same, but load program in unprivileged mode. * * __description Text to be used instead of a program name for display @@ -125,8 +127,8 @@ #define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv"))) #define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl))) #define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag))) -#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="#val))) -#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val))) +#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="XSTR(val)))) +#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="XSTR(val)))) #define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary"))) #define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv"))) #define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path))) @@ -155,7 +157,7 @@ #define __imm_insn(name, expr) [name]"i"(*(long *)&(expr)) /* Magic constants used with __retval() */ -#define POINTER_VALUE 0xcafe4all +#define POINTER_VALUE 0xbadcafe #define TEST_DATA_LEN 64 #ifndef __used @@ -231,4 +233,12 @@ #define CAN_USE_LOAD_ACQ_STORE_REL #endif +#if defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) +#define SPEC_V1 +#endif + +#if defined(__TARGET_ARCH_x86) +#define SPEC_V4 +#endif + #endif diff --git a/tools/testing/selftests/bpf/progs/cgroup_mprog.c b/tools/testing/selftests/bpf/progs/cgroup_mprog.c new file mode 100644 index 000000000000..6a0ea02c4de2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_mprog.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +SEC("cgroup/getsockopt") +int getsockopt_1(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_2(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_3(struct bpf_sockopt *ctx) +{ + return 1; +} + +SEC("cgroup/getsockopt") +int getsockopt_4(struct bpf_sockopt *ctx) +{ + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c new file mode 100644 index 000000000000..092db1d0435e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +char value[16]; + +static __always_inline void read_xattr(struct cgroup *cgroup) +{ + struct bpf_dynptr value_ptr; + + bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr); + bpf_cgroup_read_xattr(cgroup, "user.bpf_test", + &value_ptr); +} + +SEC("lsm.s/socket_connect") +__success +int BPF_PROG(trusted_cgroup_ptr_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + read_xattr(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(trusted_cgroup_ptr_non_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + read_xattr(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(use_css_iter_non_sleepable) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm.s/socket_connect") +__failure __msg("expected an RCU CS") +int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("lsm.s/socket_connect") +__success +int BPF_PROG(use_css_iter_sleepable_with_rcu_lock) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css; + struct cgroup *cgrp; + + bpf_rcu_read_lock(); + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + goto out; + + bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP) + read_xattr(css->cgroup); + + bpf_cgroup_release(cgrp); +out: + bpf_rcu_read_unlock(); + return 0; +} + +SEC("lsm/socket_connect") +__success +int BPF_PROG(use_bpf_cgroup_ancestor) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp, *ancestor; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + ancestor = bpf_cgroup_ancestor(cgrp, 1); + if (!ancestor) + goto out; + + read_xattr(cgrp); + bpf_cgroup_release(ancestor); +out: + bpf_cgroup_release(cgrp); + return 0; 
+} + +SEC("cgroup/sendmsg4") +__success +int BPF_PROG(cgroup_skb) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup *cgrp, *ancestor; + + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) + return 0; + + ancestor = bpf_cgroup_ancestor(cgrp, 1); + if (!ancestor) + goto out; + + read_xattr(cgrp); + bpf_cgroup_release(ancestor); +out: + bpf_cgroup_release(cgrp); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c index f3d79aecbf93..6884ab99a421 100644 --- a/tools/testing/selftests/bpf/progs/compute_live_registers.c +++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c @@ -240,6 +240,22 @@ __naked void if2(void) ::: __clobber_all); } +/* Verifier misses that r2 is alive if jset is not handled properly */ +SEC("socket") +__log_level(2) +__msg("2: 012....... (45) if r1 & 0x7 goto pc+1") +__naked void if3_jset_bug(void) +{ + asm volatile ( + "r0 = 1;" + "r2 = 2;" + "if r1 & 0x7 goto +1;" + "exit;" + "r0 = r2;" + "exit;" + ::: __clobber_all); +} + SEC("socket") __log_level(2) __msg("0: .......... (b7) r1 = 0") diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index a0391f9da2d4..8315273cb900 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -9,6 +9,8 @@ #include "bpf_misc.h" #include "errno.h" +#define PAGE_SIZE_64K 65536 + char _license[] SEC("license") = "GPL"; int pid, err, val; @@ -611,11 +613,12 @@ int test_dynptr_copy_xdp(struct xdp_md *xdp) struct bpf_dynptr ptr_buf, ptr_xdp; char data[] = "qwertyuiopasdfghjkl"; char buf[32] = {'\0'}; - __u32 len = sizeof(data); + __u32 len = sizeof(data), xdp_data_size; int i, chunks = 200; /* ptr_xdp is backed by non-contiguous memory */ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + xdp_data_size = bpf_dynptr_size(&ptr_xdp); bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf); /* Destination dynptr is backed by non-contiguous memory */ @@ -673,7 +676,7 @@ int test_dynptr_copy_xdp(struct xdp_md *xdp) goto out; } - if (bpf_dynptr_copy(&ptr_xdp, 2000, &ptr_xdp, 0, len * chunks) != -E2BIG) + if (bpf_dynptr_copy(&ptr_xdp, xdp_data_size - 3000, &ptr_xdp, 0, len * chunks) != -E2BIG) err = 1; out: @@ -681,6 +684,173 @@ out: return XDP_DROP; } +char memset_zero_data[] = "data to be zeroed"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_zero(void *ctx) +{ + __u32 data_sz = sizeof(memset_zero_data); + char zeroes[32] = {'\0'}; + struct bpf_dynptr ptr; + + err = bpf_dynptr_from_mem(memset_zero_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, 0); + err = err ?: bpf_memcmp(zeroes, memset_zero_data, data_sz); + + return 0; +} + +#define DYNPTR_MEMSET_VAL 42 + +char memset_notzero_data[] = "data to be overwritten"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_notzero(void *ctx) +{ + u32 data_sz = sizeof(memset_notzero_data); + struct bpf_dynptr ptr; + char expected[32]; + + __builtin_memset(expected, DYNPTR_MEMSET_VAL, data_sz); + + err = bpf_dynptr_from_mem(memset_notzero_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, DYNPTR_MEMSET_VAL); + err = err ?: bpf_memcmp(expected, memset_notzero_data, data_sz); + + return 0; +} + +char memset_zero_offset_data[] = "data to be zeroed partially"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_zero_offset(void *ctx) +{ + char expected[] = "data to 
\0\0\0\0eroed partially"; + __u32 data_sz = sizeof(memset_zero_offset_data); + struct bpf_dynptr ptr; + + err = bpf_dynptr_from_mem(memset_zero_offset_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_memset(&ptr, 8, 4, 0); + err = err ?: bpf_memcmp(expected, memset_zero_offset_data, data_sz); + + return 0; +} + +char memset_zero_adjusted_data[] = "data to be zeroed partially"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_zero_adjusted(void *ctx) +{ + char expected[] = "data\0\0\0\0be zeroed partially"; + __u32 data_sz = sizeof(memset_zero_adjusted_data); + struct bpf_dynptr ptr; + + err = bpf_dynptr_from_mem(memset_zero_adjusted_data, data_sz, 0, &ptr); + err = err ?: bpf_dynptr_adjust(&ptr, 4, 8); + err = err ?: bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0); + err = err ?: bpf_memcmp(expected, memset_zero_adjusted_data, data_sz); + + return 0; +} + +char memset_overflow_data[] = "memset overflow data"; + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_overflow(void *ctx) +{ + __u32 data_sz = sizeof(memset_overflow_data); + struct bpf_dynptr ptr; + int ret; + + err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr); + ret = bpf_dynptr_memset(&ptr, 0, data_sz + 1, 0); + if (ret != -E2BIG) + err = 1; + + return 0; +} + +SEC("?tp/syscalls/sys_enter_nanosleep") +int test_dynptr_memset_overflow_offset(void *ctx) +{ + __u32 data_sz = sizeof(memset_overflow_data); + struct bpf_dynptr ptr; + int ret; + + err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr); + ret = bpf_dynptr_memset(&ptr, 1, data_sz, 0); + if (ret != -E2BIG) + err = 1; + + return 0; +} + +SEC("?cgroup_skb/egress") +int test_dynptr_memset_readonly(struct __sk_buff *skb) +{ + struct bpf_dynptr ptr; + int ret; + + err = bpf_dynptr_from_skb(skb, 0, &ptr); + + /* cgroup skbs are read only, memset should fail */ + ret = bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0); + if (ret != -EINVAL) + err = 1; + + return 0; +} + +#define min_t(type, x, y) ({ \ + type __x = (x); \ + type __y = (y); \ + __x < __y ? __x : __y; }) + +SEC("xdp") +int test_dynptr_memset_xdp_chunks(struct xdp_md *xdp) +{ + u32 data_sz, chunk_sz, offset = 0; + const int max_chunks = 200; + struct bpf_dynptr ptr_xdp; + char expected_buf[32]; + char buf[32]; + int i; + + __builtin_memset(expected_buf, DYNPTR_MEMSET_VAL, sizeof(expected_buf)); + + /* ptr_xdp is backed by non-contiguous memory */ + bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp); + data_sz = bpf_dynptr_size(&ptr_xdp); + + err = bpf_dynptr_memset(&ptr_xdp, 0, data_sz, DYNPTR_MEMSET_VAL); + if (err) { + /* bpf_dynptr_memset() eventually called bpf_xdp_pointer() + * where if data_sz is greater than 0xffff, -EFAULT will be + * returned. For 64K page size, data_sz is greater than + * 64K, so error is expected and let us zero out error and + * return success. + */ + if (data_sz >= PAGE_SIZE_64K) + err = 0; + goto out; + } + + bpf_for(i, 0, max_chunks) { + offset = i * sizeof(buf); + if (offset >= data_sz) + goto out; + chunk_sz = min_t(u32, sizeof(buf), data_sz - offset); + err = bpf_dynptr_read(&buf, chunk_sz, &ptr_xdp, offset, 0); + if (err) + goto out; + err = bpf_memcmp(buf, expected_buf, sizeof(buf)); + if (err) + goto out; + } +out: + return XDP_DROP; +} + void *user_ptr; /* Contains the copy of the data pointed by user_ptr. 
* Size 384 to make it not fit into a single kernel chunk when copying diff --git a/tools/testing/selftests/bpf/progs/fexit_noreturns.c b/tools/testing/selftests/bpf/progs/fexit_noreturns.c deleted file mode 100644 index 54654539f550..000000000000 --- a/tools/testing/selftests/bpf/progs/fexit_noreturns.c +++ /dev/null @@ -1,15 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <linux/bpf.h> -#include <bpf/bpf_helpers.h> -#include <bpf/bpf_tracing.h> -#include "bpf_misc.h" - -char _license[] SEC("license") = "GPL"; - -SEC("fexit/do_exit") -__failure __msg("Attaching fexit/fmod_ret to __noreturn functions is rejected.") -int BPF_PROG(noreturns) -{ - return 0; -} diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index 76adf4a8f2da..7dd92a303bf6 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -1649,4 +1649,281 @@ int clean_live_states(const void *ctx) return 0; } +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state(void) +{ + /* This is equivalent to C program below. + * + * r8 = bpf_get_prandom_u32(); + * r6 = -32; + * bpf_iter_num_new(&fp[-8], 0, 10); + * if (unlikely(bpf_get_prandom_u32())) + * r6 = -31; + * while (bpf_iter_num_next(&fp[-8])) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "r7 = 0;" + "r6 = -32;" + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "loop_%=:" + "call noop;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + "change_r6_%=:" + "r6 = -31;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int noop(void) +{ + asm volatile ( + "r0 = 0;" + "exit;" + ); +} + +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state2(void) +{ + /* This is equivalent to C program below. 
+ * + * r8 = bpf_get_prandom_u32(); + * r6 = -32; + * bpf_iter_num_new(&fp[-8], 0, 10); + * if (unlikely(bpf_get_prandom_u32())) { + * r6 = -31; + * jump_into_loop: + * goto +0; + * goto loop; + * } + * if (unlikely(bpf_get_prandom_u32())) + * goto jump_into_loop; + * loop: + * while (bpf_iter_num_next(&fp[-8])) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * bpf_iter_num_destroy(&fp[-8]) + * return 0 + */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "r7 = 0;" + "r6 = -32;" + "r0 = 0;" + "*(u64 *)(r10 - 16) = r0;" + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto jump_into_loop_%=;" + "loop_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + "change_r6_%=:" + "r6 = -31;" + "jump_into_loop_%=: " + "goto +0;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_next), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +SEC("?raw_tp") +__flag(BPF_F_TEST_STATE_FREQ) +__failure __msg("misaligned stack access off 0+-31+0 size 8") +__naked int absent_mark_in_the_middle_state3(void) +{ + /* + * bpf_iter_num_new(&fp[-8], 0, 10) + * loop1(-32, &fp[-8]) + * loop1_wrapper(&fp[-8]) + * bpf_iter_num_destroy(&fp[-8]) + */ + asm volatile ( + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + /* call #1 */ + "r1 = -32;" + "r2 = r10;" + "r2 += -8;" + "call loop1;" + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + /* call #2 */ + "r1 = r10;" + "r1 += -8;" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "r1 = r10;" + "r1 += -8;" + "call loop1_wrapper;" + /* return */ + "r1 = r10;" + "r1 += -8;" + "call %[bpf_iter_num_destroy];" + "r0 = 0;" + "exit;" + : + : __imm(bpf_iter_num_new), + __imm(bpf_iter_num_destroy), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int loop1(void) +{ + /* + * int loop1(num, iter) { + * r6 = num; + * r7 = iter; + * while (bpf_iter_num_next(r7)) { + * if (unlikely(bpf_get_prandom_u32())) + * *(fp + r6) = 7; + * } + * return 0 + * } + */ + asm volatile ( + "r6 = r1;" + "r7 = r2;" + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "loop_%=:" + "r1 = r7;" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto loop_end_%=;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto use_r6_%=;" + "goto loop_%=;" + "loop_end_%=:" + "r0 = 0;" + "exit;" + "use_r6_%=:" + "r0 = r10;" + "r0 += r6;" + "r1 = 7;" + "*(u64 *)(r0 + 0) = r1;" + "goto loop_%=;" + : + : __imm(bpf_iter_num_next), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + +__used __naked +static int loop1_wrapper(void) +{ + /* + * int loop1_wrapper(iter) { + * r6 = -32; + * r7 = iter; + * if (unlikely(bpf_get_prandom_u32())) + * r6 = -31; + * loop1(r6, r7); + * return 0; + * } + */ + asm volatile ( + "r6 = -32;" + "r7 = r1;" + "call %[bpf_get_prandom_u32];" + "r8 = r0;" + "call %[bpf_get_prandom_u32];" + "if r0 == r8 goto change_r6_%=;" + "loop_%=:" + "r1 = r6;" + "r2 = r7;" + "call loop1;" + "r0 = 0;" + "exit;" + "change_r6_%=:" + "r6 = -31;" + "goto loop_%=;" + : + : 
__imm(bpf_iter_num_next), + __imm(bpf_get_prandom_u32) + : __clobber_all + ); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c new file mode 100644 index 000000000000..4f94c971ae86 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c @@ -0,0 +1,229 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +SEC("tp_btf/sys_enter") +__success +__log_level(2) +__msg("r8 = *(u64 *)(r7 +0) ; R7_w=ptr_nameidata(off={{[0-9]+}}) R8_w=rdonly_untrusted_mem(sz=0)") +__msg("r9 = *(u8 *)(r8 +0) ; R8_w=rdonly_untrusted_mem(sz=0) R9_w=scalar") +int btf_id_to_ptr_mem(void *ctx) +{ + struct task_struct *task; + struct nameidata *idata; + u64 ret, off; + + task = bpf_get_current_task_btf(); + idata = task->nameidata; + off = bpf_core_field_offset(struct nameidata, pathname); + /* + * asm block to have reliable match target for __msg, equivalent of: + * ret = task->nameidata->pathname[0]; + */ + asm volatile ( + "r7 = %[idata];" + "r7 += %[off];" + "r8 = *(u64 *)(r7 + 0);" + "r9 = *(u8 *)(r8 + 0);" + "%[ret] = r9;" + : [ret]"=r"(ret) + : [idata]"r"(idata), + [off]"r"(off) + : "r7", "r8", "r9"); + return ret; +} + +SEC("socket") +__success +__retval(0) +int ldx_is_ok_bad_addr(void *ctx) +{ + char *p; + + if (!bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_RDONLY_CAST_TO_VOID)) + return 42; + + p = bpf_rdonly_cast(0, 0); + return p[0x7fff]; +} + +SEC("socket") +__success +__retval(1) +int ldx_is_ok_good_addr(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + return *p; +} + +SEC("socket") +__success +int offset_not_tracked(void *ctx) +{ + int *p, i, s; + + p = bpf_rdonly_cast(0, 0); + s = 0; + bpf_for(i, 0, 1000 * 1000 * 1000) { + p++; + s += *p; + } + return s; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int stx_not_ok(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + *p = 1; + return 0; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int atomic_not_ok(void *ctx) +{ + int v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + __sync_fetch_and_add(p, 1); + return 0; +} + +SEC("socket") +__failure +__msg("cannot write into rdonly_untrusted_mem") +int atomic_rmw_not_ok(void *ctx) +{ + long v, *p; + + v = 1; + p = bpf_rdonly_cast(&v, 0); + return __sync_val_compare_and_swap(p, 0, 42); +} + +SEC("socket") +__failure +__msg("invalid access to memory, mem_size=0 off=0 size=4") +__msg("R1 min value is outside of the allowed memory range") +int kfunc_param_not_ok(void *ctx) +{ + int *p; + + p = bpf_rdonly_cast(0, 0); + bpf_kfunc_trusted_num_test(p); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +__failure +__msg("R1 type=rdonly_untrusted_mem expected=") +int helper_param_not_ok(void *ctx) +{ + char *p; + + p = bpf_rdonly_cast(0, 0); + /* + * Any helper with ARG_CONST_SIZE_OR_ZERO constraint will do, + * the most permissive constraint + */ + bpf_copy_from_user(p, 0, (void *)42); + return 0; +} + +static __noinline u64 *get_some_addr(void) +{ + if (bpf_get_prandom_u32()) + return bpf_rdonly_cast(0, bpf_core_type_id_kernel(struct sock)); + else + return bpf_rdonly_cast(0, 0); +} + +SEC("socket") +__success +__retval(0) +int mixed_mem_type(void *ctx) +{ + u64 *p; + + /* Try to avoid compiler hoisting load to if branches by using __noinline 
func. */ + p = get_some_addr(); + return *p; +} + +__attribute__((__aligned__(8))) +u8 global[] = { + 0x11, 0x22, 0x33, 0x44, + 0x55, 0x66, 0x77, 0x88, + 0x99 +}; + +__always_inline +static u64 combine(void *p) +{ + u64 acc; + + acc = 0; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + acc |= (*(u64 *)p >> 56) << 24; + acc |= (*(u32 *)p >> 24) << 16; + acc |= (*(u16 *)p >> 8) << 8; + acc |= *(u8 *)p; +#else + acc |= (*(u64 *)p & 0xff) << 24; + acc |= (*(u32 *)p & 0xff) << 16; + acc |= (*(u16 *)p & 0xff) << 8; + acc |= *(u8 *)p; +#endif + return acc; +} + +SEC("socket") +__retval(0x88442211) +int diff_size_access(void *ctx) +{ + return combine(bpf_rdonly_cast(&global, 0)); +} + +SEC("socket") +__retval(0x99553322) +int misaligned_access(void *ctx) +{ + return combine(bpf_rdonly_cast(&global, 0) + 1); +} + +__weak int return_one(void) +{ + return 1; +} + +SEC("socket") +__success +__retval(1) +int null_check(void *ctx) +{ + int *p; + + p = bpf_rdonly_cast(0, 0); + if (p == 0) + /* make this a function call to avoid compiler + * moving r0 assignment before check. + */ + return return_one(); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c index a3620c15c136..49fe93d7e059 100644 --- a/tools/testing/selftests/bpf/progs/rbtree.c +++ b/tools/testing/selftests/bpf/progs/rbtree.c @@ -61,19 +61,19 @@ static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock) } m->key = 1; - bpf_spin_lock(&glock); - bpf_rbtree_add(&groot, &n->node, less); - bpf_rbtree_add(&groot, &m->node, less); - bpf_spin_unlock(&glock); + bpf_spin_lock(lock); + bpf_rbtree_add(root, &n->node, less); + bpf_rbtree_add(root, &m->node, less); + bpf_spin_unlock(lock); n = bpf_obj_new(typeof(*n)); if (!n) return 3; n->key = 3; - bpf_spin_lock(&glock); - bpf_rbtree_add(&groot, &n->node, less); - bpf_spin_unlock(&glock); + bpf_spin_lock(lock); + bpf_rbtree_add(root, &n->node, less); + bpf_spin_unlock(lock); return 0; } diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c index 43637ee2cdcd..3a868a199349 100644 --- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c +++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c @@ -16,10 +16,11 @@ struct { __type(value, long); } map_a SEC(".maps"); -__u32 user_data, key_serial, target_pid; +__u32 user_data, target_pid; +__s32 key_serial; __u64 flags, task_storage_val, cgroup_id; -struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; void bpf_key_put(struct bpf_key *key) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c new file mode 100644 index 000000000000..405adbe5e8b0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. 
*/ + +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_experimental.h" + +char _license[] SEC("license") = "GPL"; + +pid_t target_pid = 0; + +char xattr_value[64]; +static const char expected_value_a[] = "bpf_selftest_value_a"; +static const char expected_value_b[] = "bpf_selftest_value_b"; +bool found_value_a; +bool found_value_b; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open) +{ + u64 cgrp_id = bpf_get_current_cgroup_id(); + struct cgroup_subsys_state *css, *tmp; + struct bpf_dynptr value_ptr; + struct cgroup *cgrp; + + if ((bpf_get_current_pid_tgid() >> 32) != target_pid) + return 0; + + bpf_rcu_read_lock(); + cgrp = bpf_cgroup_from_id(cgrp_id); + if (!cgrp) { + bpf_rcu_read_unlock(); + return 0; + } + + css = &cgrp->self; + bpf_dynptr_from_mem(xattr_value, sizeof(xattr_value), 0, &value_ptr); + bpf_for_each(css, tmp, css, BPF_CGROUP_ITER_ANCESTORS_UP) { + int ret; + + ret = bpf_cgroup_read_xattr(tmp->cgroup, "user.bpf_test", + &value_ptr); + if (ret < 0) + continue; + + if (ret == sizeof(expected_value_a) && + !bpf_strncmp(xattr_value, sizeof(expected_value_a), expected_value_a)) + found_value_a = true; + if (ret == sizeof(expected_value_b) && + !bpf_strncmp(xattr_value, sizeof(expected_value_b), expected_value_b)) + found_value_b = true; + } + + bpf_rcu_read_unlock(); + bpf_cgroup_release(cgrp); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/security_bpf_map.c b/tools/testing/selftests/bpf/progs/security_bpf_map.c new file mode 100644 index 000000000000..7216b3450e96 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/security_bpf_map.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "vmlinux.h" +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +#define EPERM 1 /* Operation not permitted */ + +/* From include/linux/mm.h. */ +#define FMODE_WRITE 0x2 + +struct map; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 1); +} prot_status_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 3); +} prot_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 3); +} not_prot_map SEC(".maps"); + +SEC("fmod_ret/security_bpf_map") +int BPF_PROG(fmod_bpf_map, struct bpf_map *map, int fmode) +{ + __u32 key = 0; + __u32 *status_ptr = bpf_map_lookup_elem(&prot_status_map, &key); + + if (!status_ptr || !*status_ptr) + return 0; + + if (map == &prot_map) { + /* Allow read-only access */ + if (fmode & FMODE_WRITE) + return -EPERM; + } + + return 0; +} + +/* + * This program keeps references to maps. This is needed to prevent + * optimizing them out. 
+ */ +SEC("fentry/bpf_fentry_test1") +int BPF_PROG(fentry_dummy1, int a) +{ + __u32 key = 0; + __u32 val1 = a; + __u32 val2 = a + 1; + + bpf_map_update_elem(&prot_map, &key, &val1, BPF_ANY); + bpf_map_update_elem(¬_prot_map, &key, &val2, BPF_ANY); + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c index 90f5656c3991..ebaef28b2cb3 100644 --- a/tools/testing/selftests/bpf/progs/set_global_vars.c +++ b/tools/testing/selftests/bpf/progs/set_global_vars.c @@ -7,22 +7,30 @@ char _license[] SEC("license") = "GPL"; -enum Enum { EA1 = 0, EA2 = 11 }; +typedef __s32 s32; +typedef s32 i32; +typedef __u8 u8; + +enum Enum { EA1 = 0, EA2 = 11, EA3 = 10 }; enum Enumu64 {EB1 = 0llu, EB2 = 12llu }; enum Enums64 { EC1 = 0ll, EC2 = 13ll }; const volatile __s64 var_s64 = -1; const volatile __u64 var_u64 = 0; -const volatile __s32 var_s32 = -1; +const volatile i32 var_s32 = -1; const volatile __u32 var_u32 = 0; const volatile __s16 var_s16 = -1; const volatile __u16 var_u16 = 0; const volatile __s8 var_s8 = -1; -const volatile __u8 var_u8 = 0; +const volatile u8 var_u8 = 0; const volatile enum Enum var_ea = EA1; const volatile enum Enumu64 var_eb = EB1; const volatile enum Enums64 var_ec = EC1; const volatile bool var_b = false; +const volatile i32 arr[32]; +const volatile enum Enum enum_arr[32]; +const volatile i32 three_d[47][19][17]; +const volatile i32 *ptr_arr[32]; struct Struct { int:16; @@ -35,34 +43,38 @@ struct Struct { volatile struct { const int:1; union { - const volatile __u8 var_u8; + const volatile u8 var_u8[3]; const volatile __s16 filler3; const int:1; + s32 mat[7][5]; } u; }; - } struct2; + } struct2[2][4]; }; const volatile __u32 stru = 0; /* same prefix as below */ -const volatile struct Struct struct1 = {.struct2 = {.u = {.var_u8 = 1}}}; +const volatile struct Struct struct1[3]; +const volatile struct Struct struct11[11][7]; -union Union { - __u16 var_u16; - struct Struct3 { - struct { - __u8 var_u8_l; - }; +struct Struct3 { + struct { + u8 var_u8_l; + }; + struct { struct { - struct { - __u8 var_u8_h; - }; + u8 var_u8_h; }; - } struct3; + }; }; -const volatile union Union union1 = {.var_u16 = -1}; +typedef struct Struct3 Struct3_t; -char arr[4] = {0}; +union Union { + __u16 var_u16; + Struct3_t struct3; +}; + +const volatile union Union union1 = {.var_u16 = -1}; SEC("socket") int test_set_globals(void *ctx) @@ -81,8 +93,14 @@ int test_set_globals(void *ctx) a = var_eb; a = var_ec; a = var_b; - a = struct1.struct2.u.var_u8; + a = struct1[2].struct2[1][2].u.var_u8[2]; a = union1.var_u16; + a = arr[3]; + a = arr[EA2]; + a = enum_arr[EC2]; + a = three_d[31][7][EA2]; + a = struct1[2].struct2[1][2].u.mat[5][3]; + a = struct11[7][5].struct2[0][1].u.mat[3][0]; return a; } diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c index 8f483337e103..77966ded5467 100644 --- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c +++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c @@ -23,6 +23,7 @@ static bool ipv4_addr_loopback(__be32 a) } volatile const unsigned int sf; +volatile const unsigned int ss; volatile const __u16 ports[2]; unsigned int bucket[2]; @@ -42,16 +43,18 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) sock_cookie = bpf_get_socket_cookie(sk); sk = bpf_core_cast(sk, struct sock); if (sk->sk_family != sf || - sk->sk_state != TCP_LISTEN || - sk->sk_family == AF_INET6 ? + (ss && sk->sk_state != ss) || + (sk->sk_family == AF_INET6 ? 
!ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) : - !ipv4_addr_loopback(sk->sk_rcv_saddr)) + !ipv4_addr_loopback(sk->sk_rcv_saddr))) return 0; if (sk->sk_num == ports[0]) idx = 0; else if (sk->sk_num == ports[1]) idx = 1; + else if (!ports[0] && !ports[1]) + idx = 0; else return 0; @@ -67,6 +70,27 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx) return 0; } +volatile const __u64 destroy_cookie; + +SEC("iter/tcp") +int iter_tcp_destroy(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = (struct sock_common *)ctx->sk_common; + __u64 sock_cookie; + + if (!sk_common) + return 0; + + sock_cookie = bpf_get_socket_cookie(sk_common); + if (sock_cookie != destroy_cookie) + return 0; + + bpf_sock_destroy(sk_common); + bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie)); + + return 0; +} + #define udp_sk(ptr) container_of(ptr, struct udp_sock, inet.sk) SEC("iter/udp") @@ -83,15 +107,17 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx) sock_cookie = bpf_get_socket_cookie(sk); sk = bpf_core_cast(sk, struct sock); if (sk->sk_family != sf || - sk->sk_family == AF_INET6 ? + (sk->sk_family == AF_INET6 ? !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) : - !ipv4_addr_loopback(sk->sk_rcv_saddr)) + !ipv4_addr_loopback(sk->sk_rcv_saddr))) return 0; if (sk->sk_num == ports[0]) idx = 0; else if (sk->sk_num == ports[1]) idx = 1; + else if (!ports[0] && !ports[1]) + idx = 0; else return 0; diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c new file mode 100644 index 000000000000..35790897dc87 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stream.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct arr_elem { + struct bpf_res_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, struct arr_elem); +} arrmap SEC(".maps"); + +#define ENOSPC 28 +#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + +int size; + +SEC("syscall") +__success __retval(0) +int stream_exhaust(void *ctx) +{ + /* Use global variable for loop convergence. 
*/ + size = 0; + bpf_repeat(BPF_MAX_LOOPS) { + if (bpf_stream_printk(BPF_STDOUT, _STR) == -ENOSPC && size == 99954) + return 0; + size += sizeof(_STR) - 1; + } + return 1; +} + +SEC("syscall") +__success __retval(0) +int stream_cond_break(void *ctx) +{ + while (can_loop) + ; + return 0; +} + +SEC("syscall") +__success __retval(0) +int stream_deadlock(void *ctx) +{ + struct bpf_res_spin_lock *lock, *nlock; + + lock = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!lock) + return 1; + nlock = bpf_map_lookup_elem(&arrmap, &(int){0}); + if (!nlock) + return 1; + if (bpf_res_spin_lock(lock)) + return 1; + if (bpf_res_spin_lock(nlock)) { + bpf_res_spin_unlock(lock); + return 0; + } + bpf_res_spin_unlock(nlock); + bpf_res_spin_unlock(lock); + return 1; +} + +SEC("syscall") +__success __retval(0) +int stream_syscall(void *ctx) +{ + bpf_stream_printk(BPF_STDOUT, "foo"); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c new file mode 100644 index 000000000000..b4a0d0cc8ec8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stream_fail.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include "bpf_misc.h" + +SEC("syscall") +__failure __msg("Possibly NULL pointer passed") +int stream_vprintk_null_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0, NULL); + return 0; +} + +SEC("syscall") +__failure __msg("R3 type=scalar expected=") +int stream_vprintk_scalar_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0, NULL); + return 0; +} + +SEC("syscall") +__failure __msg("arg#1 doesn't point to a const string") +int stream_vprintk_string_arg(void *ctx) +{ + bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0, NULL); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c new file mode 100644 index 000000000000..53af438bd998 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <linux/limits.h> +#include "bpf_misc.h" +#include "errno.h" + +char *user_ptr = (char *)1; +char *invalid_kern_ptr = (char *)-1; + +/* + * When passing userspace pointers, the error code differs based on arch: + * -ERANGE on arches with non-overlapping address spaces + * -EFAULT on other arches + */ +#if defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_loongarch) || \ + defined(__TARGET_ARCH_powerpc) || defined(__TARGET_ARCH_x86) +#define USER_PTR_ERR -ERANGE +#else +#define USER_PTR_ERR -EFAULT +#endif + +/* + * On s390, __get_kernel_nofault (used in string kfuncs) returns 0 for NULL and + * user_ptr (instead of causing an exception) so the below two groups of tests + * are not applicable. 
+ */ +#ifndef __TARGET_ARCH_s390 + +/* Passing NULL to string kfuncs (treated as a userspace ptr) */ +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strrchr_null(void *ctx) { return bpf_strrchr(NULL, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strlen_null(void *ctx) { return bpf_strlen(NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnlen_null(void *ctx) { return bpf_strnlen(NULL, 1); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null1(void *ctx) { return bpf_strspn(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null2(void *ctx) { return bpf_strspn("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null1(void *ctx) { return bpf_strcspn(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null2(void *ctx) { return bpf_strcspn("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null1(void *ctx) { return bpf_strstr(NULL, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null2(void *ctx) { return bpf_strstr("hello", NULL); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null1(void *ctx) { return bpf_strnstr(NULL, "hello", 1); } +SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return bpf_strnstr("hello", NULL, 1); } + +/* Passing userspace ptr to string kfuncs */ +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strrchr_user_ptr(void *ctx) { return bpf_strrchr(user_ptr, 'a'); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strlen_user_ptr(void *ctx) { return bpf_strlen(user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnlen_user_ptr(void *ctx) { return bpf_strnlen(user_ptr, 1); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr1(void *ctx) { return bpf_strspn(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr2(void *ctx) { return bpf_strspn("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr1(void *ctx) { return bpf_strcspn(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr2(void *ctx) { return bpf_strcspn("hello", user_ptr); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr1(void *ctx) { return bpf_strstr(user_ptr, "hello"); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr2(void *ctx) { return bpf_strstr("hello", user_ptr); } +SEC("syscall") 
__retval(USER_PTR_ERR) int test_strnstr_user_ptr1(void *ctx) { return bpf_strnstr(user_ptr, "hello", 1); } +SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { return bpf_strnstr("hello", user_ptr, 1); } + +#endif /* __TARGET_ARCH_s390 */ + +/* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */ +SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strrchr_pagefault(void *ctx) { return bpf_strrchr(invalid_kern_ptr, 'a'); } +SEC("syscall") __retval(-EFAULT) int test_strlen_pagefault(void *ctx) { return bpf_strlen(invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strnlen_pagefault(void *ctx) { return bpf_strnlen(invalid_kern_ptr, 1); } +SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault1(void *ctx) { return bpf_strspn(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault2(void *ctx) { return bpf_strspn("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault1(void *ctx) { return bpf_strcspn(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault2(void *ctx) { return bpf_strcspn("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault1(void *ctx) { return bpf_strstr(invalid_kern_ptr, "hello"); } +SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault2(void *ctx) { return bpf_strstr("hello", invalid_kern_ptr); } +SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault1(void *ctx) { return bpf_strnstr(invalid_kern_ptr, "hello", 1); } +SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault2(void *ctx) { return bpf_strnstr("hello", invalid_kern_ptr, 1); } + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c new file mode 100644 index 000000000000..89fb4669b0e9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <linux/limits.h> + +char long_str[XATTR_SIZE_MAX + 1]; + +SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); } +SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); } +SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); } +SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); } +SEC("syscall") int test_strrchr_too_long(void *ctx) { return bpf_strrchr(long_str, 'b'); } +SEC("syscall") int test_strlen_too_long(void *ctx) { return bpf_strlen(long_str); } +SEC("syscall") int test_strnlen_too_long(void *ctx) { return bpf_strnlen(long_str, sizeof(long_str)); } +SEC("syscall") int test_strspn_str_too_long(void *ctx) { return 
bpf_strspn(long_str, "a"); } +SEC("syscall") int test_strspn_accept_too_long(void *ctx) { return bpf_strspn("b", long_str); } +SEC("syscall") int test_strcspn_str_too_long(void *ctx) { return bpf_strcspn(long_str, "b"); } +SEC("syscall") int test_strcspn_reject_too_long(void *ctx) { return bpf_strcspn("b", long_str); } +SEC("syscall") int test_strstr_too_long(void *ctx) { return bpf_strstr(long_str, "hello"); } +SEC("syscall") int test_strnstr_too_long(void *ctx) { return bpf_strnstr(long_str, "hello", sizeof(long_str)); } + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c new file mode 100644 index 000000000000..46697f381878 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2025 Red Hat, Inc.*/ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" +#include "errno.h" + +char str[] = "hello world"; + +#define __test(retval) SEC("syscall") __success __retval(retval) + +/* Functional tests */ +__test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); } +__test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); } +__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); } +__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); } +__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); } +__test(1) int test_strchrnul_found(void *ctx) { return bpf_strchrnul(str, 'e'); } +__test(11) int test_strchrnul_notfound(void *ctx) { return bpf_strchrnul(str, 'x'); } +__test(1) int test_strnchr_found(void *ctx) { return bpf_strnchr(str, 5, 'e'); } +__test(11) int test_strnchr_null(void *ctx) { return bpf_strnchr(str, 12, '\0'); } +__test(-ENOENT) int test_strnchr_notfound(void *ctx) { return bpf_strnchr(str, 5, 'w'); } +__test(9) int test_strrchr_found(void *ctx) { return bpf_strrchr(str, 'l'); } +__test(11) int test_strrchr_null(void *ctx) { return bpf_strrchr(str, '\0'); } +__test(-ENOENT) int test_strrchr_notfound(void *ctx) { return bpf_strrchr(str, 'x'); } +__test(11) int test_strlen(void *ctx) { return bpf_strlen(str); } +__test(11) int test_strnlen(void *ctx) { return bpf_strnlen(str, 12); } +__test(5) int test_strspn(void *ctx) { return bpf_strspn(str, "ehlo"); } +__test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); } +__test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); } +__test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); } +__test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); } +__test(0) int test_strnstr_found(void *ctx) { return bpf_strnstr(str, "hello", 6); } +__test(-ENOENT) int test_strnstr_notfound(void *ctx) { return bpf_strnstr(str, "hi", 10); } +__test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); } + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c index 0e4d2ff63ab8..dbe646013811 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c @@ -7,7 +7,7 @@ char _license[] SEC("license") = "GPL"; -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) bool skip 
__attribute((__section__(".data"))) = false; #else bool skip = true; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c index 58d5d8dc2235..3d89ad7cbe2a 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c @@ -7,7 +7,7 @@ char _license[] SEC("license") = "GPL"; -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) bool skip __attribute((__section__(".data"))) = false; #else bool skip = true; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c index 31e58389bb8b..b1f6d7e5a8e5 100644 --- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c +++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c @@ -7,7 +7,7 @@ char _license[] SEC("license") = "GPL"; -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) bool skip __attribute((__section__(".data"))) = false; #else bool skip = true; diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c index a3f220ba7025..ee65bad0436d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_map_resize.c +++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c @@ -32,6 +32,16 @@ int my_int_last SEC(".data.array_not_last"); int percpu_arr[1] SEC(".data.percpu_arr"); +/* at least one extern is included, to ensure that a specific + * regression is tested whereby resizing resulted in a free-after-use + * bug after type information is invalidated by the resize operation. + * + * There isn't a particularly good API to test for this specific condition, + * but by having externs for the resizing tests it will cover this path. 
+ */ +extern int LINUX_KERNEL_VERSION __kconfig; +long version_sink; + SEC("tp/syscalls/sys_enter_getpid") int bss_array_sum(void *ctx) { @@ -44,6 +54,9 @@ int bss_array_sum(void *ctx) for (size_t i = 0; i < bss_array_len; ++i) sum += array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } @@ -59,6 +72,9 @@ int data_array_sum(void *ctx) for (size_t i = 0; i < data_array_len; ++i) sum += my_array[i]; + /* see above; ensure this is not optimized out */ + version_sink = LINUX_KERNEL_VERSION; + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c index cdbbb12f1491..1f7e1e59b073 100644 --- a/tools/testing/selftests/bpf/progs/test_lookup_key.c +++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c @@ -14,11 +14,11 @@ char _license[] SEC("license") = "GPL"; __u32 monitored_pid; -__u32 key_serial; +__s32 key_serial; __u32 key_id; __u64 flags; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; extern void bpf_key_put(struct bpf_key *key) __ksym; diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c index 350513c0e4c9..f063a0013f85 100644 --- a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c +++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c @@ -26,11 +26,11 @@ int test_ringbuf_write(void *ctx) if (cur_pid != pid) return 0; - sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + sample1 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0); if (!sample1) return 0; /* first one can pass */ - sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0); + sample2 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0); if (!sample2) { bpf_ringbuf_discard(sample1, 0); __sync_fetch_and_add(&discarded, 1); diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c index 8ef6b39335b6..34b30e2603f0 100644 --- a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c +++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c @@ -40,7 +40,7 @@ char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; __u32 monitored_pid; char sig[MAX_SIG_SIZE]; __u32 sig_size; -__u32 user_keyring_serial; +__s32 user_keyring_serial; SEC("lsm.s/file_open") int BPF_PROG(test_file_open, struct file *f) diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c index 2796dd8545eb..1c7941a4ad00 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c @@ -1,8 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2024 ByteDance */ -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> +#ifndef PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE +#endif +#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2) + struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); __uint(max_entries, 1); @@ -31,7 +36,7 @@ int prog_skb_verdict(struct __sk_buff *skb) change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0); return SK_PASS; } else if (data[0] == 'E') { /* Error */ - change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0); return SK_PASS; } return SK_PASS; diff --git 
a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c index 8bdb9987c0c7..83df4919c224 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c +++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c @@ -7,6 +7,8 @@ int cork_byte; int push_start; int push_end; int apply_bytes; +int pop_start; +int pop_end; struct { __uint(type, BPF_MAP_TYPE_SOCKMAP); @@ -22,6 +24,8 @@ int prog_sk_policy(struct sk_msg_md *msg) bpf_msg_cork_bytes(msg, cork_byte); if (push_start > 0 && push_end > 0) bpf_msg_push_data(msg, push_start, push_end, 0); + if (pop_start >= 0 && pop_end > 0) + bpf_msg_pop_data(msg, pop_start, pop_end, 0); return SK_PASS; } diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c index 28edafe803f0..fcba8299f0bc 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c +++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 -#include <linux/bpf.h> +#include "vmlinux.h" #include <bpf/bpf_helpers.h> -#include <linux/if_ether.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <linux/pkt_cls.h> + +#ifndef PAGE_SIZE +#define PAGE_SIZE __PAGE_SIZE +#endif +#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2) long change_tail_ret = 1; @@ -94,7 +94,7 @@ int change_tail(struct __sk_buff *skb) bpf_skb_change_tail(skb, len, 0); return TCX_PASS; } else if (payload[0] == 'E') { /* Error */ - change_tail_ret = bpf_skb_change_tail(skb, 65535, 0); + change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0); return TCX_PASS; } else if (payload[0] == 'Z') { /* Zero */ change_tail_ret = bpf_skb_change_tail(skb, 0, 0); diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c index e96d09e11115..ff8d755548b9 100644 --- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c @@ -17,7 +17,7 @@ #define MAX_SIG_SIZE 1024 __u32 monitored_pid; -__u32 user_keyring_serial; +__s32 user_keyring_serial; __u64 system_keyring_id; struct data { diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c index dc74d8cf9e3f..5904f45cfbc4 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c @@ -19,7 +19,9 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) /* Data length determine test case */ if (data_len == 54) { /* sizeof(pkt_v4) */ - offset = 4096; /* test too large offset */ + offset = 4096; /* test too large offset, 4k page size */ + } else if (data_len == 53) { /* sizeof(pkt_v4) - 1 */ + offset = 65536; /* test too large offset, 64k page size */ } else if (data_len == 74) { /* sizeof(pkt_v6) */ offset = 40; } else if (data_len == 64) { @@ -31,6 +33,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp) offset = 10; } else if (data_len == 9001) { offset = 4096; + } else if (data_len == 90000) { + offset = 10; /* test a small offset, 64k page size */ + } else if (data_len == 90001) { + offset = 65536; /* test too large offset, 64k page size */ } else { return XDP_ABORTED; /* No matching test */ } diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c index d41665d2ec8c..65e485c4468c 100644 --- 
a/tools/testing/selftests/bpf/progs/tracing_failure.c +++ b/tools/testing/selftests/bpf/progs/tracing_failure.c @@ -18,3 +18,15 @@ int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock) { return 0; } + +SEC("?fentry/__rcu_read_lock") +int BPF_PROG(tracing_deny) +{ + return 0; +} + +SEC("?fexit/do_exit") +int BPF_PROG(fexit_noreturns) +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c index e97e518516b6..2b4fdca162be 100644 --- a/tools/testing/selftests/bpf/progs/verifier_and.c +++ b/tools/testing/selftests/bpf/progs/verifier_and.c @@ -85,8 +85,14 @@ l0_%=: r0 = r0; \ SEC("socket") __description("check known subreg with unknown reg") -__success __failure_unpriv __msg_unpriv("R1 !read_ok") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w0 < 0x1 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R1 !read_ok'` */ +__xlated_unpriv("goto pc-1") /* `r1 = *(u32*)(r1 + 512)`, sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void known_subreg_with_unknown_reg(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c index 67509c5d3982..7f4827eede3c 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena.c @@ -3,6 +3,7 @@ #define BPF_NO_KFUNC_PROTOTYPES #include <vmlinux.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include "bpf_misc.h" @@ -114,6 +115,111 @@ int basic_alloc3(void *ctx) return 0; } +SEC("syscall") +__success __retval(0) +int basic_reserve1(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (!page) + return 1; + + page += __PAGE_SIZE; + + /* Reserve the second page */ + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 2; + + /* Try to explicitly allocate the reserved page. */ + page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0); + if (page) + return 3; + + /* Try to implicitly allocate the page (since there's only 2 of them). */ + page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0); + if (page) + return 4; +#endif + return 0; +} + +SEC("syscall") +__success __retval(0) +int basic_reserve2(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = arena_base(&arena); + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 1; + + page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0); + if ((u64)page) + return 2; +#endif + return 0; +} + +/* Reserve the same page twice, should return -EBUSY. */ +SEC("syscall") +__success __retval(0) +int reserve_twice(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + page = arena_base(&arena); + + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret) + return 1; + + ret = bpf_arena_reserve_pages(&arena, page, 1); + if (ret != -EBUSY) + return 2; +#endif + return 0; +} + +/* Try to reserve past the end of the arena. */ +SEC("syscall") +__success __retval(0) +int reserve_invalid_region(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *page; + int ret; + + /* Try a NULL pointer. 
*/ + ret = bpf_arena_reserve_pages(&arena, NULL, 3); + if (ret != -EINVAL) + return 1; + + page = arena_base(&arena); + + ret = bpf_arena_reserve_pages(&arena, page, 3); + if (ret != -EINVAL) + return 2; + + ret = bpf_arena_reserve_pages(&arena, page, 4096); + if (ret != -EINVAL) + return 3; + + ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1); + if (ret != -EINVAL) + return 4; +#endif + return 0; +} + SEC("iter.s/bpf_map") __success __log_level(2) int iter_maps1(struct bpf_iter__bpf_map *ctx) diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c index f94f30cf1bb8..9dbdf123542d 100644 --- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c +++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c @@ -67,6 +67,104 @@ int big_alloc1(void *ctx) return 0; } +/* Try to access a reserved page. Behavior should be identical with accessing unallocated pages. */ +SEC("syscall") +__success __retval(0) +int access_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + volatile char __arena *page; + char __arena *base; + const size_t len = 4; + int ret, i; + + /* Get a separate region of the arena. */ + page = base = arena_base(&arena) + 16384 * PAGE_SIZE; + + ret = bpf_arena_reserve_pages(&arena, base, len); + if (ret) + return 1; + + /* Try to dirty reserved memory. */ + for (i = 0; i < len && can_loop; i++) + *page = 0x5a; + + for (i = 0; i < len && can_loop; i++) { + page = (volatile char __arena *)(base + i * PAGE_SIZE); + + /* + * Error out in case either the write went through, + * or the address has random garbage. + */ + if (*page == 0x5a) + return 2 + 2 * i; + + if (*page) + return 2 + 2 * i + 1; + } +#endif + return 0; +} + +/* Try to allocate a region overlapping with a reservation. */ +SEC("syscall") +__success __retval(0) +int request_partially_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + volatile char __arena *page; + char __arena *base; + int ret; + + /* Add an arbitrary page offset. */ + page = base = arena_base(&arena) + 4096 * __PAGE_SIZE; + + ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4); + if (ret) + return 1; + + page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0); + if ((u64)page != 0ULL) + return 2; +#endif + return 0; +} + +SEC("syscall") +__success __retval(0) +int free_reserved(void *ctx) +{ +#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) + char __arena *addr; + char __arena *page; + int ret; + + /* Add an arbitrary page offset. */ + addr = arena_base(&arena) + 32768 * __PAGE_SIZE; + + page = bpf_arena_alloc_pages(&arena, addr, 2, NUMA_NO_NODE, 0); + if (!page) + return 1; + + ret = bpf_arena_reserve_pages(&arena, addr + 2 * __PAGE_SIZE, 2); + if (ret) + return 2; + + /* + * Reserved and allocated pages should be interchangeable for + * bpf_arena_free_pages(). Free a reserved and an allocated + * page with a single call. + */ + bpf_arena_free_pages(&arena, addr + __PAGE_SIZE , 2); + + /* The free call above should have succeeded, so this allocation should too. 
*/ + page = bpf_arena_alloc_pages(&arena, addr + __PAGE_SIZE, 2, NUMA_NO_NODE, 0); + if (!page) + return 3; +#endif + return 0; +} + #if defined(__BPF_FEATURE_ADDR_SPACE_CAST) #define PAGE_CNT 100 __u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */ diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c index 0eb33bb801b5..87a2c60d86e6 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c @@ -2,6 +2,7 @@ /* Converted from tools/testing/selftests/bpf/verifier/bounds.c */ #include <linux/bpf.h> +#include <../../../include/linux/filter.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" @@ -620,8 +621,14 @@ l1_%=: exit; \ SEC("socket") __description("bounds check mixed 32bit and 64bit arithmetic. test1") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void _32bit_and_64bit_arithmetic_test1(void) { asm volatile (" \ @@ -643,8 +650,14 @@ l1_%=: exit; \ SEC("socket") __description("bounds check mixed 32bit and 64bit arithmetic. test2") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void _32bit_and_64bit_arithmetic_test2(void) { asm volatile (" \ @@ -691,9 +704,14 @@ l0_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg = 0, reg xor 1") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 != 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg_0_reg_xor_1(void) { asm volatile (" \ @@ -719,9 +737,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg32 = 0, reg32 xor 1") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w1 != 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg32_0_reg32_xor_1(void) { asm volatile (" \ @@ -747,9 +770,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg = 2, reg xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 > 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg_2_reg_xor_3(void) { asm volatile (" \ @@ -829,9 +857,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for 
reg > 0, reg xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r1 >= 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg_0_reg_xor_3(void) { asm volatile (" \ @@ -858,9 +891,14 @@ l1_%=: r0 = 0; \ SEC("socket") __description("bounds check for reg32 > 0, reg32 xor 3") -__success __failure_unpriv -__msg_unpriv("R0 min value is outside of the allowed memory range") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if w1 >= 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("r0 = 0") +#endif __naked void reg32_0_reg32_xor_3(void) { asm volatile (" \ @@ -1028,7 +1066,7 @@ l0_%=: r0 = 0; \ SEC("xdp") __description("bound check with JMP_JSLT for crossing 64-bit signed boundary") __success __retval(0) -__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */ +__flag(BPF_F_TEST_REG_INVARIANTS) __naked void crossing_64_bit_signed_boundary_2(void) { asm volatile (" \ @@ -1334,4 +1372,300 @@ __naked void mult_sign_ovf(void) __imm(bpf_skb_store_bytes) : __clobber_all); } + +SEC("socket") +__description("64-bit addition, all outcomes overflow") +__success __log_level(2) +__msg("5: (0f) r3 += r3 {{.*}} R3_w=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)") +__retval(0) +__naked void add64_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r4 = r0;" + "r3 = 0xa000000000000000 ll;" + "r3 |= r4;" + "r3 += r3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit addition, partial overflow, result in unbounded reg") +__success __log_level(2) +__msg("4: (0f) r3 += r3 {{.*}} R3_w=scalar()") +__retval(0) +__naked void add64_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r4 = r0;" + "r3 = 2;" + "r3 |= r4;" + "r3 += r3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit addition overflow, all outcomes overflow") +__success __log_level(2) +__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void add32_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w4 = w0;" + "w3 = 0xa0000000;" + "w3 |= w4;" + "w3 += w3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit addition, partial overflow, result in unbounded u32 bounds") +__success __log_level(2) +__msg("4: (0c) w3 += w3 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void add32_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w4 = w0;" + "w3 = 2;" + "w3 |= w4;" + "w3 += w3;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit subtraction, all outcomes underflow") +__success __log_level(2) +__msg("6: (1f) r3 -= r1 {{.*}} R3_w=scalar(umin=1,umax=0x8000000000000000)") +__retval(0) +__naked void sub64_full_overflow(void) +{ + asm volatile ( + "call 
%[bpf_get_prandom_u32];" + "r1 = r0;" + "r2 = 0x8000000000000000 ll;" + "r1 |= r2;" + "r3 = 0;" + "r3 -= r1;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("64-bit subtraction, partial overflow, result in unbounded reg") +__success __log_level(2) +__msg("3: (1f) r3 -= r2 {{.*}} R3_w=scalar()") +__retval(0) +__naked void sub64_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "r3 = r0;" + "r2 = 1;" + "r3 -= r2;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit subtraction overflow, all outcomes underflow") +__success __log_level(2) +__msg("5: (1c) w3 -= w1 {{.*}} R3_w=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void sub32_full_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w1 = w0;" + "w2 = 0x80000000;" + "w1 |= w2;" + "w3 = 0;" + "w3 -= w1;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("32-bit subtraction, partial overflow, result in unbounded u32 bounds") +__success __log_level(2) +__msg("3: (1c) w3 -= w2 {{.*}} R3_w=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))") +__retval(0) +__naked void sub32_partial_overflow(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "w3 = w0;" + "w2 = 1;" + "w3 -= w2;" + "r0 = 0;" + "exit" + : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("socket") +__description("dead branch on jset, does not result in invariants violation error") +__success __log_level(2) +__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS) +__naked void jset_range_analysis(void) +{ + asm volatile (" \ + call %[bpf_get_netns_cookie]; \ + if r0 == 0 goto l0_%=; \ + if r0 & 0xffffffff goto +0; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_netns_cookie) + : __clobber_all); +} + +/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges + * overlap on the negative side. At instruction 7, the ranges look as follows: + * + * 0 umin=0xfffffcf1 umax=0xff..ff6e U64_MAX + * | [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * |xxxxxxxxxx] [xxxxxxxxxxxx| + * 0 smax=0xeffffeee smin=-655 -1 + * + * We should therefore deduce the following new bounds: + * + * 0 u64=[0xff..ffd71;0xff..ff6e] U64_MAX + * | [xxx] | + * |----------------------------|------------------------------| + * | [xxx] | + * 0 s64=[-655;-146] -1 + * + * Without the deduction cross sign boundary, we end up with an invariant + * violation error. + */ +SEC("socket") +__description("bounds deduction cross sign boundary, negative overlap") +__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS) +__msg("7: (1f) r0 -= r6 {{.*}} R0=scalar(smin=smin32=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,umin32=0xfffffd71,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff))") +__retval(0) +__naked void bounds_deduct_negative_overlap(void) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + w3 = w0; \ + w6 = (s8)w0; \ + r0 = (s8)r0; \ + if w6 >= 0xf0000000 goto l0_%=; \ + r0 += r6; \ + r6 += 400; \ + r0 -= r6; \ + if r3 < r0 goto l0_%=; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges + * overlap on the positive side. 
At instruction 3, the ranges look as follows: + * + * 0 umin=0 umax=0xffffffffffffff00 U64_MAX + * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * |xxxxxxxx] [xxxxxxxx| + * 0 smax=127 smin=-128 -1 + * + * We should therefore deduce the following new bounds: + * + * 0 u64=[0;127] U64_MAX + * [xxxxxxxx] | + * |----------------------------|------------------------------| + * [xxxxxxxx] | + * 0 s64=[0;127] -1 + * + * Without the deduction cross sign boundary, the program is rejected due to + * the frame pointer write. + */ +SEC("socket") +__description("bounds deduction cross sign boundary, positive overlap") +__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS) +__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))") +__retval(0) +__naked void bounds_deduct_positive_overlap(void) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + r0 = (s8)r0; \ + r1 = 0xffffffffffffff00; \ + if r0 > r1 goto l0_%=; \ + if r0 < 128 goto l0_%=; \ + r10 = 0; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +/* This test is the same as above, but the s64 and u64 ranges overlap in two + * places. At instruction 3, the ranges look as follows: + * + * 0 umin=0 umax=0xffffffffffffff80 U64_MAX + * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] | + * |----------------------------|------------------------------| + * |xxxxxxxx] [xxxxxxxx| + * 0 smax=127 smin=-128 -1 + * + * 0xffffffffffffff80 = (u64)-128. We therefore can't deduce anything new and + * the program should fail due to the frame pointer write. + */ +SEC("socket") +__description("bounds deduction cross sign boundary, two overlaps") +__failure __flag(BPF_F_TEST_REG_INVARIANTS) +__msg("3: (2d) if r0 > r1 {{.*}} R0_w=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)") +__msg("frame pointer is read only") +__naked void bounds_deduct_two_overlaps(void) +{ + asm volatile(" \ + call %[bpf_get_prandom_u32]; \ + r0 = (s8)r0; \ + r1 = 0xffffffffffffff80; \ + if r0 > r1 goto l0_%=; \ + if r0 < 128 goto l0_%=; \ + r10 = 0; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c index c506afbdd936..260a6df264e3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c +++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c @@ -159,13 +159,16 @@ __failure_unpriv __naked void deducing_bounds_from_const_10(void) { asm volatile (" \ + r6 = r1; \ r0 = 0; \ if r0 s<= 0 goto l0_%=; \ -l0_%=: /* Marks reg as unknown. */ \ - r0 = -r0; \ - r0 -= r1; \ +l0_%=: /* Marks r0 as unknown. 
*/ \ + call %[bpf_get_prandom_u32]; \ + r0 -= r6; \ exit; \ -" ::: __clobber_all); +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c index a83809a1dbbf..0450840c92d9 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ctx.c +++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c @@ -218,4 +218,29 @@ __naked void null_check_8_null_bind(void) : __clobber_all); } +#define narrow_load(type, ctx, field) \ + SEC(type) \ + __description("narrow load on field " #field " of " #ctx) \ + __failure __msg("invalid bpf_context access") \ + __naked void invalid_narrow_load##ctx##field(void) \ + { \ + asm volatile (" \ + r1 = *(u32 *)(r1 + %[off]); \ + r0 = 0; \ + exit;" \ + : \ + : __imm_const(off, offsetof(struct ctx, field) + 4) \ + : __clobber_all); \ + } + +narrow_load("cgroup/getsockopt", bpf_sockopt, sk); +narrow_load("cgroup/getsockopt", bpf_sockopt, optval); +narrow_load("cgroup/getsockopt", bpf_sockopt, optval_end); +narrow_load("tc", __sk_buff, sk); +narrow_load("cgroup/bind4", bpf_sock_addr, sk); +narrow_load("sockops", bpf_sock_ops, sk); +narrow_load("sockops", bpf_sock_ops, skb_data); +narrow_load("sockops", bpf_sock_ops, skb_data_end); +narrow_load("sockops", bpf_sock_ops, skb_hwtstamp); + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c index 458984da804c..34e0c012ee76 100644 --- a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c +++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c @@ -77,7 +77,7 @@ l0_%=: exit; \ SEC("tc") __description("MOD32 overflow, check 1") -__success __retval(INT_MIN) +__success __retval(_INT_MIN) __naked void mod32_overflow_check_1(void) { asm volatile (" \ @@ -92,7 +92,7 @@ __naked void mod32_overflow_check_1(void) SEC("tc") __description("MOD32 overflow, check 2") -__success __retval(INT_MIN) +__success __retval(_INT_MIN) __naked void mod32_overflow_check_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c index 4ab0ef18d7eb..181da86ba5f0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c @@ -179,4 +179,132 @@ int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags) return subprog_trusted_acq_rel(task); } +__weak int subprog_untrusted_bad_tags(struct task_struct *task __arg_untrusted __arg_nullable) +{ + return task->pid; +} + +SEC("tp_btf/sys_enter") +__failure +__msg("arg#0 untrusted cannot be combined with any other tags") +int untrusted_bad_tags(void *ctx) +{ + return subprog_untrusted_bad_tags(0); +} + +struct local_type_wont_be_accepted {}; + +__weak int subprog_untrusted_bad_type(struct local_type_wont_be_accepted *p __arg_untrusted) +{ + return 0; +} + +SEC("tp_btf/sys_enter") +__failure +__msg("arg#0 reference type('STRUCT local_type_wont_be_accepted') has no matches") +int untrusted_bad_type(void *ctx) +{ + return subprog_untrusted_bad_type(bpf_rdonly_cast(0, 0)); +} + +__weak int subprog_untrusted(const volatile struct task_struct *restrict task __arg_untrusted) +{ + return task->pid; +} + +SEC("tp_btf/sys_enter") +__success +__log_level(2) +__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()") +__msg("Func#1 ('subprog_untrusted') 
is global and assumed valid.") +__msg("Validating subprog_untrusted() func#1...") +__msg(": R1=untrusted_ptr_task_struct") +int trusted_to_untrusted(void *ctx) +{ + return subprog_untrusted(bpf_get_current_task_btf()); +} + +char mem[16]; +u32 off; + +SEC("tp_btf/sys_enter") +__success +int anything_to_untrusted(void *ctx) +{ + /* untrusted to untrusted */ + subprog_untrusted(bpf_core_cast(0, struct task_struct)); + /* wrong type to untrusted */ + subprog_untrusted((void *)bpf_core_cast(0, struct bpf_verifier_env)); + /* map value to untrusted */ + subprog_untrusted((void *)mem); + /* scalar to untrusted */ + subprog_untrusted(0); + /* variable offset to untrusted (map) */ + subprog_untrusted((void *)mem + off); + /* variable offset to untrusted (trusted) */ + subprog_untrusted((void *)bpf_get_current_task_btf() + off); + return 0; +} + +__weak int subprog_untrusted2(struct task_struct *task __arg_untrusted) +{ + return subprog_trusted_task_nullable(task); +} + +SEC("tp_btf/sys_enter") +__failure +__msg("R1 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_") +__msg("Caller passes invalid args into func#{{.*}} ('subprog_trusted_task_nullable')") +int untrusted_to_trusted(void *ctx) +{ + return subprog_untrusted2(bpf_get_current_task_btf()); +} + +__weak int subprog_void_untrusted(void *p __arg_untrusted) +{ + return *(int *)p; +} + +__weak int subprog_char_untrusted(char *p __arg_untrusted) +{ + return *(int *)p; +} + +__weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted) +{ + return *(int *)p; +} + +SEC("tp_btf/sys_enter") +__success +__log_level(2) +__msg("r1 = {{.*}}; {{.*}}R1_w=trusted_ptr_task_struct()") +__msg("Func#1 ('subprog_void_untrusted') is global and assumed valid.") +__msg("Validating subprog_void_untrusted() func#1...") +__msg(": R1=rdonly_untrusted_mem(sz=0)") +int trusted_to_untrusted_mem(void *ctx) +{ + return subprog_void_untrusted(bpf_get_current_task_btf()); +} + +SEC("tp_btf/sys_enter") +__success +int anything_to_untrusted_mem(void *ctx) +{ + /* untrusted to untrusted mem */ + subprog_void_untrusted(bpf_core_cast(0, struct task_struct)); + /* map value to untrusted mem */ + subprog_void_untrusted(mem); + /* scalar to untrusted mem */ + subprog_void_untrusted(0); + /* variable offset to untrusted mem (map) */ + subprog_void_untrusted((void *)mem + off); + /* variable offset to untrusted mem (trusted) */ + subprog_void_untrusted(bpf_get_current_task_btf() + off); + /* variable offset to untrusted char/enum (map) */ + subprog_char_untrusted(mem + off); + subprog_enum_untrusted((void *)mem + off); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c index 7d088ba99ea5..16b761e510f0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c @@ -139,4 +139,122 @@ __naked void on_the_inner_map_pointer(void) : __clobber_all); } +SEC("socket") +__description("map_ptr is never null") +__success +__naked void map_ptr_is_never_null(void) +{ + asm volatile (" \ + r0 = 0; \ + r1 = %[map_in_map] ll; \ + if r1 != 0 goto l0_%=; \ + r10 = 42; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +SEC("socket") +__description("map_ptr is never null inner") +__success +__naked void map_ptr_is_never_null_inner(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u32*)(r10 - 4) = r1; \ + r2 = r10; \ + r2 += -4; \ + r1 
= %[map_in_map] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l0_%=; \ + if r0 != 0 goto l0_%=; \ + r10 = 42; \ +l0_%=: exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +SEC("socket") +__description("map_ptr is never null inner spill fill") +__success +__naked void map_ptr_is_never_null_inner_spill_fill(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u32*)(r10 - 4) = r1; \ + r2 = r10; \ + r2 += -4; \ + r1 = %[map_in_map] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 != 0 goto l0_%=; \ + exit; \ +l0_%=: *(u64 *)(r10 -16) = r0; \ + r1 = *(u64 *)(r10 -16); \ + if r1 == 0 goto l1_%=; \ + exit; \ +l1_%=: r10 = 42; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_in_map) + : __clobber_all); +} + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 64 * 1024); + }); +} rb_in_map SEC(".maps"); + +struct rb_ctx { + void *rb; + struct bpf_dynptr dptr; +}; + +static __always_inline struct rb_ctx __rb_event_reserve(__u32 sz) +{ + struct rb_ctx rb_ctx = {}; + void *rb; + __u32 cpu = bpf_get_smp_processor_id(); + __u32 rb_slot = cpu & 1; + + rb = bpf_map_lookup_elem(&rb_in_map, &rb_slot); + if (!rb) + return rb_ctx; + + rb_ctx.rb = rb; + bpf_ringbuf_reserve_dynptr(rb, sz, 0, &rb_ctx.dptr); + + return rb_ctx; +} + +static __noinline void __rb_event_submit(struct rb_ctx *ctx) +{ + if (!ctx->rb) + return; + + /* If the verifier (incorrectly) concludes that ctx->rb can be + * NULL at this point, we'll get "BPF_EXIT instruction in main + * prog would lead to reference leak" error + */ + bpf_ringbuf_submit_dynptr(&ctx->dptr, 0); +} + +SEC("socket") +int map_ptr_is_never_null_rb(void *ctx) +{ + struct rb_ctx event_ctx = __rb_event_reserve(256); + __rb_event_submit(&event_ctx); + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c index 994bbc346d25..a4d8814eb5ed 100644 --- a/tools/testing/selftests/bpf/progs/verifier_movsx.c +++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c @@ -245,7 +245,13 @@ l0_%=: \ SEC("socket") __description("MOV32SX, S8, var_off not u32_max, positive after s8 extension") __success __retval(0) -__failure_unpriv __msg_unpriv("frame pointer is read only") +__success_unpriv +#ifdef SPEC_V1 +__xlated_unpriv("w0 = 0") +__xlated_unpriv("exit") +__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */ +__xlated_unpriv("goto pc-1") +#endif __naked void mov64sx_s32_varoff_2(void) { asm volatile (" \ @@ -267,7 +273,13 @@ l0_%=: \ SEC("socket") __description("MOV32SX, S8, var_off not u32_max, negative after s8 extension") __success __retval(0) -__failure_unpriv __msg_unpriv("frame pointer is read only") +__success_unpriv +#ifdef SPEC_V1 +__xlated_unpriv("w0 = 0") +__xlated_unpriv("exit") +__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */ +__xlated_unpriv("goto pc-1") +#endif __naked void mov64sx_s32_varoff_3(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c index 9fe5d255ee37..73fee2aec698 100644 --- a/tools/testing/selftests/bpf/progs/verifier_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_precision.c @@ -231,4 +231,74 @@ __naked void bpf_cond_op_not_r10(void) ::: __clobber_all); } 
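+ +/* The bpf_neg_* tests below cover BPF_NEG handling: the lsm.s/socket_connect + * return-value range makes the exact value of R0 matter at exit, and the + * success cases check in the verifier log that the precision mark on R0 is + * propagated back through the 32-bit and 64-bit negations to the initial + * constant load. + */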
+SEC("lsm.s/socket_connect") +__success __log_level(2) +__msg("0: (b7) r0 = 1 ; R0_w=1") +__msg("1: (84) w0 = -w0 ; R0_w=0xffffffff") +__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0") +__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") +__naked int bpf_neg_2(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -1 is allowed + */ + asm volatile ( + "r0 = 1;" + "w0 = -w0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__failure __msg("At program exit the register R0 has") +__naked int bpf_neg_3(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -10000 is not allowed. + */ + asm volatile ( + "r0 = 10000;" + "w0 = -w0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__success __log_level(2) +__msg("0: (b7) r0 = 1 ; R0_w=1") +__msg("1: (87) r0 = -r0 ; R0_w=-1") +__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0") +__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1") +__naked int bpf_neg_4(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -1 is allowed + */ + asm volatile ( + "r0 = 1;" + "r0 = -r0;" + "exit;" + ::: __clobber_all); +} + +SEC("lsm.s/socket_connect") +__failure __msg("At program exit the register R0 has") +__naked int bpf_neg_5(void) +{ + /* + * lsm.s/socket_connect requires a return value within [-4095, 0]. + * Returning -10000 is not allowed. + */ + asm volatile ( + "r0 = 10000;" + "r0 = -r0;" + "exit;" + ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c index fc91b414364e..1ecd34ebde19 100644 --- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c @@ -8,7 +8,7 @@ /* From include/linux/filter.h */ #define MAX_BPF_STACK 512 -#if defined(__TARGET_ARCH_x86) +#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64) struct elem { struct bpf_timer t; @@ -30,6 +30,18 @@ __jited(" movabsq $0x{{.*}}, %r9") __jited(" addq %gs:{{.*}}, %r9") __jited(" movl $0x2a, %edi") __jited(" movq %rdi, -0x100(%r9)") +__arch_arm64 +__jited(" stp x25, x27, [sp, {{.*}}]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited("...") +__jited(" ldp x25, x27, [sp], {{.*}}") __naked void private_stack_single_prog(void) { asm volatile (" \ @@ -45,6 +57,9 @@ __description("No private stack") __success __arch_x86_64 __jited(" subq $0x8, %rsp") +__arch_arm64 +__jited(" mov x25, sp") +__jited(" sub sp, sp, #0x10") __naked void no_private_stack_nested(void) { asm volatile (" \ @@ -81,6 +96,19 @@ __jited(" pushq %r9") __jited(" callq 0x{{.*}}") __jited(" popq %r9") __jited(" xorl %eax, %eax") +__arch_arm64 +__jited(" stp x25, x27, [sp, {{.*}}]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") 
+__jited(" bl {{.*}}") +__jited("...") +__jited(" ldp x25, x27, [sp], {{.*}}") __naked void private_stack_nested_1(void) { asm volatile (" \ @@ -131,6 +159,24 @@ __jited(" movq %rdi, -0x200(%r9)") __jited(" pushq %r9") __jited(" callq") __jited(" popq %r9") +__arch_arm64 +__jited("func #1") +__jited("...") +__jited(" stp x25, x27, [sp, {{.*}}]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" mov x7, #0x0") +__jited(" ldp x25, x27, [sp], {{.*}}") __naked void private_stack_callback(void) { asm volatile (" \ @@ -154,6 +200,28 @@ __arch_x86_64 __jited(" pushq %r9") __jited(" callq") __jited(" popq %r9") +__arch_arm64 +__jited(" stp x29, x30, [sp, #-0x10]!") +__jited(" mov x29, sp") +__jited(" stp xzr, x26, [sp, #-0x10]!") +__jited(" mov x26, sp") +__jited(" stp x19, x20, [sp, #-0x10]!") +__jited(" stp x21, x22, [sp, #-0x10]!") +__jited(" stp x23, x24, [sp, #-0x10]!") +__jited(" stp x25, x26, [sp, #-0x10]!") +__jited(" stp x27, x28, [sp, #-0x10]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited(" mov x0, #0x0") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" ldp x27, x28, [sp], #0x10") int private_stack_exception_main_prog(void) { asm volatile (" \ @@ -179,6 +247,19 @@ __jited(" movq %rdi, -0x200(%r9)") __jited(" pushq %r9") __jited(" callq") __jited(" popq %r9") +__arch_arm64 +__jited(" stp x27, x28, [sp, #-0x10]!") +__jited(" mov x27, {{.*}}") +__jited(" movk x27, {{.*}}, lsl #16") +__jited(" movk x27, {{.*}}") +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") +__jited(" mov x0, #0x2a") +__jited(" str x0, [x27]") +__jited(" bl 0x{{.*}}") +__jited(" add x7, x0, #0x0") +__jited(" ldp x27, x28, [sp], #0x10") int private_stack_exception_sub_prog(void) { asm volatile (" \ @@ -220,6 +301,10 @@ __description("Private stack, async callback, not nested") __success __retval(0) __arch_x86_64 __jited(" movabsq $0x{{.*}}, %r9") +__arch_arm64 +__jited(" mrs x10, TPIDR_EL{{[0-1]}}") +__jited(" add x27, x27, x10") +__jited(" add x25, x27, {{.*}}") int private_stack_async_callback_1(void) { struct bpf_timer *arr_timer; @@ -241,6 +326,8 @@ __description("Private stack, async callback, potential nesting") __success __retval(0) __arch_x86_64 __jited(" subq $0x100, %rsp") +__arch_arm64 +__jited(" sub sp, sp, #0x100") int private_stack_async_callback_2(void) { struct bpf_timer *arr_timer; diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c index 683a882b3e6d..910365201f68 100644 --- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c +++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c @@ -27,7 +27,7 @@ struct bpf_key {} __attribute__((preserve_access_index)); extern void bpf_key_put(struct bpf_key *key) __ksym; extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_user_key(__s32 
serial, __u64 flags) __ksym; /* BTF FUNC records are not generated for kfuncs referenced * from inline assembly. These records are necessary for diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall.c b/tools/testing/selftests/bpf/progs/verifier_tailcall.c new file mode 100644 index 000000000000..b4acce60fb9b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_tailcall.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_misc.h" + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} map_array SEC(".maps"); + +SEC("socket") +__description("invalid map type for tail call") +__failure __msg("expected prog array map for tail call") +__failure_unpriv +__naked void invalid_map_for_tail_call(void) +{ + asm volatile (" \ + r2 = %[map_array] ll; \ + r3 = 0; \ + call %[bpf_tail_call]; \ + exit; \ +" : + : __imm(bpf_tail_call), + __imm_addr(map_array) + : __clobber_all); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c index a4a5e2071604..28b4f7035ceb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c +++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c @@ -572,8 +572,14 @@ l0_%=: exit; \ SEC("socket") __description("alu32: mov u32 const") -__success __failure_unpriv __msg_unpriv("R7 invalid mem access 'scalar'") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r0 == 0x0 goto pc+2") +__xlated_unpriv("nospec") /* inserted to prevent `R7 invalid mem access 'scalar'` */ +__xlated_unpriv("goto pc-1") /* sanitized dead code */ +__xlated_unpriv("exit") +#endif __naked void alu32_mov_u32_const(void) { asm volatile (" \ @@ -619,12 +625,11 @@ __naked void pass_pointer_to_tail_call(void) SEC("socket") __description("unpriv: cmp map pointer with zero") -__success __failure_unpriv __msg_unpriv("R1 pointer comparison") +__success __success_unpriv __retval(0) __naked void cmp_map_pointer_with_zero(void) { asm volatile (" \ - r1 = 0; \ r1 = %[map_hash_8b] ll; \ if r1 == 0 goto l0_%=; \ l0_%=: r0 = 0; \ @@ -635,6 +640,22 @@ l0_%=: r0 = 0; \ } SEC("socket") +__description("unpriv: cmp map pointer with const") +__success __failure_unpriv __msg_unpriv("R1 pointer comparison prohibited") +__retval(0) +__naked void cmp_map_pointer_with_const(void) +{ + asm volatile (" \ + r1 = %[map_hash_8b] ll; \ + if r1 == 0x0000beef goto l0_%=; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") __description("unpriv: write into frame pointer") __failure __msg("frame pointer is read only") __failure_unpriv @@ -723,4 +744,210 @@ l0_%=: r0 = 0; \ " ::: __clobber_all); } +SEC("socket") +__description("unpriv: Spectre v1 path-based type confusion of scalar as stack-ptr") +__success __success_unpriv __retval(0) +#ifdef SPEC_V1 +__xlated_unpriv("if r0 != 0x1 goto pc+2") +/* This nospec prevents the exploit because it forces the mispredicted (not + * taken) `if r0 != 0x0 goto l0_%=` to resolve before using r6 as a pointer. + * This causes the CPU to realize that `r6 = r9` should have never executed. It + * ensures that r6 always contains a readable stack slot ptr when the insn after + * the nospec executes. 
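+ * Without the nospec, bit 0 of the byte read speculatively through r6 is + * shifted into a lookup key of 0 or 512, and the final value load leaves a + * cache footprint that reveals the bit (see the is_cached(map[0|512]) comment + * in the asm below).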
+ */ +__xlated_unpriv("nospec") +__xlated_unpriv("r9 = *(u8 *)(r6 +0)") +#endif +__naked void unpriv_spec_v1_type_confusion(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l2_%=; \ + /* r0: pointer to a map array entry */ \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + /* r1, r2: prepared call args */ \ + r6 = r10; \ + r6 += -8; \ + /* r6: pointer to readable stack slot */ \ + r9 = 0xffffc900; \ + r9 <<= 32; \ + /* r9: scalar controlled by attacker */ \ + r0 = *(u64 *)(r0 + 0); /* cache miss */ \ + if r0 != 0x0 goto l0_%=; \ + r6 = r9; \ +l0_%=: if r0 != 0x1 goto l1_%=; \ + r9 = *(u8 *)(r6 + 0); \ +l1_%=: /* leak r9 */ \ + r9 &= 1; \ + r9 <<= 9; \ + *(u64*)(r10 - 8) = r9; \ + call %[bpf_map_lookup_elem]; \ + if r0 == 0 goto l2_%=; \ + /* leak secret into is_cached(map[0|512]): */ \ + r0 = *(u64 *)(r0 + 0); \ +l2_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("unpriv: ldimm64 before Spectre v4 barrier") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V4 +__xlated_unpriv("r1 = 0x2020200005642020") /* should not matter */ +__xlated_unpriv("*(u64 *)(r10 -8) = r1") +__xlated_unpriv("nospec") +#endif +__naked void unpriv_ldimm64_spectre_v4(void) +{ + asm volatile (" \ + r1 = 0x2020200005642020 ll; \ + *(u64 *)(r10 -8) = r1; \ + r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unpriv: Spectre v1 and v4 barrier") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V1 +#ifdef SPEC_V4 +/* starts with r0 == r8 == r9 == 0 */ +__xlated_unpriv("if r8 != 0x0 goto pc+1") +__xlated_unpriv("goto pc+2") +__xlated_unpriv("if r9 == 0x0 goto pc+4") +__xlated_unpriv("r2 = r0") +/* Following nospec required to prevent following dangerous `*(u64 *)(NOT_FP -64) + * = r1` iff `if r9 == 0 goto pc+4` was mispredicted because of Spectre v1. The + * test therefore ensures the Spectre-v4--induced nospec does not prevent the + * Spectre-v1--induced speculative path from being fully analyzed. 
+ */ +__xlated_unpriv("nospec") /* Spectre v1 */ +__xlated_unpriv("*(u64 *)(r2 -64) = r1") /* could be used to leak r2 */ +__xlated_unpriv("nospec") /* Spectre v4 */ +#endif +#endif +__naked void unpriv_spectre_v1_and_v4(void) +{ + asm volatile (" \ + r1 = 0; \ + *(u64*)(r10 - 8) = r1; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r8 = r0; \ + r2 = r10; \ + r2 += -8; \ + r1 = %[map_hash_8b] ll; \ + call %[bpf_map_lookup_elem]; \ + r9 = r0; \ + r0 = r10; \ + r1 = 0; \ + r2 = r10; \ + if r8 != 0 goto l0_%=; \ + if r9 != 0 goto l0_%=; \ + r0 = 0; \ +l0_%=: if r8 != 0 goto l1_%=; \ + goto l2_%=; \ +l1_%=: if r9 == 0 goto l3_%=; \ + r2 = r0; \ +l2_%=: *(u64 *)(r2 -64) = r1; \ +l3_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_map_lookup_elem), + __imm_addr(map_hash_8b) + : __clobber_all); +} + +SEC("socket") +__description("unpriv: Spectre v1 and v4 barrier (simple)") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V1 +#ifdef SPEC_V4 +__xlated_unpriv("if r8 != 0x0 goto pc+1") +__xlated_unpriv("goto pc+2") +__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */ +__xlated_unpriv("goto pc-1") /* r2 = r0 */ +__xlated_unpriv("nospec") +__xlated_unpriv("*(u64 *)(r2 -64) = r1") +__xlated_unpriv("nospec") +#endif +#endif +__naked void unpriv_spectre_v1_and_v4_simple(void) +{ + asm volatile (" \ + r8 = 0; \ + r9 = 0; \ + r0 = r10; \ + r1 = 0; \ + r2 = r10; \ + if r8 != 0 goto l0_%=; \ + if r9 != 0 goto l0_%=; \ + r0 = 0; \ +l0_%=: if r8 != 0 goto l1_%=; \ + goto l2_%=; \ +l1_%=: if r9 == 0 goto l3_%=; \ + r2 = r0; \ +l2_%=: *(u64 *)(r2 -64) = r1; \ +l3_%=: r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + +SEC("socket") +__description("unpriv: ldimm64 before Spectre v1 and v4 barrier (simple)") +__success __success_unpriv +__retval(0) +#ifdef SPEC_V1 +#ifdef SPEC_V4 +__xlated_unpriv("if r8 != 0x0 goto pc+1") +__xlated_unpriv("goto pc+4") +__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */ +__xlated_unpriv("goto pc-1") /* r2 = r0 */ +__xlated_unpriv("goto pc-1") /* r1 = 0x2020200005642020 ll */ +__xlated_unpriv("goto pc-1") /* second part of ldimm64 */ +__xlated_unpriv("nospec") +__xlated_unpriv("*(u64 *)(r2 -64) = r1") +__xlated_unpriv("nospec") +#endif +#endif +__naked void unpriv_ldimm64_spectre_v1_and_v4_simple(void) +{ + asm volatile (" \ + r8 = 0; \ + r9 = 0; \ + r0 = r10; \ + r1 = 0; \ + r2 = r10; \ + if r8 != 0 goto l0_%=; \ + if r9 != 0 goto l0_%=; \ + r0 = 0; \ +l0_%=: if r8 != 0 goto l1_%=; \ + goto l2_%=; \ +l1_%=: if r9 == 0 goto l3_%=; \ + r2 = r0; \ + r1 = 0x2020200005642020 ll; \ +l2_%=: *(u64 *)(r2 -64) = r1; \ +l3_%=: r0 = 0; \ + exit; \ +" ::: __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c index 5ba6e53571c8..af7938ce56cb 100644 --- a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c +++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c @@ -231,6 +231,10 @@ __retval(1) __naked void ptr_unknown_vs_unknown_lt(void) { asm volatile (" \ + r8 = r1; \ + call %[bpf_get_prandom_u32]; \ + r9 = r0; \ + r1 = r8; \ r0 = *(u32*)(r1 + %[__sk_buff_len]); \ r1 = 0; \ *(u64*)(r10 - 8) = r1; \ @@ -245,11 +249,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \ r4 = *(u8*)(r0 + 0); \ if r4 == 1 goto l3_%=; \ r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x3; \ goto l4_%=; \ l3_%=: r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x7; \ l4_%=: r1 += r0; \ r0 = *(u8*)(r1 + 0); \ @@ -259,7 +263,8 
@@ l2_%=: r0 = 1; \ : __imm(bpf_map_lookup_elem), __imm_addr(map_array_48b), __imm_addr(map_hash_16b), - __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)) + __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)), + __imm(bpf_get_prandom_u32) : __clobber_all); } @@ -271,6 +276,10 @@ __retval(1) __naked void ptr_unknown_vs_unknown_gt(void) { asm volatile (" \ + r8 = r1; \ + call %[bpf_get_prandom_u32]; \ + r9 = r0; \ + r1 = r8; \ r0 = *(u32*)(r1 + %[__sk_buff_len]); \ r1 = 0; \ *(u64*)(r10 - 8) = r1; \ @@ -285,11 +294,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \ r4 = *(u8*)(r0 + 0); \ if r4 == 1 goto l3_%=; \ r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x7; \ goto l4_%=; \ l3_%=: r1 = 6; \ - r1 = -r1; \ + r1 = r9; \ r1 &= 0x3; \ l4_%=: r1 += r0; \ r0 = *(u8*)(r1 + 0); \ @@ -299,7 +308,8 @@ l2_%=: r0 = 1; \ : __imm(bpf_map_lookup_elem), __imm_addr(map_array_48b), __imm_addr(map_hash_16b), - __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)) + __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)), + __imm(bpf_get_prandom_u32) : __clobber_all); } @@ -398,7 +408,8 @@ l2_%=: r0 = 1; \ SEC("socket") __description("map access: mixing value pointer and scalar, 1") -__success __failure_unpriv __msg_unpriv("R2 pointer comparison prohibited") +__success __failure_unpriv +__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root") __retval(0) __naked void value_pointer_and_scalar_1(void) { @@ -433,6 +444,7 @@ l2_%=: /* common instruction */ \ l3_%=: /* branch B */ \ r0 = 0x13371337; \ /* verifier follows fall-through */ \ + /* unpriv: nospec (inserted to prevent `R2 pointer comparison prohibited`) */\ if r2 != 0x100000 goto l4_%=; \ r0 = 0; \ exit; \ @@ -450,7 +462,8 @@ l4_%=: /* fake-dead code; targeted from branch A to \ SEC("socket") __description("map access: mixing value pointer and scalar, 2") -__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'") +__success __failure_unpriv +__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root") __retval(0) __naked void value_pointer_and_scalar_2(void) { @@ -492,6 +505,7 @@ l4_%=: /* fake-dead code; targeted from branch A to \ * prevent dead code sanitization, rejected \ * via branch B however \ */ \ + /* unpriv: nospec (inserted to prevent `R0 invalid mem access 'scalar'`) */\ r0 = *(u8*)(r0 + 0); \ r0 = 0; \ exit; \ @@ -1296,9 +1310,13 @@ l0_%=: r0 = 1; \ SEC("socket") __description("map access: value_ptr -= unknown scalar, 2") -__success __failure_unpriv -__msg_unpriv("R0 pointer arithmetic of map value goes out of range") +__success __success_unpriv __retval(1) +#ifdef SPEC_V1 +__xlated_unpriv("r1 &= 7") +__xlated_unpriv("nospec") /* inserted to prevent `R0 pointer arithmetic of map value goes out of range` */ +__xlated_unpriv("r0 -= r1") +#endif __naked void value_ptr_unknown_scalar_2_2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c index a7c0a553aa50..3e2d76ee8050 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c @@ -2,6 +2,7 @@ /* Copyright (c) 2024 Google LLC. 
*/ #include <vmlinux.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -82,4 +83,21 @@ int BPF_PROG(path_d_path_from_file_argument, struct file *file) return 0; } +SEC("lsm.s/inode_rename") +__success +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *inode = new_dentry->d_inode; + ino_t ino; + + if (!inode) + return 0; + ino = inode->i_ino; + if (ino == 0) + return -EACCES; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c index d6d3f4fcb24c..4b392c6c8fc4 100644 --- a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c +++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c @@ -2,6 +2,7 @@ /* Copyright (c) 2024 Google LLC. */ #include <vmlinux.h> +#include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include <linux/limits.h> @@ -158,4 +159,18 @@ int BPF_PROG(path_d_path_kfunc_non_lsm, struct path *path, struct file *f) return 0; } +SEC("lsm.s/inode_rename") +__failure __msg("invalid mem access 'trusted_ptr_or_null_'") +int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry, + unsigned int flags) +{ + struct inode *inode = new_dentry->d_inode; + ino_t ino; + + ino = inode->i_ino; + if (ino == 0) + return -EACCES; + return 0; +} char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_bpftool_map.sh b/tools/testing/selftests/bpf/test_bpftool_map.sh new file mode 100755 index 000000000000..515b1df0501e --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool_map.sh @@ -0,0 +1,398 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +TESTNAME="bpftool_map" +BPF_FILE="security_bpf_map.bpf.o" +BPF_ITER_FILE="bpf_iter_map_elem.bpf.o" +PROTECTED_MAP_NAME="prot_map" +NOT_PROTECTED_MAP_NAME="not_prot_map" +BPF_FS_TMP_PARENT="/tmp" +BPF_FS_PARENT=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) +BPF_FS_PARENT=${BPF_FS_PARENT:-$BPF_FS_TMP_PARENT} +# bpftool will mount bpf file system under BPF_DIR if it is not mounted +# under BPF_FS_PARENT. +BPF_DIR="$BPF_FS_PARENT/test_$TESTNAME" +SCRIPT_DIR=$(dirname $(realpath "$0")) +BPF_FILE_PATH="$SCRIPT_DIR/$BPF_FILE" +BPF_ITER_FILE_PATH="$SCRIPT_DIR/$BPF_ITER_FILE" +BPFTOOL_PATH="bpftool" +# Assume the script is located under tools/testing/selftests/bpf/ +KDIR_ROOT_DIR=$(realpath "$SCRIPT_DIR"/../../../../) + +_cleanup() +{ + set +eu + + # If BPF_DIR is a mount point this will not remove the mount point itself. + [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2> /dev/null + + # Unmount if BPF filesystem was temporarily created. + if [ "$BPF_FS_PARENT" = "$BPF_FS_TMP_PARENT" ]; then + # A loop and recursive unmount are required as bpftool might + # create multiple mounts. For example, a bind mount of the directory + # to itself. The bind mount is created to change mount propagation + # flags on an actual mount point. + max_attempts=3 + attempt=0 + while mountpoint -q "$BPF_DIR" && [ $attempt -lt $max_attempts ]; do + umount -R "$BPF_DIR" 2>/dev/null + attempt=$((attempt+1)) + done + + # The directory still exists. Remove it now. 
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2>/dev/null + fi +} + +cleanup_skip() +{ + echo "selftests: $TESTNAME [SKIP]" + _cleanup + + exit $ksft_skip +} + +cleanup() +{ + if [ "$?" = 0 ]; then + echo "selftests: $TESTNAME [PASS]" + else + echo "selftests: $TESTNAME [FAILED]" + fi + _cleanup +} + +check_root_privileges() { + if [ $(id -u) -ne 0 ]; then + echo "Need root privileges" + exit $ksft_skip + fi +} + +# Function to verify bpftool path. +# Parameters: +# $1: bpftool path +verify_bpftool_path() { + local bpftool_path="$1" + if ! "$bpftool_path" version > /dev/null 2>&1; then + echo "Could not run test without bpftool" + exit $ksft_skip + fi +} + +# Function to verify BTF support. +# The test requires BTF support for fmod_ret programs. +verify_btf_support() { + if [ ! -f /sys/kernel/btf/vmlinux ]; then + echo "Could not run test without BTF support" + exit $ksft_skip + fi +} + +# Function to initialize map entries with keys [0..2] and values set to 0. +# Parameters: +# $1: Map name +# $2: bpftool path +initialize_map_entries() { + local map_name="$1" + local bpftool_path="$2" + + for key in 0 1 2; do + "$bpftool_path" map update name "$map_name" key $key 0 0 0 value 0 0 0 $key + done +} + +# Test read access to the map. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +access_for_read() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + + # Test read access to the map. + if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + echo " Read access to $key in $map_name failed" + exit 1 + fi + + # Test read access to map's BTF data. + if ! "$bpftool_path" btf dump map "$name_cmd" "$map_name" 1>/dev/null; then + echo " Read access to $map_name for BTF data failed" + exit 1 + fi +} + +# Test write access to the map. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +# $5: Whether write should succeed (true/false) +access_for_write() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + local write_should_succeed="$5" + local value="1 1 1 1" + + if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \ + $value 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Write access to $key in $map_name succeeded but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Write access to $key in $map_name failed but should have succeeded" + exit 1 + fi + fi +} + +# Test entry deletion for the map. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key +# $5: Whether write should succeed (true/false) +access_for_deletion() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key="$4" + local write_should_succeed="$5" + local value="1 1 1 1" + + # Test deletion by key for the map. + # Before deleting, check the key exists. + if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + echo " Key $key does not exist in $map_name" + exit 1 + fi + + # Delete by key. 
+ if "$bpftool_path" map delete "$name_cmd" "$map_name" key $key 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Deletion for $key in $map_name succeeded but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Deletion for $key in $map_name failed but should have succeeded" + exit 1 + fi + fi + + # After deleting, check the entry existence according to the expected status. + if "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then + if [ "$write_should_succeed" = "true" ]; then + echo " Key $key for $map_name was not deleted but should have been deleted" + exit 1 + fi + else + if [ "$write_should_succeed" = "false" ]; then + echo "Key $key for $map_name was deleted but should have not been deleted" + exit 1 + fi + fi + + # Test creation of map's deleted entry, if deletion was successful. + # Otherwise, the entry exists. + if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \ + $value 2>/dev/null; then + if [ "$write_should_succeed" = "false" ]; then + echo " Write access to $key in $map_name succeeded after deletion attempt but should have failed" + exit 1 + fi + else + if [ "$write_should_succeed" = "true" ]; then + echo " Write access to $key in $map_name failed after deletion attempt but should have succeeded" + exit 1 + fi + fi +} + +# Test map elements iterator. +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: BPF_DIR +# $5: bpf iterator object file path +iterate_map_elem() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local bpf_dir="$4" + local bpf_file="$5" + local pin_path="$bpf_dir/map_iterator" + + "$bpftool_path" iter pin "$bpf_file" "$pin_path" map "$name_cmd" "$map_name" + if [ ! -f "$pin_path" ]; then + echo " Failed to pin iterator to $pin_path" + exit 1 + fi + + cat "$pin_path" 1>/dev/null + rm "$pin_path" 2>/dev/null +} + +# Function to test map access with configurable write expectations +# Parameters: +# $1: Name command (name/pinned) +# $2: Map name +# $3: bpftool path +# $4: key for rw +# $5: key to delete +# $6: Whether write should succeed (true/false) +# $7: BPF_DIR +# $8: bpf iterator object file path +access_map() { + local name_cmd="$1" + local map_name="$2" + local bpftool_path="$3" + local key_for_rw="$4" + local key_to_del="$5" + local write_should_succeed="$6" + local bpf_dir="$7" + local bpf_iter_file_path="$8" + + access_for_read "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" + access_for_write "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" \ + "$write_should_succeed" + access_for_deletion "$name_cmd" "$map_name" "$bpftool_path" "$key_to_del" \ + "$write_should_succeed" + iterate_map_elem "$name_cmd" "$map_name" "$bpftool_path" "$bpf_dir" \ + "$bpf_iter_file_path" +} + +# Function to test map access with configurable write expectations +# Parameters: +# $1: Map name +# $2: bpftool path +# $3: BPF_DIR +# $4: Whether write should succeed (true/false) +# $5: bpf iterator object file path +test_map_access() { + local map_name="$1" + local bpftool_path="$2" + local bpf_dir="$3" + local pin_path="$bpf_dir/${map_name}_pinned" + local write_should_succeed="$4" + local bpf_iter_file_path="$5" + + # Test access to the map by name. + access_map "name" "$map_name" "$bpftool_path" "0 0 0 0" "1 0 0 0" \ + "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path" + + # Pin the map to the BPF filesystem + "$bpftool_path" map pin name "$map_name" "$pin_path" + if [ ! 
-e "$pin_path" ]; then + echo " Failed to pin $map_name" + exit 1 + fi + + # Test access to the pinned map. + access_map "pinned" "$pin_path" "$bpftool_path" "0 0 0 0" "2 0 0 0" \ + "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path" +} + +# Function to test map creation and map-of-maps +# Parameters: +# $1: bpftool path +# $2: BPF_DIR +test_map_creation_and_map_of_maps() { + local bpftool_path="$1" + local bpf_dir="$2" + local outer_map_name="outer_map_tt" + local inner_map_name="inner_map_tt" + + "$bpftool_path" map create "$bpf_dir/$inner_map_name" type array key 4 \ + value 4 entries 4 name "$inner_map_name" + if [ ! -f "$bpf_dir/$inner_map_name" ]; then + echo " Failed to create inner map file at $bpf_dir/$outer_map_name" + return 1 + fi + + "$bpftool_path" map create "$bpf_dir/$outer_map_name" type hash_of_maps \ + key 4 value 4 entries 2 name "$outer_map_name" inner_map name "$inner_map_name" + if [ ! -f "$bpf_dir/$outer_map_name" ]; then + echo " Failed to create outer map file at $bpf_dir/$outer_map_name" + return 1 + fi + + # Add entries to the outer map by name and by pinned path. + "$bpftool_path" map update pinned "$bpf_dir/$outer_map_name" key 0 0 0 0 \ + value pinned "$bpf_dir/$inner_map_name" + "$bpftool_path" map update name "$outer_map_name" key 1 0 0 0 value \ + name "$inner_map_name" + + # The outer map should be full by now. + # The following map update command is expected to fail. + if "$bpftool_path" map update name "$outer_map_name" key 2 0 0 0 value name \ + "$inner_map_name" 2>/dev/null; then + echo " Update for $outer_map_name succeeded but should have failed" + exit 1 + fi +} + +# Function to test map access with the btf list command +# Parameters: +# $1: bpftool path +test_map_access_with_btf_list() { + local bpftool_path="$1" + + # The btf list command iterates over maps for + # loaded BPF programs. + if ! "$bpftool_path" btf list 1>/dev/null; then + echo " Failed to access btf data" + exit 1 + fi +} + +set -eu + +trap cleanup_skip EXIT + +check_root_privileges + +verify_bpftool_path "$BPFTOOL_PATH" + +verify_btf_support + +trap cleanup EXIT + +# Load and attach the BPF programs to control maps access. +"$BPFTOOL_PATH" prog loadall "$BPF_FILE_PATH" "$BPF_DIR" autoattach + +initialize_map_entries "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" +initialize_map_entries "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" + +# Activate the map protection mechanism. Protection status is controlled +# by a value stored in the prot_status_map at index 0. +"$BPFTOOL_PATH" map update name prot_status_map key 0 0 0 0 value 1 0 0 0 + +# Test protected map (write should fail). +test_map_access "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "false" \ + "$BPF_ITER_FILE_PATH" + +# Test not protected map (write should succeed). 
+test_map_access "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "true" \ + "$BPF_ITER_FILE_PATH" + +test_map_creation_and_map_of_maps "$BPFTOOL_PATH" "$BPF_DIR" + +test_map_access_with_btf_list "$BPFTOOL_PATH" + +exit 0 diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index 9551d8d5f8f9..78423cf89e01 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -40,7 +40,7 @@ #define TEST_TAG_LOAD_MODE_PFX "comment:load_mode=" /* Warning: duplicated in bpf_misc.h */ -#define POINTER_VALUE 0xcafe4all +#define POINTER_VALUE 0xbadcafe #define TEST_DATA_LEN 64 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS @@ -318,20 +318,14 @@ static int parse_caps(const char *str, __u64 *val, const char *name) static int parse_retval(const char *str, int *val, const char *name) { - struct { - char *name; - int val; - } named_values[] = { - { "INT_MIN" , INT_MIN }, - { "POINTER_VALUE", POINTER_VALUE }, - { "TEST_DATA_LEN", TEST_DATA_LEN }, - }; - int i; - - for (i = 0; i < ARRAY_SIZE(named_values); ++i) { - if (strcmp(str, named_values[i].name) != 0) - continue; - *val = named_values[i].val; + /* + * INT_MIN is defined as (-INT_MAX -1), i.e. it doesn't expand to a + * single int and cannot be parsed with strtol, so we handle it + * separately here. In addition, it expands to different expressions in + * different compilers so we use a prefixed _INT_MIN instead. + */ + if (strcmp(str, "_INT_MIN") == 0) { + *val = INT_MIN; return 0; } @@ -1103,9 +1097,9 @@ void run_subtest(struct test_loader *tester, } } - do_prog_test_run(bpf_program__fd(tprog), &retval, - bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false); - if (retval != subspec->retval && subspec->retval != POINTER_VALUE) { + err = do_prog_test_run(bpf_program__fd(tprog), &retval, + bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false); + if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) { PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval); goto tobj_cleanup; } diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c index fda7589c5023..0921939532c6 100644 --- a/tools/testing/selftests/bpf/test_lru_map.c +++ b/tools/testing/selftests/bpf/test_lru_map.c @@ -138,6 +138,18 @@ static int sched_next_online(int pid, int *next_to_try) return ret; } +/* Derive target_free from map_size, same as bpf_common_lru_populate */ +static unsigned int __tgt_size(unsigned int map_size) +{ + return (map_size / nr_cpus) / 2; +} + +/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */ +static unsigned int __map_size(unsigned int tgt_free) +{ + return tgt_free * nr_cpus * 2; +} + /* Size of the LRU map is 2 * Add key=1 (+1 key) * Add key=2 (+1 key) @@ -231,11 +243,11 @@ static void test_lru_sanity0(int map_type, int map_flags) printf("Pass\n"); } -/* Size of the LRU map is 1.5*tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Lookup 1 to tgt_free/2 - * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys) - * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU +/* Verify that unreferenced elements are recycled before referenced ones. + * Insert elements. + * Reference a subset of these. + * Insert more, enough to trigger recycling. + * Verify that unreferenced are recycled. 
*/ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) { @@ -257,7 +269,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -266,13 +278,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup 1 to tgt_free/2 */ + /* Lookup 1 to batch_size */ end_key = 1 + batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); @@ -280,12 +292,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) BPF_NOEXIST)); } - /* Insert 1+tgt_free to 2*tgt_free - * => 1+tgt_free/2 to LOCALFREE_TARGET will be + /* Insert another map_size - batch_size keys + * Map will contain 1 to batch_size plus these latest, i.e., + * => previous 1+batch_size to map_size - batch_size will have been * removed by LRU */ - key = 1 + tgt_free; - end_key = key + tgt_free; + key = 1 + __map_size(tgt_free); + end_key = key + __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -301,17 +314,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map 1.5 * tgt_free - * Insert 1 to tgt_free (+tgt_free keys) - * Update 1 to tgt_free/2 - * => The original 1 to tgt_free/2 will be removed due to - * the LRU shrink process - * Re-insert 1 to tgt_free/2 again and do a lookup immeidately - * Insert 1+tgt_free to tgt_free*3/2 - * Insert 1+tgt_free*3/2 to tgt_free*5/2 - * => Key 1+tgt_free to tgt_free*3/2 - * will be removed from LRU because it has never - * been lookup and ref bit is not set +/* Verify that insertions exceeding map size will recycle the oldest. + * Verify that unreferenced elements are recycled before referenced. */ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) { @@ -334,7 +338,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) batch_size = tgt_free / 2; assert(batch_size * 2 == tgt_free); - map_size = tgt_free + batch_size; + map_size = __map_size(tgt_free) + batch_size; lru_map_fd = create_map(map_type, map_flags, map_size); assert(lru_map_fd != -1); @@ -343,8 +347,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to tgt_free (+tgt_free keys) */ - end_key = 1 + tgt_free; + /* Insert map_size - batch_size keys */ + end_key = 1 + __map_size(tgt_free); for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -357,8 +361,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) * shrink the inactive list to get tgt_free * number of free nodes. * - * Hence, the oldest key 1 to tgt_free/2 - * are removed from the LRU list. + * Hence, the oldest key is removed from the LRU list. 
*/ key = 1; if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { @@ -370,8 +373,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) BPF_EXIST)); } - /* Re-insert 1 to tgt_free/2 again and do a lookup - * immeidately. + /* Re-insert 1 to batch_size again and do a lookup immediately. */ end_key = 1 + batch_size; value[0] = 4321; @@ -387,17 +389,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1+tgt_free to tgt_free*3/2 */ - end_key = 1 + tgt_free + batch_size; - for (key = 1 + tgt_free; key < end_key; key++) + /* Insert batch_size new elements */ + key = 1 + __map_size(tgt_free); + end_key = key + batch_size; + for (; key < end_key; key++) /* These newly added but not referenced keys will be * gone during the next LRU shrink. */ assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */ - end_key = key + tgt_free; + /* Insert map_size - batch_size elements */ + end_key += __map_size(tgt_free); for (; key < end_key; key++) { assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); @@ -413,12 +416,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free) printf("Pass\n"); } -/* Size of the LRU map is 2*tgt_free - * It is to test the active/inactive list rotation - * Insert 1 to 2*tgt_free (+2*tgt_free keys) - * Lookup key 1 to tgt_free*3/2 - * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys) - * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU +/* Test the active/inactive list rotation + * + * Fill the whole map, deplete the free list. + * Reference all except the last lru->target_free elements. + * Insert lru->target_free new elements. This triggers one shrink. + * Verify that the non-referenced elements are replaced. 
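+ * Illustrative numbers only (not taken from the test): assuming nr_cpus = 4 + * and tgt_free = 128, map_size = 256 and batch_size = __tgt_size(128) = + * (128 / 4) / 2 = 16; the test fills all 256 slots, references the first + * 240, then inserts 16 new keys and expects exactly the 16 unreferenced + * ones to be replaced.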
*/ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) { @@ -437,8 +440,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) assert(sched_next_online(0, &next_cpu) != -1); - batch_size = tgt_free / 2; - assert(batch_size * 2 == tgt_free); + batch_size = __tgt_size(tgt_free); map_size = tgt_free * 2; lru_map_fd = create_map(map_type, map_flags, map_size); @@ -449,23 +451,21 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free) value[0] = 1234; - /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */ - end_key = 1 + (2 * tgt_free); + /* Fill the map */ + end_key = 1 + map_size; for (key = 1; key < end_key; key++) assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST)); - /* Lookup key 1 to tgt_free*3/2 */ - end_key = tgt_free + batch_size; + /* Reference all but the last batch_size */ + end_key = 1 + map_size - batch_size; for (key = 1; key < end_key; key++) { assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value)); assert(!bpf_map_update_elem(expected_map_fd, &key, value, BPF_NOEXIST)); } - /* Add 1+2*tgt_free to tgt_free*5/2 - * (+tgt_free/2 keys) - */ + /* Insert new batch_size: replaces the non-referenced elements */ key = 2 * tgt_free + 1; end_key = key + batch_size; for (; key < end_key; key++) { @@ -500,7 +500,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free) lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free * nr_cpus); else - lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free); + lru_map_fd = create_map(map_type, map_flags, + 3 * __map_size(tgt_free)); assert(lru_map_fd != -1); expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0, diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 986ce32b113a..3fae9ce46ca9 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -535,7 +535,7 @@ static void test_devmap_hash(unsigned int task, void *data) static void test_queuemap(unsigned int task, void *data) { const int MAP_SIZE = 32; - __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0; int fd, i; /* Fill test values to be used */ @@ -591,7 +591,7 @@ static void test_queuemap(unsigned int task, void *data) static void test_stackmap(unsigned int task, void *data) { const int MAP_SIZE = 32; - __u32 vals[MAP_SIZE + MAP_SIZE/2], val; + __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0; int fd, i; /* Fill test values to be used */ diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index 870694f2a359..df2222a1806f 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -460,6 +460,34 @@ static inline void *u64_to_ptr(__u64 ptr) return (void *) (unsigned long) ptr; } +static inline __u32 id_from_prog_fd(int fd) +{ + struct bpf_prog_info prog_info = {}; + __u32 prog_info_len = sizeof(prog_info); + int err; + + err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len); + if (!ASSERT_OK(err, "id_from_prog_fd")) + return 0; + + ASSERT_NEQ(prog_info.id, 0, "prog_info.id"); + return prog_info.id; +} + +static inline __u32 id_from_link_fd(int fd) +{ + struct bpf_link_info link_info = {}; + __u32 link_info_len = sizeof(link_info); + int err; + + err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len); + if (!ASSERT_OK(err, "id_from_link_fd")) + return 0; + + ASSERT_NEQ(link_info.id, 0, "link_info.id"); + return link_info.id; +} + int 
bpf_find_map(const char *test, struct bpf_object *obj, const char *name); int compare_map_keys(int map1_fd, int map2_fd); int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len); diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c index 220f6a963813..f997d7ec8fd0 100644 --- a/tools/testing/selftests/bpf/unpriv_helpers.c +++ b/tools/testing/selftests/bpf/unpriv_helpers.c @@ -1,15 +1,76 @@ // SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <limits.h> #include <stdbool.h> #include <stdlib.h> #include <stdio.h> #include <string.h> +#include <sys/utsname.h> #include <unistd.h> #include <fcntl.h> +#include <zlib.h> #include "unpriv_helpers.h" -static bool get_mitigations_off(void) +static gzFile open_config(void) +{ + struct utsname uts; + char buf[PATH_MAX]; + gzFile config; + + if (uname(&uts)) { + perror("uname"); + goto config_gz; + } + + snprintf(buf, sizeof(buf), "/boot/config-%s", uts.release); + config = gzopen(buf, "rb"); + if (config) + return config; + fprintf(stderr, "gzopen %s: %s\n", buf, strerror(errno)); + +config_gz: + config = gzopen("/proc/config.gz", "rb"); + if (!config) + perror("gzopen /proc/config.gz"); + return config; +} + +static int config_contains(const char *pat) +{ + const char *msg; + char buf[1024]; + gzFile config; + int n, err; + + config = open_config(); + if (!config) + return -1; + + for (;;) { + if (!gzgets(config, buf, sizeof(buf))) { + msg = gzerror(config, &err); + if (err == Z_ERRNO) + perror("gzgets /proc/config.gz"); + else if (err != Z_OK) + fprintf(stderr, "gzgets /proc/config.gz: %s", msg); + gzclose(config); + return -1; + } + n = strlen(buf); + if (buf[n - 1] == '\n') + buf[n - 1] = 0; + if (strcmp(buf, pat) == 0) { + gzclose(config); + return 1; + } + } + gzclose(config); + return 0; +} + +static bool cmdline_contains(const char *pat) { char cmdline[4096], *c; int fd, ret = false; @@ -27,7 +88,7 @@ static bool get_mitigations_off(void) cmdline[sizeof(cmdline) - 1] = '\0'; for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) { - if (strncmp(c, "mitigations=off", strlen(c))) + if (strncmp(c, pat, strlen(c))) continue; ret = true; break; @@ -37,8 +98,21 @@ out: return ret; } +static int get_mitigations_off(void) +{ + int enabled_in_config; + + if (cmdline_contains("mitigations=off")) + return 1; + enabled_in_config = config_contains("CONFIG_CPU_MITIGATIONS=y"); + if (enabled_in_config < 0) + return -1; + return !enabled_in_config; +} + bool get_unpriv_disabled(void) { + int mitigations_off; bool disabled; char buf[2]; FILE *fd; @@ -52,5 +126,19 @@ bool get_unpriv_disabled(void) disabled = true; } - return disabled ? true : get_mitigations_off(); + if (disabled) + return true; + + /* + * Some unpriv tests rely on spectre mitigations being on. + * If mitigations are off or status can't be determined + * assume that unpriv tests are disabled. 
+ */ + mitigations_off = get_mitigations_off(); + if (mitigations_off < 0) { + fprintf(stderr, + "Can't determine if mitigations are enabled, disabling unpriv tests."); + return true; + } + return mitigations_off; } diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 18596ae0b0c1..f3492efc8834 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -2409,3 +2409,27 @@ .errstr_unpriv = "", .prog_type = BPF_PROG_TYPE_CGROUP_SKB, }, +{ + "calls: several args with ref_obj_id", + .insns = { + /* Reserve at least sizeof(struct iphdr) bytes in the ring buffer. + * With a smaller size, the verifier would reject the call to + * bpf_tcp_raw_gen_syncookie_ipv4 before we can reach the + * ref_obj_id error. + */ + BPF_MOV64_IMM(BPF_REG_2, 20), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve), + /* if r0 == 0 goto <exit> */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tcp_raw_gen_syncookie_ipv4), + BPF_EXIT_INSN(), + }, + .fixup_map_ringbuf = { 2 }, + .result = REJECT, + .errstr = "more than one arg with ref_obj_id", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, +}, diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c index ee454327e5c6..77207b498c6f 100644 --- a/tools/testing/selftests/bpf/verifier/dead_code.c +++ b/tools/testing/selftests/bpf/verifier/dead_code.c @@ -2,14 +2,13 @@ "dead code: start", .insns = { BPF_JMP_IMM(BPF_JA, 0, 0, 2), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_JMP_IMM(BPF_JA, 0, 0, 2), BPF_MOV64_IMM(BPF_REG_0, 7), BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 7, }, diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c index 43776f6f92f4..91d83e9cb148 100644 --- a/tools/testing/selftests/bpf/verifier/jmp32.c +++ b/tools/testing/selftests/bpf/verifier/jmp32.c @@ -84,11 +84,10 @@ BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -149,11 +148,10 @@ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, 0xf, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -214,11 +212,10 @@ BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, 0x10, 1), BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x10, 1), BPF_EXIT_INSN(), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -283,11 +280,10 @@ BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, 
BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -354,11 +350,10 @@ BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -425,11 +420,10 @@ BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -496,11 +490,10 @@ BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -567,11 +560,10 @@ BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -638,11 +630,10 @@ BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, -2, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -709,11 +700,10 @@ BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSLE, BPF_REG_7, 0x7ffffff0, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, @@ -780,11 +770,10 @@ BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1), BPF_EXIT_INSN(), BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1), + /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr_unpriv = "R0 invalid mem access 'scalar'", - .result_unpriv = REJECT, .result = ACCEPT, .retval = 2, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c index 11fc68da735e..e901eefd774a 100644 --- a/tools/testing/selftests/bpf/verifier/jset.c +++ b/tools/testing/selftests/bpf/verifier/jset.c @@ -78,12 +78,11 @@ .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), 
BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .retval = 1, .result = ACCEPT, }, @@ -136,13 +135,12 @@ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2), BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, { @@ -154,16 +152,16 @@ BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff), BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3), BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1), BPF_EXIT_INSN(), BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1), + /* unpriv: nospec (inserted to prevent "R9 !read_ok") */ BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0), BPF_EXIT_INSN(), }, .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, - .errstr_unpriv = "R9 !read_ok", - .result_unpriv = REJECT, .result = ACCEPT, }, diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index b2bb20b00952..d532dd82a3a8 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -23,6 +23,7 @@ #include <float.h> #include <math.h> #include <limits.h> +#include <assert.h> #ifndef ARRAY_SIZE #define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) @@ -49,6 +50,7 @@ enum stat_id { STACK, PROG_TYPE, ATTACH_TYPE, + MEMORY_PEAK, FILE_NAME, PROG_NAME, @@ -155,13 +157,27 @@ struct filter { bool abs; }; -struct var_preset { - char *name; +struct rvalue { enum { INTEGRAL, ENUMERATOR } type; union { long long ivalue; char *svalue; }; +}; + +struct field_access { + enum { FIELD_NAME, ARRAY_INDEX } type; + union { + char *name; + struct rvalue index; + }; +}; + +struct var_preset { + struct field_access *atoms; + int atom_count; + char *full_name; + struct rvalue value; bool applied; }; @@ -208,6 +224,9 @@ static struct env { int top_src_lines; struct var_preset *presets; int npresets; + char orig_cgroup[PATH_MAX]; + char stat_cgroup[PATH_MAX]; + int memory_peak_fd; } env; static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args) @@ -219,6 +238,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va return vfprintf(stderr, format, args); } +#define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__) + +__attribute__((format(printf, 3, 4))) +static int log_errno_aux(const char *file, int line, const char *fmt, ...) 
+{ + int err = -errno; + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "%s:%d: ", file, line); + vfprintf(stderr, fmt, ap); + fprintf(stderr, " failed with error '%s'.\n", strerror(errno)); + va_end(ap); + return err; +} + #ifndef VERISTAT_VERSION #define VERISTAT_VERSION "<kernel>" #endif @@ -344,6 +379,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) fprintf(stderr, "invalid top N specifier: %s\n", arg); argp_usage(state); } + break; case 'C': env.comparison_mode = true; break; @@ -734,13 +770,13 @@ cleanup: } static const struct stat_specs default_csv_output_spec = { - .spec_cnt = 14, + .spec_cnt = 15, .ids = { FILE_NAME, PROG_NAME, VERDICT, DURATION, TOTAL_INSNS, TOTAL_STATES, PEAK_STATES, MAX_STATES_PER_INSN, MARK_READ_MAX_LEN, SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE, - STACK, + STACK, MEMORY_PEAK, }, }; @@ -781,6 +817,7 @@ static struct stat_def { [STACK] = {"Stack depth", {"stack_depth", "stack"}, }, [PROG_TYPE] = { "Program type", {"prog_type"}, }, [ATTACH_TYPE] = { "Attach type", {"attach_type", }, }, + [MEMORY_PEAK] = { "Peak memory (MiB)", {"mem_peak", }, }, }; static bool parse_stat_id_var(const char *name, size_t len, int *id, @@ -854,6 +891,18 @@ static bool is_desc_sym(char c) return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_'; } +static char *rtrim(char *str) +{ + int i; + + for (i = strlen(str) - 1; i > 0; --i) { + if (!isspace(str[i])) + break; + str[i] = '\0'; + } + return str; +} + static int parse_stat(const char *stat_name, struct stat_specs *specs) { int id; @@ -1182,6 +1231,7 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch case BPF_MAP_TYPE_TASK_STORAGE: case BPF_MAP_TYPE_INODE_STORAGE: case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_CGRP_STORAGE: break; case BPF_MAP_TYPE_STRUCT_OPS: mask_unrelated_struct_ops_progs(obj, map, prog); @@ -1278,16 +1328,243 @@ static int max_verifier_log_size(void) return log_size; } +static bool output_stat_enabled(int id) +{ + int i; + + for (i = 0; i < env.output_spec.spec_cnt; i++) + if (env.output_spec.ids[i] == id) + return true; + return false; +} + +__attribute__((format(printf, 2, 3))) +static int write_one_line(const char *file, const char *fmt, ...) +{ + int err, saved_errno; + va_list ap; + FILE *f; + + f = fopen(file, "w"); + if (!f) + return -1; + + va_start(ap, fmt); + errno = 0; + err = vfprintf(f, fmt, ap); + saved_errno = errno; + va_end(ap); + fclose(f); + errno = saved_errno; + return err < 0 ? -1 : 0; +} + +__attribute__((format(scanf, 3, 4))) +static int scanf_one_line(const char *file, int fields_expected, const char *fmt, ...) 
+{ + int res = 0, saved_errno = 0; + char *line = NULL; + size_t line_len; + va_list ap; + FILE *f; + + f = fopen(file, "r"); + if (!f) + return -1; + + va_start(ap, fmt); + while (getline(&line, &line_len, f) > 0) { + res = vsscanf(line, fmt, ap); + if (res == fields_expected) + goto out; + } + if (ferror(f)) { + saved_errno = errno; + res = -1; + } + +out: + va_end(ap); + free(line); + fclose(f); + errno = saved_errno; + return res; +} + +static void destroy_stat_cgroup(void) +{ + char buf[PATH_MAX]; + int err; + + close(env.memory_peak_fd); + + if (env.orig_cgroup[0]) { + snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.orig_cgroup); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) + log_errno("moving self to original cgroup %s\n", env.orig_cgroup); + } + + if (env.stat_cgroup[0]) { + err = rmdir(env.stat_cgroup); + if (err < 0) + log_errno("deletion of cgroup %s", env.stat_cgroup); + } + + env.memory_peak_fd = -1; + env.orig_cgroup[0] = 0; + env.stat_cgroup[0] = 0; +} + +/* + * Creates a cgroup at /sys/fs/cgroup/veristat-accounting-<pid>, + * moves current process to this cgroup. + */ +static void create_stat_cgroup(void) +{ + char cgroup_fs_mount[4096]; + char buf[4096]; + int err; + + env.memory_peak_fd = -1; + + if (!output_stat_enabled(MEMORY_PEAK)) + return; + + err = scanf_one_line("/proc/self/mounts", 2, "%*s %4095s cgroup2 %s", + cgroup_fs_mount, buf); + if (err != 2) { + if (err < 0) + log_errno("reading /proc/self/mounts"); + else if (!env.quiet) + fprintf(stderr, "Can't find cgroupfs v2 mount point.\n"); + goto err_out; + } + + /* cgroup-v2.rst promises the line "0::<group>" for cgroups v2 */ + err = scanf_one_line("/proc/self/cgroup", 1, "0::%4095s", buf); + if (err != 1) { + if (err < 0) + log_errno("reading /proc/self/cgroup"); + else if (!env.quiet) + fprintf(stderr, "Can't infer veristat process cgroup."); + goto err_out; + } + + snprintf(env.orig_cgroup, sizeof(env.orig_cgroup), "%s/%s", cgroup_fs_mount, buf); + + snprintf(buf, sizeof(buf), "%s/veristat-accounting-%d", cgroup_fs_mount, getpid()); + err = mkdir(buf, 0777); + if (err < 0) { + log_errno("creation of cgroup %s", buf); + goto err_out; + } + strcpy(env.stat_cgroup, buf); + + snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup); + err = write_one_line(buf, "%d\n", getpid()); + if (err < 0) { + log_errno("entering cgroup %s", buf); + goto err_out; + } + + snprintf(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup); + env.memory_peak_fd = open(buf, O_RDWR | O_APPEND); + if (env.memory_peak_fd < 0) { + log_errno("opening %s", buf); + goto err_out; + } + + return; + +err_out: + if (!env.quiet) + fprintf(stderr, "Memory usage metric unavailable.\n"); + destroy_stat_cgroup(); +} + +/* Current value of /sys/fs/cgroup/veristat-accounting-<pid>/memory.peak */ +static long cgroup_memory_peak(void) +{ + long err, memory_peak; + char buf[32]; + + if (env.memory_peak_fd < 0) + return -1; + + err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0); + if (err <= 0) { + log_errno("pread(%s/memory.peak)", env.stat_cgroup); + return -1; + } + + buf[err] = 0; + errno = 0; + memory_peak = strtoll(buf, NULL, 10); + if (errno) { + log_errno("%s/memory.peak:strtoll(%s)", env.stat_cgroup, buf); + return -1; + } + + return memory_peak; +} + +static int reset_stat_cgroup(void) +{ + char buf[] = "r\n"; + int err; + + if (env.memory_peak_fd < 0) + return -1; + + err = pwrite(env.memory_peak_fd, buf, sizeof(buf), 0); + if (err <= 0) { + log_errno("pwrite(%s/memory.peak)", env.stat_cgroup); + return -1; + } + 
return 0; +} + +static int parse_rvalue(const char *val, struct rvalue *rvalue) +{ + long long value; + char *val_end; + + if (val[0] == '-' || isdigit(val[0])) { + /* must be a number */ + errno = 0; + value = strtoll(val, &val_end, 0); + if (errno == ERANGE) { + errno = 0; + value = strtoull(val, &val_end, 0); + } + if (errno || *val_end != '\0') { + fprintf(stderr, "Failed to parse value '%s'\n", val); + return -EINVAL; + } + rvalue->ivalue = value; + rvalue->type = INTEGRAL; + } else { + /* if not a number, consider it enum value */ + rvalue->svalue = strdup(val); + if (!rvalue->svalue) + return -ENOMEM; + rvalue->type = ENUMERATOR; + } + return 0; +} + static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog) { const char *base_filename = basename(strdupa(filename)); const char *prog_name = bpf_program__name(prog); + long mem_peak_a, mem_peak_b, mem_peak = -1; char *buf; int buf_sz, log_level; struct verif_stats *stats; struct bpf_prog_info info; __u32 info_len = sizeof(info); - int err = 0; + int err = 0, cgroup_err; void *tmp; int fd; @@ -1332,7 +1609,15 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf if (env.force_reg_invariants) bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS); - err = bpf_object__load(obj); + err = bpf_object__prepare(obj); + if (!err) { + cgroup_err = reset_stat_cgroup(); + mem_peak_a = cgroup_memory_peak(); + err = bpf_object__load(obj); + mem_peak_b = cgroup_memory_peak(); + if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0) + mem_peak = mem_peak_b - mem_peak_a; + } env.progs_processed++; stats->file_name = strdup(base_filename); @@ -1341,6 +1626,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf stats->stats[SIZE] = bpf_program__insn_cnt(prog); stats->stats[PROG_TYPE] = bpf_program__type(prog); stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog); + stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / (1024 * 1024); memset(&info, 0, info_len); fd = bpf_program__fd(prog); @@ -1361,15 +1647,74 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf free(buf); return 0; -}; +} + +static int append_preset_atom(struct var_preset *preset, char *value, bool is_index) +{ + struct field_access *tmp; + int i = preset->atom_count; + int err; + + tmp = reallocarray(preset->atoms, i + 1, sizeof(*preset->atoms)); + if (!tmp) + return -ENOMEM; + + preset->atoms = tmp; + preset->atom_count++; + + if (is_index) { + preset->atoms[i].type = ARRAY_INDEX; + err = parse_rvalue(value, &preset->atoms[i].index); + if (err) + return err; + } else { + preset->atoms[i].type = FIELD_NAME; + preset->atoms[i].name = strdup(value); + if (!preset->atoms[i].name) + return -ENOMEM; + } + return 0; +} + +static int parse_var_atoms(const char *full_var, struct var_preset *preset) +{ + char expr[256], var[256], *name, *saveptr; + int n, len, off, err; + + snprintf(expr, sizeof(expr), "%s", full_var); + preset->atom_count = 0; + while ((name = strtok_r(preset->atom_count ? 
NULL : expr, ".", &saveptr))) { + len = strlen(name); + /* parse variable name */ + if (sscanf(name, "%[a-zA-Z0-9_] %n", var, &off) != 1) { + fprintf(stderr, "Can't parse %s\n", name); + return -EINVAL; + } + err = append_preset_atom(preset, var, false); + if (err) + return err; + + /* parse optional array indexes */ + while (off < len) { + if (sscanf(name + off, " [ %[a-zA-Z0-9_] ] %n", var, &n) != 1) { + fprintf(stderr, "Can't parse %s as index\n", name + off); + return -EINVAL; + } + err = append_preset_atom(preset, var, true); + if (err) + return err; + off += n; + } + } + return 0; +} static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr) { void *tmp; struct var_preset *cur; - char var[256], val[256], *val_end; - long long value; - int n; + char var[256], val[256]; + int n, err; tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets)); if (!tmp) @@ -1379,37 +1724,25 @@ static int append_var_preset(struct var_preset **presets, int *cnt, const char * memset(cur, 0, sizeof(*cur)); (*cnt)++; - if (sscanf(expr, "%s = %s %n", var, val, &n) != 2 || n != strlen(expr)) { + if (sscanf(expr, " %[][a-zA-Z0-9_. ] = %s %n", var, val, &n) != 2 || n != strlen(expr)) { fprintf(stderr, "Failed to parse expression '%s'\n", expr); return -EINVAL; } + /* Remove trailing spaces from var, as scanf may add those */ + rtrim(var); - if (val[0] == '-' || isdigit(val[0])) { - /* must be a number */ - errno = 0; - value = strtoll(val, &val_end, 0); - if (errno == ERANGE) { - errno = 0; - value = strtoull(val, &val_end, 0); - } - if (errno || *val_end != '\0') { - fprintf(stderr, "Failed to parse value '%s'\n", val); - return -EINVAL; - } - cur->ivalue = value; - cur->type = INTEGRAL; - } else { - /* if not a number, consider it enum value */ - cur->svalue = strdup(val); - if (!cur->svalue) - return -ENOMEM; - cur->type = ENUMERATOR; - } + err = parse_rvalue(val, &cur->value); + if (err) + return err; - cur->name = strdup(var); - if (!cur->name) + cur->full_name = strdup(var); + if (!cur->full_name) return -ENOMEM; + err = parse_var_atoms(var, cur); + if (err) + return err; + return 0; } @@ -1486,22 +1819,96 @@ static bool is_preset_supported(const struct btf_type *t) return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t); } -const int btf_find_member(const struct btf *btf, - const struct btf_type *parent_type, - __u32 parent_offset, - const char *member_name, - int *member_tid, - __u32 *member_offset) +static int find_enum_value(const struct btf *btf, const char *name, long long *value) +{ + const struct btf_type *t; + int cnt, i; + long long lvalue; + + cnt = btf__type_cnt(btf); + for (i = 1; i != cnt; ++i) { + t = btf__type_by_id(btf, i); + + if (!btf_is_any_enum(t)) + continue; + + if (enum_value_from_name(btf, t, name, &lvalue) == 0) { + *value = lvalue; + return 0; + } + } + return -ESRCH; +} + +static int resolve_rvalue(struct btf *btf, const struct rvalue *rvalue, long long *result) +{ + int err = 0; + + switch (rvalue->type) { + case INTEGRAL: + *result = rvalue->ivalue; + return 0; + case ENUMERATOR: + err = find_enum_value(btf, rvalue->svalue, result); + if (err) { + fprintf(stderr, "Can't resolve enum value %s\n", rvalue->svalue); + return err; + } + return 0; + default: + fprintf(stderr, "Unknown rvalue type\n"); + return -EOPNOTSUPP; + } + return 0; +} + +static int adjust_var_secinfo_array(struct btf *btf, int tid, struct field_access *atom, + const char *array_name, struct btf_var_secinfo *sinfo) +{ + const struct btf_type *t; + struct btf_array *barr; + long long 
idx; + int err; + + tid = btf__resolve_type(btf, tid); + t = btf__type_by_id(btf, tid); + if (!btf_is_array(t)) { + fprintf(stderr, "Array index is not expected for %s\n", + array_name); + return -EINVAL; + } + barr = btf_array(t); + err = resolve_rvalue(btf, &atom->index, &idx); + if (err) + return err; + if (idx < 0 || idx >= barr->nelems) { + fprintf(stderr, "Array index %lld is out of bounds [0, %u): %s\n", + idx, barr->nelems, array_name); + return -EINVAL; + } + sinfo->size = btf__resolve_size(btf, barr->type); + sinfo->offset += sinfo->size * idx; + sinfo->type = btf__resolve_type(btf, barr->type); + return 0; +} + +static int adjust_var_secinfo_member(const struct btf *btf, + const struct btf_type *parent_type, + __u32 parent_offset, + const char *member_name, + struct btf_var_secinfo *sinfo) { int i; - if (!btf_is_composite(parent_type)) + if (!btf_is_composite(parent_type)) { + fprintf(stderr, "Can't resolve field %s for non-composite type\n", member_name); return -EINVAL; + } for (i = 0; i < btf_vlen(parent_type); ++i) { const struct btf_member *member; const struct btf_type *member_type; - int tid; + int tid, off; member = btf_members(parent_type) + i; tid = btf__resolve_type(btf, member->type); @@ -1509,6 +1916,7 @@ const int btf_find_member(const struct btf *btf, return -EINVAL; member_type = btf__type_by_id(btf, tid); + off = parent_offset + member->offset; if (member->name_off) { const char *name = btf__name_by_offset(btf, member->name_off); @@ -1518,48 +1926,62 @@ const int btf_find_member(const struct btf *btf, name); return -EINVAL; } - *member_offset = parent_offset + member->offset; - *member_tid = tid; + sinfo->offset += off / 8; + sinfo->type = tid; + sinfo->size = member_type->size; return 0; } } else if (btf_is_composite(member_type)) { int err; - err = btf_find_member(btf, member_type, parent_offset + member->offset, - member_name, member_tid, member_offset); + err = adjust_var_secinfo_member(btf, member_type, off, + member_name, sinfo); if (!err) return 0; } } - return -EINVAL; + return -ESRCH; } static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t, - struct btf_var_secinfo *sinfo, const char *var) + struct btf_var_secinfo *sinfo, struct var_preset *preset) { - char expr[256], *saveptr; - const struct btf_type *base_type, *member_type; - int err, member_tid; - char *name; - __u32 member_offset = 0; + const struct btf_type *base_type; + const char *prev_name; + int err, i; + int tid; - base_type = btf__type_by_id(btf, btf__resolve_type(btf, t->type)); - snprintf(expr, sizeof(expr), "%s", var); - strtok_r(expr, ".", &saveptr); + assert(preset->atom_count > 0); + assert(preset->atoms[0].type == FIELD_NAME); - while ((name = strtok_r(NULL, ".", &saveptr))) { - err = btf_find_member(btf, base_type, 0, name, &member_tid, &member_offset); - if (err) { - fprintf(stderr, "Could not find member %s for variable %s\n", name, var); - return err; + tid = btf__resolve_type(btf, t->type); + base_type = btf__type_by_id(btf, tid); + prev_name = preset->atoms[0].name; + + for (i = 1; i < preset->atom_count; ++i) { + struct field_access *atom = preset->atoms + i; + + switch (atom->type) { + case ARRAY_INDEX: + err = adjust_var_secinfo_array(btf, tid, atom, prev_name, sinfo); + break; + case FIELD_NAME: + err = adjust_var_secinfo_member(btf, base_type, 0, atom->name, sinfo); + if (err == -ESRCH) + fprintf(stderr, "Can't find '%s'\n", atom->name); + prev_name = atom->name; + break; + default: + fprintf(stderr, "Unknown field_access type\n"); + return -EOPNOTSUPP; } 
- member_type = btf__type_by_id(btf, member_tid); - sinfo->offset += member_offset / 8; - sinfo->size = member_type->size; - sinfo->type = member_tid; - base_type = member_type; + if (err) + return err; + base_type = btf__type_by_id(btf, sinfo->type); + tid = sinfo->type; } + return 0; } @@ -1569,7 +1991,7 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, { const struct btf_type *base_type; void *ptr; - long long value = preset->ivalue; + long long value = preset->value.ivalue; size_t size; base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type)); @@ -1578,22 +2000,23 @@ static int set_global_var(struct bpf_object *obj, struct btf *btf, return -EINVAL; } if (!is_preset_supported(base_type)) { - fprintf(stderr, "Setting value for type %s is not supported\n", - btf__name_by_offset(btf, base_type->name_off)); + fprintf(stderr, "Can't set %s. Only ints and enums are supported\n", + preset->full_name); return -EINVAL; } - if (preset->type == ENUMERATOR) { + if (preset->value.type == ENUMERATOR) { if (btf_is_any_enum(base_type)) { - if (enum_value_from_name(btf, base_type, preset->svalue, &value)) { + if (enum_value_from_name(btf, base_type, preset->value.svalue, &value)) { fprintf(stderr, "Failed to find integer value for enum element %s\n", - preset->svalue); + preset->value.svalue); return -EINVAL; } } else { fprintf(stderr, "Value %s is not supported for type %s\n", - preset->svalue, btf__name_by_offset(btf, base_type->name_off)); + preset->value.svalue, + btf__name_by_offset(btf, base_type->name_off)); return -EINVAL; } } @@ -1660,20 +2083,16 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i for (j = 0; j < n; ++j, ++sinfo) { const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type); const char *var_name; - int var_len; if (!btf_is_var(var_type)) continue; var_name = btf__name_by_offset(btf, var_type->name_off); - var_len = strlen(var_name); for (k = 0; k < npresets; ++k) { struct btf_var_secinfo tmp_sinfo; - if (strncmp(var_name, presets[k].name, var_len) != 0 || - (presets[k].name[var_len] != '\0' && - presets[k].name[var_len] != '.')) + if (strcmp(var_name, presets[k].atoms[0].name) != 0) continue; if (presets[k].applied) { @@ -1683,7 +2102,7 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i } tmp_sinfo = *sinfo; err = adjust_var_secinfo(btf, var_type, - &tmp_sinfo, presets[k].name); + &tmp_sinfo, presets + k); if (err) return err; @@ -1698,7 +2117,8 @@ static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, i for (i = 0; i < npresets; ++i) { if (!presets[i].applied) { fprintf(stderr, "Global variable preset %s has not been applied\n", - presets[i].name); + presets[i].full_name); + err = -EINVAL; } presets[i].applied = false; } @@ -1824,6 +2244,7 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2, case TOTAL_STATES: case PEAK_STATES: case MAX_STATES_PER_INSN: + case MEMORY_PEAK: case MARK_READ_MAX_LEN: { long v1 = s1->stats[id]; long v2 = s2->stats[id]; @@ -2053,6 +2474,7 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id, case STACK: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: *val = s ? 
s->stats[id] : 0; break; default: @@ -2139,6 +2561,7 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats case MARK_READ_MAX_LEN: case SIZE: case JITED_SIZE: + case MEMORY_PEAK: case STACK: { long val; int err, n; @@ -2776,7 +3199,7 @@ static void output_prog_stats(void) static int handle_verif_mode(void) { - int i, err; + int i, err = 0; if (env.filename_cnt == 0) { fprintf(stderr, "Please provide path to BPF object file!\n\n"); @@ -2784,11 +3207,12 @@ static int handle_verif_mode(void) return -EINVAL; } + create_stat_cgroup(); for (i = 0; i < env.filename_cnt; i++) { err = process_obj(env.filenames[i]); if (err) { fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err); - return err; + goto out; } } @@ -2796,7 +3220,9 @@ static int handle_verif_mode(void) output_prog_stats(); - return 0; +out: + destroy_stat_cgroup(); + return err; } static int handle_replay_mode(void) @@ -2826,7 +3252,7 @@ static int handle_replay_mode(void) int main(int argc, char **argv) { - int err = 0, i; + int err = 0, i, j; if (argp_parse(&argp, argc, argv, 0, NULL, NULL)) return 1; @@ -2885,9 +3311,19 @@ int main(int argc, char **argv) } free(env.deny_filters); for (i = 0; i < env.npresets; ++i) { - free(env.presets[i].name); - if (env.presets[i].type == ENUMERATOR) - free(env.presets[i].svalue); + free(env.presets[i].full_name); + for (j = 0; j < env.presets[i].atom_count; ++j) { + switch (env.presets[i].atoms[j].type) { + case FIELD_NAME: + free(env.presets[i].atoms[j].name); + break; + case ARRAY_INDEX: + if (env.presets[i].atoms[j].index.type == ENUMERATOR) + free(env.presets[i].atoms[j].index.svalue); + break; + } + } + free(env.presets[i].atoms); } free(env.presets); return -err; diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh index 79505d294c44..2f869daf8a06 100755 --- a/tools/testing/selftests/bpf/vmtest.sh +++ b/tools/testing/selftests/bpf/vmtest.sh @@ -43,6 +43,15 @@ riscv64) BZIMAGE="arch/riscv/boot/Image" ARCH="riscv" ;; +ppc64el) + QEMU_BINARY=qemu-system-ppc64 + QEMU_CONSOLE="hvc0" + # KVM could not be tested for powerpc, therefore not enabled for now. + HOST_FLAGS=(-machine pseries -cpu POWER9) + CROSS_FLAGS=(-machine pseries -cpu POWER9) + BZIMAGE="vmlinux" + ARCH="powerpc" + ;; *) echo "Unsupported architecture" exit 1 diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index 0ced4026ee44..a29de0713f19 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -109,6 +109,8 @@ #include <network_helpers.h> +#define MAX_TX_BUDGET_DEFAULT 32 + static bool opt_verbose; static bool opt_print_tests; static enum test_mode opt_mode = TEST_MODE_ALL; @@ -1091,11 +1093,45 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len) return true; } +static u32 load_value(u32 *counter) +{ + return __atomic_load_n(counter, __ATOMIC_ACQUIRE); +} + +static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret) +{ + u32 max_budget = MAX_TX_BUDGET_DEFAULT; + u32 cons, ready_to_send; + int delta; + + cons = load_value(xsk->tx.consumer); + ready_to_send = load_value(xsk->tx.producer) - cons; + *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + + delta = load_value(xsk->tx.consumer) - cons; + /* By default, xsk should consume exact @max_budget descs at one + * send in this case where hitting the max budget limit in while + * loop is triggered in __xsk_generic_xmit(). 
Please make sure that + * the number of descs to be sent is larger than @max_budget, or + * else the tx.consumer will be updated in xskq_cons_peek_desc() + * in time which hides the issue we try to verify. + */ + if (ready_to_send > max_budget && delta != max_budget) + return false; + + return true; +} + static int kick_tx(struct xsk_socket_info *xsk) { int ret; - ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + if (xsk->check_consumer) { + if (!kick_tx_with_check(xsk, &ret)) + return TEST_FAILURE; + } else { + ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0); + } if (ret >= 0) return TEST_PASS; if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) { @@ -2613,6 +2649,23 @@ static int testapp_adjust_tail_grow_mb(struct test_spec *test) XSK_UMEM__LARGE_FRAME_SIZE * 2); } +static int testapp_tx_queue_consumer(struct test_spec *test) +{ + int nr_packets; + + if (test->mode == TEST_MODE_ZC) { + ksft_test_result_skip("Can not run TX_QUEUE_CONSUMER test for ZC mode\n"); + return TEST_SKIP; + } + + nr_packets = MAX_TX_BUDGET_DEFAULT + 1; + pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE); + test->ifobj_tx->xsk->batch_size = nr_packets; + test->ifobj_tx->xsk->check_consumer = true; + + return testapp_validate_traffic(test); +} + static void run_pkt_test(struct test_spec *test) { int ret; @@ -2723,6 +2776,7 @@ static const struct test_spec tests[] = { {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb}, {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow}, {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb}, + {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer}, }; static void print_tests(void) diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h index 67fc44b2813b..4df3a5d329ac 100644 --- a/tools/testing/selftests/bpf/xskxceiver.h +++ b/tools/testing/selftests/bpf/xskxceiver.h @@ -95,6 +95,7 @@ struct xsk_socket_info { u32 batch_size; u8 dst_mac[ETH_ALEN]; u8 src_mac[ETH_ALEN]; + bool check_consumer; }; struct pkt { diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c index 8d275f03e977..8d233ac95696 100644 --- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c +++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c @@ -127,22 +127,42 @@ int run_test(int cpu) return KSFT_PASS; } +/* + * Reads the suspend success count from sysfs. + * Returns the count on success or exits on failure. 
+ */ +static int get_suspend_success_count_or_fail(void) +{ + FILE *fp; + int val; + + fp = fopen("/sys/power/suspend_stats/success", "r"); + if (!fp) + ksft_exit_fail_msg( + "Failed to open suspend_stats/success: %s\n", + strerror(errno)); + + if (fscanf(fp, "%d", &val) != 1) { + fclose(fp); + ksft_exit_fail_msg( + "Failed to read suspend success count\n"); + } + + fclose(fp); + return val; +} + void suspend(void) { - int power_state_fd; int timerfd; int err; + int count_before; + int count_after; struct itimerspec spec = {}; if (getuid() != 0) ksft_exit_skip("Please run the test as root - Exiting.\n"); - power_state_fd = open("/sys/power/state", O_RDWR); - if (power_state_fd < 0) - ksft_exit_fail_msg( - "open(\"/sys/power/state\") failed %s)\n", - strerror(errno)); - timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0); if (timerfd < 0) ksft_exit_fail_msg("timerfd_create() failed\n"); @@ -152,14 +172,15 @@ void suspend(void) if (err < 0) ksft_exit_fail_msg("timerfd_settime() failed\n"); + count_before = get_suspend_success_count_or_fail(); + system("(echo mem > /sys/power/state) 2> /dev/null"); - timerfd_gettime(timerfd, &spec); - if (spec.it_value.tv_sec != 0 || spec.it_value.tv_nsec != 0) + count_after = get_suspend_success_count_or_fail(); + if (count_after <= count_before) ksft_exit_fail_msg("Failed to enter Suspend state\n"); close(timerfd); - close(power_state_fd); } int main(int argc, char **argv) diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile index ed210037b29d..77b3665c73c7 100644 --- a/tools/testing/selftests/coredump/Makefile +++ b/tools/testing/selftests/coredump/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS = $(KHDR_INCLUDES) +CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) TEST_GEN_PROGS := stackdump_test TEST_FILES := stackdump diff --git a/tools/testing/selftests/coredump/config b/tools/testing/selftests/coredump/config new file mode 100644 index 000000000000..a05ef112b4f9 --- /dev/null +++ b/tools/testing/selftests/coredump/config @@ -0,0 +1,3 @@ +CONFIG_COREDUMP=y +CONFIG_NET=y +CONFIG_UNIX=y diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 9984413be9f0..5a5a7a5f7e1d 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -1,12 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 +#include <assert.h> #include <fcntl.h> #include <inttypes.h> #include <libgen.h> +#include <limits.h> +#include <linux/coredump.h> +#include <linux/fs.h> #include <linux/limits.h> #include <pthread.h> #include <string.h> #include <sys/mount.h> +#include <poll.h> +#include <sys/epoll.h> #include <sys/resource.h> #include <sys/stat.h> #include <sys/socket.h> @@ -14,16 +20,23 @@ #include <unistd.h> #include "../kselftest_harness.h" +#include "../filesystems/wrappers.h" #include "../pidfd/pidfd.h" #define STACKDUMP_FILE "stack_values" #define STACKDUMP_SCRIPT "stackdump" #define NUM_THREAD_SPAWN 128 +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + static void *do_nothing(void *) { while (1) pause(); + + return NULL; } static void crashing_child(void) @@ -42,16 +55,32 @@ FIXTURE(coredump) { char original_core_pattern[256]; pid_t pid_coredump_server; + int fd_tmpfs_detached; }; +static int create_detached_tmpfs(void) +{ + int fd_context, fd_tmpfs; + + fd_context = sys_fsopen("tmpfs", 0); + if (fd_context < 0) + return -1; + + if (sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, 
NULL, 0) < 0) + return -1; + + fd_tmpfs = sys_fsmount(fd_context, 0, 0); + close(fd_context); + return fd_tmpfs; +} + FIXTURE_SETUP(coredump) { - char buf[PATH_MAX]; FILE *file; - char *dir; int ret; self->pid_coredump_server = -ESRCH; + self->fd_tmpfs_detached = -1; file = fopen("/proc/sys/kernel/core_pattern", "r"); ASSERT_NE(NULL, file); @@ -60,6 +89,8 @@ FIXTURE_SETUP(coredump) ASSERT_LT(ret, sizeof(self->original_core_pattern)); self->original_core_pattern[ret] = '\0'; + self->fd_tmpfs_detached = create_detached_tmpfs(); + ASSERT_GE(self->fd_tmpfs_detached, 0); ret = fclose(file); ASSERT_EQ(0, ret); @@ -98,6 +129,15 @@ FIXTURE_TEARDOWN(coredump) goto fail; } + if (self->fd_tmpfs_detached >= 0) { + ret = close(self->fd_tmpfs_detached); + if (ret < 0) { + reason = "Unable to close detached tmpfs"; + goto fail; + } + self->fd_tmpfs_detached = -1; + } + return; fail: /* This should never happen */ @@ -106,11 +146,10 @@ fail: TEST_F_TIMEOUT(coredump, stackdump, 120) { - struct sigaction action = {}; unsigned long long stack; char *test_dir, *line; size_t line_length; - char buf[PATH_MAX]; + char buf[PAGE_SIZE]; int ret, i, status; FILE *file; pid_t pid; @@ -169,153 +208,166 @@ TEST_F_TIMEOUT(coredump, stackdump, 120) fclose(file); } +static int create_and_listen_unix_socket(const char *path) +{ + struct sockaddr_un addr = { + .sun_family = AF_UNIX, + }; + assert(strlen(path) < sizeof(addr.sun_path) - 1); + strncpy(addr.sun_path, path, sizeof(addr.sun_path) - 1); + size_t addr_len = + offsetof(struct sockaddr_un, sun_path) + strlen(path) + 1; + int fd, ret; + + fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd < 0) + goto out; + + ret = bind(fd, (const struct sockaddr *)&addr, addr_len); + if (ret < 0) + goto out; + + ret = listen(fd, 128); + if (ret < 0) + goto out; + + return fd; + +out: + if (fd >= 0) + close(fd); + return -1; +} + +static bool set_core_pattern(const char *pattern) +{ + int fd; + ssize_t ret; + + fd = open("/proc/sys/kernel/core_pattern", O_WRONLY | O_CLOEXEC); + if (fd < 0) + return false; + + ret = write(fd, pattern, strlen(pattern)); + close(fd); + if (ret < 0) + return false; + + fprintf(stderr, "Set core_pattern to '%s' | %zu == %zu\n", pattern, ret, strlen(pattern)); + return ret == strlen(pattern); +} + +static int get_peer_pidfd(int fd) +{ + int fd_peer_pidfd; + socklen_t fd_peer_pidfd_len = sizeof(fd_peer_pidfd); + int ret = getsockopt(fd, SOL_SOCKET, SO_PEERPIDFD, &fd_peer_pidfd, + &fd_peer_pidfd_len); + if (ret < 0) { + fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); + return -1; + } + return fd_peer_pidfd; +} + +static bool get_pidfd_info(int fd_peer_pidfd, struct pidfd_info *info) +{ + memset(info, 0, sizeof(*info)); + info->mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + return ioctl(fd_peer_pidfd, PIDFD_GET_INFO, info) == 0; +} + +static void +wait_and_check_coredump_server(pid_t pid_coredump_server, + struct __test_metadata *const _metadata, + FIXTURE_DATA(coredump)* self) +{ + int status; + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + TEST_F(coredump, socket) { - int fd, pidfd, ret, status; - FILE *file; + int pidfd, ret, status; pid_t pid, pid_coredump_server; struct stat st; - char core_file[PATH_MAX]; struct pidfd_info info = {}; int ipc_sockets[2]; char c; - const struct sockaddr_un coredump_sk = { - .sun_family = AF_UNIX, - .sun_path = "/tmp/coredump.socket", - }; - size_t coredump_sk_len 
= offsetof(struct sockaddr_un, sun_path) + - sizeof("/tmp/coredump.socket"); + + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); ASSERT_EQ(ret, 0); - file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); - pid_coredump_server = fork(); ASSERT_GE(pid_coredump_server, 0); if (pid_coredump_server == 0) { - int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; - socklen_t fd_peer_pidfd_len; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1; + int exit_code = EXIT_FAILURE; close(ipc_sockets[0]); - fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); if (fd_server < 0) - _exit(EXIT_FAILURE); - - ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); - if (ret < 0) { - fprintf(stderr, "Failed to bind coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } - - ret = listen(fd_server, 1); - if (ret < 0) { - fprintf(stderr, "Failed to listen on coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } + goto out; - if (write_nointr(ipc_sockets[1], "1", 1) < 0) { - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; close(ipc_sockets[1]); fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); - if (fd_coredump < 0) { - fprintf(stderr, "Failed to accept coredump socket connection\n"); - close(fd_server); - _exit(EXIT_FAILURE); - } + if (fd_coredump < 0) + goto out; - fd_peer_pidfd_len = sizeof(fd_peer_pidfd); - ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, - &fd_peer_pidfd, &fd_peer_pidfd_len); - if (ret < 0) { - fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - _exit(EXIT_FAILURE); - } + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; - memset(&info, 0, sizeof(info)); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); - if (ret < 0) { - fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; - if (!(info.mask & PIDFD_INFO_COREDUMP)) { - fprintf(stderr, "Missing coredump information from coredumping task\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; - if (!(info.coredump_mask & PIDFD_COREDUMPED)) { - fprintf(stderr, "Received connection from non-coredumping task\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; fd_core_file = creat("/tmp/coredump.file", 0644); - if (fd_core_file < 0) { - fprintf(stderr, "Failed to create coredump file\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (fd_core_file < 0) + goto out; for (;;) { char buffer[4096]; ssize_t bytes_read, bytes_write; bytes_read = read(fd_coredump, buffer, sizeof(buffer)); - if (bytes_read < 0) { - 
close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - close(fd_core_file); - _exit(EXIT_FAILURE); - } + if (bytes_read < 0) + goto out; if (bytes_read == 0) break; bytes_write = write(fd_core_file, buffer, bytes_read); - if (bytes_read != bytes_write) { - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - close(fd_core_file); - _exit(EXIT_FAILURE); - } + if (bytes_read != bytes_write) + goto out; } - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - close(fd_core_file); - _exit(EXIT_SUCCESS); + exit_code = EXIT_SUCCESS; +out: + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); } self->pid_coredump_server = pid_coredump_server; @@ -335,48 +387,27 @@ TEST_F(coredump, socket) ASSERT_TRUE(WIFSIGNALED(status)); ASSERT_TRUE(WCOREDUMP(status)); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); - waitpid(pid_coredump_server, &status, 0); - self->pid_coredump_server = -ESRCH; - ASSERT_TRUE(WIFEXITED(status)); - ASSERT_EQ(WEXITSTATUS(status), 0); + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); ASSERT_EQ(stat("/tmp/coredump.file", &st), 0); ASSERT_GT(st.st_size, 0); - /* - * We should somehow validate the produced core file. - * For now just allow for visual inspection - */ system("file /tmp/coredump.file"); } TEST_F(coredump, socket_detect_userspace_client) { - int fd, pidfd, ret, status; - FILE *file; + int pidfd, ret, status; pid_t pid, pid_coredump_server; struct stat st; - char core_file[PATH_MAX]; struct pidfd_info info = {}; int ipc_sockets[2]; char c; - const struct sockaddr_un coredump_sk = { - .sun_family = AF_UNIX, - .sun_path = "/tmp/coredump.socket", - }; - size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + - sizeof("/tmp/coredump.socket"); - - file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); ASSERT_EQ(ret, 0); @@ -384,88 +415,49 @@ TEST_F(coredump, socket_detect_userspace_client) pid_coredump_server = fork(); ASSERT_GE(pid_coredump_server, 0); if (pid_coredump_server == 0) { - int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; - socklen_t fd_peer_pidfd_len; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; close(ipc_sockets[0]); - fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); if (fd_server < 0) - _exit(EXIT_FAILURE); + goto out; - ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); - if (ret < 0) { - fprintf(stderr, "Failed to bind coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } - - ret = listen(fd_server, 1); - if (ret < 0) { - fprintf(stderr, "Failed to listen on coredump socket\n"); - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); - } - - if (write_nointr(ipc_sockets[1], "1", 1) < 0) { - close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); 
- } + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; close(ipc_sockets[1]); fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); - if (fd_coredump < 0) { - fprintf(stderr, "Failed to accept coredump socket connection\n"); - close(fd_server); - _exit(EXIT_FAILURE); - } + if (fd_coredump < 0) + goto out; - fd_peer_pidfd_len = sizeof(fd_peer_pidfd); - ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, - &fd_peer_pidfd, &fd_peer_pidfd_len); - if (ret < 0) { - fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - _exit(EXIT_FAILURE); - } + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; - memset(&info, 0, sizeof(info)); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); - if (ret < 0) { - fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; - if (!(info.mask & PIDFD_INFO_COREDUMP)) { - fprintf(stderr, "Missing coredump information from coredumping task\n"); - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (info.coredump_mask & PIDFD_COREDUMPED) + goto out; - if (info.coredump_mask & PIDFD_COREDUMPED) { - fprintf(stderr, "Received unexpected connection from coredumping task\n"); + if (read(fd_coredump, &c, 1) < 1) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) close(fd_coredump); + if (fd_server >= 0) close(fd_server); - close(fd_peer_pidfd); - _exit(EXIT_FAILURE); - } - - close(fd_coredump); - close(fd_server); - close(fd_peer_pidfd); - close(fd_core_file); - _exit(EXIT_SUCCESS); + _exit(exit_code); } self->pid_coredump_server = pid_coredump_server; @@ -478,17 +470,22 @@ TEST_F(coredump, socket_detect_userspace_client) if (pid == 0) { int fd_socket; ssize_t ret; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = + offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); fd_socket = socket(AF_UNIX, SOCK_STREAM, 0); if (fd_socket < 0) _exit(EXIT_FAILURE); - ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len); if (ret < 0) _exit(EXIT_FAILURE); - (void *)write(fd_socket, &(char){ 0 }, 1); close(fd_socket); _exit(EXIT_SUCCESS); } @@ -500,15 +497,11 @@ TEST_F(coredump, socket_detect_userspace_client) ASSERT_TRUE(WIFEXITED(status)); ASSERT_EQ(WEXITSTATUS(status), 0); - info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; - ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0); - waitpid(pid_coredump_server, &status, 0); - self->pid_coredump_server = -ESRCH; - ASSERT_TRUE(WIFEXITED(status)); - ASSERT_EQ(WEXITSTATUS(status), 0); + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); ASSERT_NE(stat("/tmp/coredump.file", &st), 0); ASSERT_EQ(errno, ENOENT); @@ -516,17 +509,10 @@ TEST_F(coredump, socket_detect_userspace_client) TEST_F(coredump, socket_enoent) { - int pidfd, ret, status; - FILE *file; + int pidfd, status; pid_t pid; - char core_file[PATH_MAX]; - - 
file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); pid = fork(); ASSERT_GE(pid, 0); @@ -544,7 +530,6 @@ TEST_F(coredump, socket_enoent) TEST_F(coredump, socket_no_listener) { int pidfd, ret, status; - FILE *file; pid_t pid, pid_coredump_server; int ipc_sockets[2]; char c; @@ -555,45 +540,518 @@ TEST_F(coredump, socket_no_listener) size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + sizeof("/tmp/coredump.socket"); + ASSERT_TRUE(set_core_pattern("@/tmp/coredump.socket")); + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); ASSERT_EQ(ret, 0); - file = fopen("/proc/sys/kernel/core_pattern", "w"); - ASSERT_NE(file, NULL); - - ret = fprintf(file, "@/tmp/coredump.socket"); - ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); - ASSERT_EQ(fclose(file), 0); - pid_coredump_server = fork(); ASSERT_GE(pid_coredump_server, 0); if (pid_coredump_server == 0) { - int fd_server; - socklen_t fd_peer_pidfd_len; + int fd_server = -1; + int exit_code = EXIT_FAILURE; close(ipc_sockets[0]); fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); if (fd_server < 0) - _exit(EXIT_FAILURE); + goto out; ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); - if (ret < 0) { - fprintf(stderr, "Failed to bind coredump socket\n"); + if (ret < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_server >= 0) close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); + close(ipc_sockets[1]); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +static ssize_t recv_marker(int fd) +{ + enum coredump_mark mark = COREDUMP_MARK_REQACK; + ssize_t ret; + + ret = recv(fd, &mark, sizeof(mark), MSG_WAITALL); + if (ret != sizeof(mark)) + return -1; + + switch (mark) { + case COREDUMP_MARK_REQACK: + fprintf(stderr, "Received marker: ReqAck\n"); + return COREDUMP_MARK_REQACK; + case COREDUMP_MARK_MINSIZE: + fprintf(stderr, "Received marker: MinSize\n"); + return COREDUMP_MARK_MINSIZE; + case COREDUMP_MARK_MAXSIZE: + fprintf(stderr, "Received marker: MaxSize\n"); + return COREDUMP_MARK_MAXSIZE; + case COREDUMP_MARK_UNSUPPORTED: + fprintf(stderr, "Received marker: Unsupported\n"); + return COREDUMP_MARK_UNSUPPORTED; + case COREDUMP_MARK_CONFLICTING: + fprintf(stderr, "Received marker: Conflicting\n"); + return COREDUMP_MARK_CONFLICTING; + default: + fprintf(stderr, "Received unknown marker: %u\n", mark); + break; + } + return -1; +} + +static bool read_marker(int fd, enum coredump_mark mark) +{ + ssize_t ret; + + ret = recv_marker(fd); + if (ret < 0) + return false; + return ret == mark; +} + +static bool read_coredump_req(int fd, struct coredump_req *req) +{ + ssize_t ret; + size_t field_size, user_size, ack_size, kernel_size, remaining_size; + + memset(req, 0, sizeof(*req)); + field_size = sizeof(req->size); + + /* Peek the size of 
the coredump request. */ + ret = recv(fd, req, field_size, MSG_PEEK | MSG_WAITALL); + if (ret != field_size) + return false; + kernel_size = req->size; + + if (kernel_size < COREDUMP_ACK_SIZE_VER0) + return false; + if (kernel_size >= PAGE_SIZE) + return false; + + /* Use the minimum of user and kernel size to read the full request. */ + user_size = sizeof(struct coredump_req); + ack_size = user_size < kernel_size ? user_size : kernel_size; + ret = recv(fd, req, ack_size, MSG_WAITALL); + if (ret != ack_size) + return false; + + fprintf(stderr, "Read coredump request with size %u and mask 0x%llx\n", + req->size, (unsigned long long)req->mask); + + if (user_size > kernel_size) + remaining_size = user_size - kernel_size; + else + remaining_size = kernel_size - user_size; + + if (PAGE_SIZE <= remaining_size) + return false; + + /* + * Discard any additional data if the kernel's request was larger than + * what we knew about or cared about. + */ + if (remaining_size) { + char buffer[PAGE_SIZE]; + + ret = recv(fd, buffer, sizeof(buffer), MSG_WAITALL); + if (ret != remaining_size) + return false; + fprintf(stderr, "Discarded %zu bytes of data after coredump request\n", remaining_size); + } + + return true; +} + +static bool send_coredump_ack(int fd, const struct coredump_req *req, + __u64 mask, size_t size_ack) +{ + ssize_t ret; + /* + * Wrap struct coredump_ack in a larger struct so we can + * simulate sending to much data to the kernel. + */ + struct large_ack_for_size_testing { + struct coredump_ack ack; + char buffer[PAGE_SIZE]; + } large_ack = {}; + + if (!size_ack) + size_ack = sizeof(struct coredump_ack) < req->size_ack ? + sizeof(struct coredump_ack) : + req->size_ack; + large_ack.ack.mask = mask; + large_ack.ack.size = size_ack; + ret = send(fd, &large_ack, size_ack, MSG_NOSIGNAL); + if (ret != size_ack) + return false; + + fprintf(stderr, "Sent coredump ack with size %zu and mask 0x%llx\n", + size_ack, (unsigned long long)mask); + return true; +} + +static bool check_coredump_req(const struct coredump_req *req, size_t min_size, + __u64 required_mask) +{ + if (req->size < min_size) + return false; + if ((req->mask & required_mask) != required_mask) + return false; + if (req->mask & ~required_mask) + return false; + return true; +} + +TEST_F(coredump, socket_request_kernel) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct stat st; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_core_file = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + fd_core_file = creat("/tmp/coredump.file", 0644); + if (fd_core_file < 0) + goto out; + + if 
(!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_KERNEL | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + + for (;;) { + char buffer[4096]; + ssize_t bytes_read, bytes_write; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) + goto out; + + if (bytes_read == 0) + break; + + bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_read != bytes_write) + goto out; } - if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + exit_code = EXIT_SUCCESS; +out: + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) close(fd_server); - close(ipc_sockets[1]); - _exit(EXIT_FAILURE); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); + + ASSERT_EQ(stat("/tmp/coredump.file", &st), 0); + ASSERT_GT(st.st_size, 0); + system("file /tmp/coredump.file"); +} + +TEST_F(coredump, socket_request_userspace) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_USERSPACE | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + + for (;;) { + char buffer[4096]; + ssize_t bytes_read; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read > 0) + goto out; + + if (bytes_read < 0) + goto out; + + if (bytes_read == 0) + break; } - close(fd_server); + exit_code = 
EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_reject) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + close(ipc_sockets[1]); - _exit(EXIT_SUCCESS); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_REJECT | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + + for (;;) { + char buffer[4096]; + ssize_t bytes_read; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read > 0) + goto out; + + if (bytes_read < 0) + goto out; + + if (bytes_read == 0) + break; + } + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); } self->pid_coredump_server = pid_coredump_server; @@ -613,10 +1071,760 @@ TEST_F(coredump, socket_no_listener) ASSERT_TRUE(WIFSIGNALED(status)); ASSERT_FALSE(WCOREDUMP(status)); - waitpid(pid_coredump_server, &status, 0); - self->pid_coredump_server = -ESRCH; - ASSERT_TRUE(WIFEXITED(status)); - ASSERT_EQ(WEXITSTATUS(status), 0); + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_invalid_flag_combination) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + 
ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_KERNEL | COREDUMP_REJECT | COREDUMP_WAIT, 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_CONFLICTING)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_unknown_flag) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto 
out; + + if (!send_coredump_ack(fd_coredump, &req, (1ULL << 63), 0)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_UNSUPPORTED)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_invalid_size_small) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_REJECT | COREDUMP_WAIT, + COREDUMP_ACK_SIZE_VER0 / 2)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_MINSIZE)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_request_invalid_size_large) +{ + int pidfd, ret, status; + pid_t pid, pid_coredump_server; + struct pidfd_info info = {}; + int 
ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + struct coredump_req req = {}; + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1; + int exit_code = EXIT_FAILURE; + + close(ipc_sockets[0]); + + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) + goto out; + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + + if (!(info.mask & PIDFD_INFO_COREDUMP)) + goto out; + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + + if (!read_coredump_req(fd_coredump, &req)) + goto out; + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_REJECT | COREDUMP_WAIT, + COREDUMP_ACK_SIZE_VER0 + PAGE_SIZE)) + goto out; + + if (!read_marker(fd_coredump, COREDUMP_MARK_MAXSIZE)) + goto out; + + exit_code = EXIT_SUCCESS; +out: + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + ASSERT_TRUE(get_pidfd_info(pidfd, &info)); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +static int open_coredump_tmpfile(int fd_tmpfs_detached) +{ + return openat(fd_tmpfs_detached, ".", O_TMPFILE | O_RDWR | O_EXCL, 0600); +} + +#define NUM_CRASHING_COREDUMPS 5 + +TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps, 500) +{ + int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS]; + pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server = -1, fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1; + int exit_code = EXIT_FAILURE; + struct coredump_req req = {}; + + close(ipc_sockets[0]); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) { + fprintf(stderr, "Failed to create and listen on unix socket\n"); + goto out; + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + fprintf(stderr, "Failed to notify parent via ipc socket\n"); + goto out; + } + close(ipc_sockets[1]); + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + fd_coredump = accept4(fd_server, NULL, NULL, 
SOCK_CLOEXEC); + if (fd_coredump < 0) { + fprintf(stderr, "accept4 failed: %m\n"); + goto out; + } + + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) { + fprintf(stderr, "get_peer_pidfd failed for fd %d: %m\n", fd_coredump); + goto out; + } + + if (!get_pidfd_info(fd_peer_pidfd, &info)) { + fprintf(stderr, "get_pidfd_info failed for fd %d\n", fd_peer_pidfd); + goto out; + } + + if (!(info.mask & PIDFD_INFO_COREDUMP)) { + fprintf(stderr, "pidfd info missing PIDFD_INFO_COREDUMP for fd %d\n", fd_peer_pidfd); + goto out; + } + if (!(info.coredump_mask & PIDFD_COREDUMPED)) { + fprintf(stderr, "pidfd info missing PIDFD_COREDUMPED for fd %d\n", fd_peer_pidfd); + goto out; + } + + if (!read_coredump_req(fd_coredump, &req)) { + fprintf(stderr, "read_coredump_req failed for fd %d\n", fd_coredump); + goto out; + } + + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) { + fprintf(stderr, "check_coredump_req failed for fd %d\n", fd_coredump); + goto out; + } + + if (!send_coredump_ack(fd_coredump, &req, + COREDUMP_KERNEL | COREDUMP_WAIT, 0)) { + fprintf(stderr, "send_coredump_ack failed for fd %d\n", fd_coredump); + goto out; + } + + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) { + fprintf(stderr, "read_marker failed for fd %d\n", fd_coredump); + goto out; + } + + fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached); + if (fd_core_file < 0) { + fprintf(stderr, "%m - open_coredump_tmpfile failed for fd %d\n", fd_coredump); + goto out; + } + + for (;;) { + char buffer[4096]; + ssize_t bytes_read, bytes_write; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) { + fprintf(stderr, "read failed for fd %d: %m\n", fd_coredump); + goto out; + } + + if (bytes_read == 0) + break; + + bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_read != bytes_write) { + fprintf(stderr, "write failed for fd %d: %m\n", fd_core_file); + goto out; + } + } + + close(fd_core_file); + close(fd_peer_pidfd); + close(fd_coredump); + fd_peer_pidfd = -1; + fd_coredump = -1; + } + + exit_code = EXIT_SUCCESS; +out: + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_server >= 0) + close(fd_server); + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + pid[i] = fork(); + ASSERT_GE(pid[i], 0); + if (pid[i] == 0) + crashing_child(); + pidfd[i] = sys_pidfd_open(pid[i], 0); + ASSERT_GE(pidfd[i], 0); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + waitpid(pid[i], &status[i], 0); + ASSERT_TRUE(WIFSIGNALED(status[i])); + ASSERT_TRUE(WCOREDUMP(status[i])); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + } + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +#define MAX_EVENTS 128 + +static void process_coredump_worker(int fd_coredump, int fd_peer_pidfd, int fd_core_file) +{ + int epfd = -1; + int exit_code = EXIT_FAILURE; + + epfd = epoll_create1(0); + if (epfd < 0) + goto out; + + struct epoll_event ev; + 
ev.events = EPOLLIN | EPOLLRDHUP | EPOLLET; + ev.data.fd = fd_coredump; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd_coredump, &ev) < 0) + goto out; + + for (;;) { + struct epoll_event events[1]; + int n = epoll_wait(epfd, events, 1, -1); + if (n < 0) + break; + + if (events[0].events & (EPOLLIN | EPOLLRDHUP)) { + for (;;) { + char buffer[4096]; + ssize_t bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) + break; + goto out; + } + if (bytes_read == 0) + goto done; + ssize_t bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_write != bytes_read) + goto out; + } + } + } + +done: + exit_code = EXIT_SUCCESS; +out: + if (epfd >= 0) + close(epfd); + if (fd_core_file >= 0) + close(fd_core_file); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_coredump >= 0) + close(fd_coredump); + _exit(exit_code); +} + +TEST_F_TIMEOUT(coredump, socket_multiple_crashing_coredumps_epoll_workers, 500) +{ + int pidfd[NUM_CRASHING_COREDUMPS], status[NUM_CRASHING_COREDUMPS]; + pid_t pid[NUM_CRASHING_COREDUMPS], pid_coredump_server, worker_pids[NUM_CRASHING_COREDUMPS]; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + + ASSERT_TRUE(set_core_pattern("@@/tmp/coredump.socket")); + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server = -1, exit_code = EXIT_FAILURE, n_conns = 0; + fd_server = -1; + exit_code = EXIT_FAILURE; + n_conns = 0; + close(ipc_sockets[0]); + fd_server = create_and_listen_unix_socket("/tmp/coredump.socket"); + if (fd_server < 0) + goto out; + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) + goto out; + close(ipc_sockets[1]); + + while (n_conns < NUM_CRASHING_COREDUMPS) { + int fd_coredump = -1, fd_peer_pidfd = -1, fd_core_file = -1; + struct coredump_req req = {}; + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) + continue; + goto out; + } + fd_peer_pidfd = get_peer_pidfd(fd_coredump); + if (fd_peer_pidfd < 0) + goto out; + if (!get_pidfd_info(fd_peer_pidfd, &info)) + goto out; + if (!(info.mask & PIDFD_INFO_COREDUMP) || !(info.coredump_mask & PIDFD_COREDUMPED)) + goto out; + if (!read_coredump_req(fd_coredump, &req)) + goto out; + if (!check_coredump_req(&req, COREDUMP_ACK_SIZE_VER0, + COREDUMP_KERNEL | COREDUMP_USERSPACE | + COREDUMP_REJECT | COREDUMP_WAIT)) + goto out; + if (!send_coredump_ack(fd_coredump, &req, COREDUMP_KERNEL | COREDUMP_WAIT, 0)) + goto out; + if (!read_marker(fd_coredump, COREDUMP_MARK_REQACK)) + goto out; + fd_core_file = open_coredump_tmpfile(self->fd_tmpfs_detached); + if (fd_core_file < 0) + goto out; + pid_t worker = fork(); + if (worker == 0) { + close(fd_server); + process_coredump_worker(fd_coredump, fd_peer_pidfd, fd_core_file); + } + worker_pids[n_conns] = worker; + if (fd_coredump >= 0) + close(fd_coredump); + if (fd_peer_pidfd >= 0) + close(fd_peer_pidfd); + if (fd_core_file >= 0) + close(fd_core_file); + n_conns++; + } + exit_code = EXIT_SUCCESS; +out: + if (fd_server >= 0) + close(fd_server); + + // Reap all worker processes + for (int i = 0; i < n_conns; i++) { + int wstatus; + if (waitpid(worker_pids[i], &wstatus, 0) < 0) { + fprintf(stderr, "Failed to wait for worker %d: %m\n", worker_pids[i]); + } else if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != EXIT_SUCCESS) { + fprintf(stderr, "Worker %d exited with error code 
%d\n", worker_pids[i], WEXITSTATUS(wstatus)); + exit_code = EXIT_FAILURE; + } + } + + _exit(exit_code); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + pid[i] = fork(); + ASSERT_GE(pid[i], 0); + if (pid[i] == 0) + crashing_child(); + pidfd[i] = sys_pidfd_open(pid[i], 0); + ASSERT_GE(pidfd[i], 0); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + ASSERT_GE(waitpid(pid[i], &status[i], 0), 0); + ASSERT_TRUE(WIFSIGNALED(status[i])); + ASSERT_TRUE(WCOREDUMP(status[i])); + } + + for (int i = 0; i < NUM_CRASHING_COREDUMPS; i++) { + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd[i], PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + } + + wait_and_check_coredump_server(pid_coredump_server, _metadata, self); +} + +TEST_F(coredump, socket_invalid_paths) +{ + ASSERT_FALSE(set_core_pattern("@ /tmp/coredump.socket")); + ASSERT_FALSE(set_core_pattern("@/tmp/../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@/tmp/coredump.socket/..")); + ASSERT_FALSE(set_core_pattern("@..")); + + ASSERT_FALSE(set_core_pattern("@@ /tmp/coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@/tmp/../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@../coredump.socket")); + ASSERT_FALSE(set_core_pattern("@@/tmp/coredump.socket/..")); + ASSERT_FALSE(set_core_pattern("@@..")); + + ASSERT_FALSE(set_core_pattern("@@@/tmp/coredump.socket")); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index d5dc7e0dc726..6232a46ca6e1 100755 --- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh @@ -67,7 +67,7 @@ hotpluggable_cpus() done } -hotplaggable_offline_cpus() +hotpluggable_offline_cpus() { hotpluggable_cpus 0 } @@ -151,7 +151,7 @@ offline_cpu_expect_fail() online_all_hot_pluggable_cpus() { - for cpu in `hotplaggable_offline_cpus`; do + for cpu in `hotpluggable_offline_cpus`; do online_cpu_expect_success $cpu done } diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index be780bcb73a3..3556f3563e08 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -12,14 +12,17 @@ TEST_GEN_FILES := \ TEST_PROGS := \ napi_id.py \ netcons_basic.sh \ + netcons_cmdline.sh \ netcons_fragmented_msg.sh \ netcons_overflow.sh \ netcons_sysdata.sh \ + netpoll_basic.py \ ping.py \ queues.py \ stats.py \ shaper.py \ hds.py \ + xdp.py \ # end of TEST_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index df2c047ffa90..fdc97355588c 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -16,6 +16,7 @@ TEST_PROGS = \ irq.py \ loopback.sh \ pp_alloc_fail.py \ + rss_api.py \ rss_ctx.py \ rss_input_xfrm.py \ tso.py \ diff --git a/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py new file mode 100755 index 000000000000..ead6784d1910 --- /dev/null +++ 
b/tools/testing/selftests/drivers/net/hw/devlink_rate_tc_bw.py @@ -0,0 +1,465 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Devlink Rate TC Bandwidth Test Suite +=================================== + +This test suite verifies the functionality of devlink-rate traffic class (TC) +bandwidth distribution in a virtualized environment. The tests validate that +bandwidth can be properly allocated between different traffic classes and +that TC mapping works as expected. + +Test Environment: +---------------- +- Creates 1 VF +- Establishes a bridge connecting the VF representor and the uplink representor +- Sets up 2 VLAN interfaces on the VF with different VLAN IDs (101, 102) +- Configures different traffic classes (TC3 and TC4) for each VLAN + +Test Cases: +---------- +1. test_no_tc_mapping_bandwidth: + - Verifies that without TC mapping, bandwidth is NOT distributed according to + the configured 80/20 split between TC4 and TC3 + - This test should fail if bandwidth matches the 80/20 split without TC + mapping + - Expected: Bandwidth should NOT be distributed as 80/20 + +2. test_tc_mapping_bandwidth: + - Configures TC mapping using mqprio qdisc + - Verifies that with TC mapping, bandwidth IS distributed according to the + configured 80/20 split between TC3 and TC4 + - Expected: Bandwidth should be distributed as 80/20 + +Bandwidth Distribution: +---------------------- +- TC3 (VLAN 101): Configured for 80% of total bandwidth +- TC4 (VLAN 102): Configured for 20% of total bandwidth +- Total bandwidth: 1Gbps +- Tolerance: +-12% + +Hardware-Specific Behavior (mlx5): +-------------------------- +mlx5 hardware enforces traffic class separation by ensuring that each transmit +queue (SQ) is associated with a single TC. If a packet is sent on a queue that +doesn't match the expected TC (based on DSCP or VLAN priority and hypervisor-set +mapping), the hardware moves the queue to the correct TC scheduler to preserve +traffic isolation. + +This behavior means that even without explicit TC-to-queue mapping, bandwidth +enforcement may still appear to work—because the hardware dynamically adjusts +the scheduling context. However, this can lead to performance issues in high +rates and HOL blocking if traffic from different TCs is mixed on the same queue. +""" + +import json +import os +import subprocess +import threading +import time + +from lib.py import ksft_pr, ksft_run, ksft_exit +from lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx +from lib.py import NetDrvEpEnv, DevlinkFamily +from lib.py import NlError +from lib.py import cmd, defer, ethtool, ip + + +class BandwidthValidator: + """ + Validates bandwidth totals and per-TC shares against expected values + with a tolerance. 
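For orientation, a minimal standalone sketch of the bounded-tolerance check that the validator below implements (illustrative only, not part of the patch; it assumes the same 12% tolerance the class configures):

    def within_tolerance(expected, measured, tolerance_percent=12):
        # True if `measured` lies within +/- tolerance_percent of `expected`.
        lo = expected - expected * tolerance_percent / 100
        hi = expected + expected * tolerance_percent / 100
        return lo <= measured <= hi

    # Example: a nominal 80% share measured at 74.5% still passes with a 12% band.
    assert within_tolerance(80.0, 74.5)
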
+ """ + + def __init__(self): + self.tolerance_percent = 12 + self.expected_total_gbps = 1.0 + self.total_min_expected = self.min_expected(self.expected_total_gbps) + self.total_max_expected = self.max_expected(self.expected_total_gbps) + self.tc_expected_percent = { + 3: 20.0, + 4: 80.0, + } + + def min_expected(self, value): + """Calculates the minimum acceptable value based on tolerance.""" + return value - (value * self.tolerance_percent / 100) + + def max_expected(self, value): + """Calculates the maximum acceptable value based on tolerance.""" + return value + (value * self.tolerance_percent / 100) + + def bound(self, expected, value): + """Returns True if value is within expected tolerance.""" + return self.min_expected(expected) <= value <= self.max_expected(expected) + + def tc_bandwidth_bound(self, value, tc_ix): + """ + Returns True if the given bandwidth value is within tolerance + for the TC's expected bandwidth. + """ + expected = self.tc_expected_percent[tc_ix] + return self.bound(expected, value) + + +def setup_vf(cfg, set_tc_mapping=True): + """ + Sets up a VF on the given network interface. + + Enables SR-IOV and switchdev mode, brings the VF interface up, + and optionally configures TC mapping using mqprio. + """ + try: + cmd(f"devlink dev eswitch set pci/{cfg.pci} mode switchdev") + defer(cmd, f"devlink dev eswitch set pci/{cfg.pci} mode legacy") + except Exception as exc: + raise KsftSkipEx(f"Failed to enable switchdev mode on {cfg.pci}") from exc + try: + cmd(f"echo 1 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs") + defer(cmd, f"echo 0 > /sys/class/net/{cfg.ifname}/device/sriov_numvfs") + except Exception as exc: + raise KsftSkipEx(f"Failed to enable SR-IOV on {cfg.ifname}") from exc + + time.sleep(2) + vf_ifc = (os.listdir( + f"/sys/class/net/{cfg.ifname}/device/virtfn0/net") or [None])[0] + if vf_ifc: + ip(f"link set dev {vf_ifc} up") + else: + raise KsftSkipEx("VF interface not found") + if set_tc_mapping: + cmd(f"tc qdisc add dev {vf_ifc} root handle 5 mqprio mode dcb hw 1 num_tc 8") + + return vf_ifc + + +def setup_vlans_on_vf(vf_ifc): + """ + Sets up two VLAN interfaces on the given VF, each mapped to a different TC. + """ + vlan_configs = [ + {"vlan_id": 101, "tc": 3, "ip": "198.51.100.2"}, + {"vlan_id": 102, "tc": 4, "ip": "198.51.100.10"}, + ] + + for config in vlan_configs: + vlan_dev = f"{vf_ifc}.{config['vlan_id']}" + ip(f"link add link {vf_ifc} name {vlan_dev} type vlan id {config['vlan_id']}") + ip(f"addr add {config['ip']}/29 dev {vlan_dev}") + ip(f"link set dev {vlan_dev} up") + ip(f"link set dev {vlan_dev} type vlan egress-qos-map 0:{config['tc']}") + ksft_pr(f"Created VLAN {vlan_dev} on {vf_ifc} with tc {config['tc']} and IP {config['ip']}") + + +def get_vf_info(cfg): + """ + Finds the VF representor interface and devlink port index + for the given PCI device used in the test environment. + """ + cfg.vf_representor = None + cfg.vf_port_index = None + out = subprocess.check_output(["devlink", "-j", "port", "show"], encoding="utf-8") + ports = json.loads(out)["port"] + + for port_name, props in ports.items(): + netdev = props.get("netdev") + + if (port_name.startswith(f"pci/{cfg.pci}/") and + props.get("vfnum") == 0): + cfg.vf_representor = netdev + cfg.vf_port_index = int(port_name.split("/")[-1]) + break + + +def setup_bridge(cfg): + """ + Creates and configures a Linux bridge, with both the uplink + and VF representor interfaces attached to it. 
+ """ + bridge_name = f"br_{os.getpid()}" + ip(f"link add name {bridge_name} type bridge") + defer(cmd, f"ip link del name {bridge_name} type bridge") + + ip(f"link set dev {cfg.ifname} master {bridge_name}") + + rep_name = cfg.vf_representor + if rep_name: + ip(f"link set dev {rep_name} master {bridge_name}") + ip(f"link set dev {rep_name} up") + ksft_pr(f"Set representor {rep_name} up and added to bridge") + else: + raise KsftSkipEx("Could not find representor for the VF") + + ip(f"link set dev {bridge_name} up") + + +def setup_devlink_rate(cfg): + """ + Configures devlink rate tx_max and traffic class bandwidth for the VF. + """ + port_index = cfg.vf_port_index + if port_index is None: + raise KsftSkipEx("Could not find VF port index") + try: + cfg.devnl.rate_set({ + "bus-name": "pci", + "dev-name": cfg.pci, + "port-index": port_index, + "rate-tx-max": 125000000, + "rate-tc-bws": [ + {"index": 0, "bw": 0}, + {"index": 1, "bw": 0}, + {"index": 2, "bw": 0}, + {"index": 3, "bw": 20}, + {"index": 4, "bw": 80}, + {"index": 5, "bw": 0}, + {"index": 6, "bw": 0}, + {"index": 7, "bw": 0}, + ] + }) + except NlError as exc: + if exc.error == 95: # EOPNOTSUPP + raise KsftSkipEx("devlink rate configuration is not supported on the VF") from exc + raise KsftFailEx(f"rate_set failed on VF port {port_index}") from exc + + +def setup_remote_server(cfg): + """ + Sets up VLAN interfaces and starts iperf3 servers on the remote side. + """ + remote_dev = cfg.remote_ifname + vlan_ids = [101, 102] + remote_ips = ["198.51.100.1", "198.51.100.9"] + + for vlan_id, ip_addr in zip(vlan_ids, remote_ips): + vlan_dev = f"{remote_dev}.{vlan_id}" + cmd(f"ip link add link {remote_dev} name {vlan_dev} " + f"type vlan id {vlan_id}", host=cfg.remote) + cmd(f"ip addr add {ip_addr}/29 dev {vlan_dev}", host=cfg.remote) + cmd(f"ip link set dev {vlan_dev} up", host=cfg.remote) + cmd(f"iperf3 -s -1 -B {ip_addr}",background=True, host=cfg.remote) + defer(cmd, f"ip link del {vlan_dev}", host=cfg.remote) + + +def setup_test_environment(cfg, set_tc_mapping=True): + """ + Sets up the complete test environment including VF creation, VLANs, + bridge configuration, devlink rate setup, and the remote server. + """ + vf_ifc = setup_vf(cfg, set_tc_mapping) + ksft_pr(f"Created VF interface: {vf_ifc}") + + setup_vlans_on_vf(vf_ifc) + + get_vf_info(cfg) + setup_bridge(cfg) + + setup_devlink_rate(cfg) + setup_remote_server(cfg) + time.sleep(2) + + +def run_iperf_client(server_ip, local_ip, barrier, min_expected_gbps=0.1): + """ + Runs a single iperf3 client instance, binding to the given local IP. + Waits on a barrier to synchronize with other threads. + """ + try: + barrier.wait(timeout=10) + except Exception as exc: + raise KsftFailEx("iperf3 barrier wait timed") from exc + + iperf_cmd = ["iperf3", "-c", server_ip, "-B", local_ip, "-J"] + result = subprocess.run(iperf_cmd, capture_output=True, text=True, + check=True) + + try: + output = json.loads(result.stdout) + bits_per_second = output["end"]["sum_received"]["bits_per_second"] + gbps = bits_per_second / 1e9 + if gbps < min_expected_gbps: + ksft_pr( + f"iperf3 bandwidth too low: {gbps:.2f} Gbps " + f"(expected ≥ {min_expected_gbps} Gbps)" + ) + return None + return gbps + except json.JSONDecodeError as exc: + ksft_pr(f"Failed to parse iperf3 JSON output: {exc}") + return None + + +def run_bandwidth_test(): + """ + Launches iperf3 client threads for each VLAN/TC pair and collects results. 
+ """ + def _run_iperf_client_thread(server_ip, local_ip, results, barrier, tc_ix): + results[tc_ix] = run_iperf_client(server_ip, local_ip, barrier) + + vf_vlan_data = [ + # (local_ip, remote_ip, TC) + ("198.51.100.2", "198.51.100.1", 3), + ("198.51.100.10", "198.51.100.9", 4), + ] + + results = {} + threads = [] + start_barrier = threading.Barrier(len(vf_vlan_data)) + + for local_ip, remote_ip, tc_ix in vf_vlan_data: + thread = threading.Thread( + target=_run_iperf_client_thread, + args=(remote_ip, local_ip, results, start_barrier, tc_ix) + ) + thread.start() + threads.append(thread) + + for thread in threads: + thread.join() + + for tc_ix, tc_bw in results.items(): + if tc_bw is None: + raise KsftFailEx("iperf3 client failed; cannot evaluate bandwidth") + + return results + +def calculate_bandwidth_percentages(results): + """ + Calculates the percentage of total bandwidth received by TC3 and TC4. + """ + if 3 not in results or 4 not in results: + raise KsftFailEx(f"Missing expected TC results in {results}") + + tc3_bw = results[3] + tc4_bw = results[4] + total_bw = tc3_bw + tc4_bw + tc3_percentage = (tc3_bw / total_bw) * 100 + tc4_percentage = (tc4_bw / total_bw) * 100 + + return { + 'tc3_bw': tc3_bw, + 'tc4_bw': tc4_bw, + 'tc3_percentage': tc3_percentage, + 'tc4_percentage': tc4_percentage, + 'total_bw': total_bw + } + + +def print_bandwidth_results(bw_data, test_name): + """ + Prints bandwidth measurements and TC usage summary for a given test. + """ + ksft_pr(f"Bandwidth check results {test_name}:") + ksft_pr(f"TC 3: {bw_data['tc3_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 4: {bw_data['tc4_bw']:.2f} Gbits/sec") + ksft_pr(f"Total bandwidth: {bw_data['total_bw']:.2f} Gbits/sec") + ksft_pr(f"TC 3 percentage: {bw_data['tc3_percentage']:.1f}%") + ksft_pr(f"TC 4 percentage: {bw_data['tc4_percentage']:.1f}%") + + +def verify_total_bandwidth(bw_data, validator): + """ + Ensures the total measured bandwidth falls within the acceptable tolerance. + """ + total = bw_data['total_bw'] + + if validator.bound(validator.expected_total_gbps, total): + return + + if total < validator.total_min_expected: + raise KsftSkipEx( + f"Total bandwidth {total:.2f} Gbps < minimum " + f"{validator.total_min_expected:.2f} Gbps; " + f"parent tx_max ({validator.expected_total_gbps:.1f} G) " + f"not reached, cannot validate share" + ) + + raise KsftFailEx( + f"Total bandwidth {total:.2f} Gbps exceeds allowed ceiling " + f"{validator.total_max_expected:.2f} Gbps " + f"(VF tx_max set to {validator.expected_total_gbps:.1f} G)" + ) + + +def check_bandwidth_distribution(bw_data, validator): + """ + Checks whether the measured TC3 and TC4 bandwidth percentages + fall within their expected tolerance ranges. + + Returns: + bool: True if both TC3 and TC4 percentages are within bounds. + """ + tc3_valid = validator.tc_bandwidth_bound(bw_data['tc3_percentage'], 3) + tc4_valid = validator.tc_bandwidth_bound(bw_data['tc4_percentage'], 4) + + return tc3_valid and tc4_valid + + +def run_bandwidth_distribution_test(cfg, set_tc_mapping): + """ + Runs parallel iperf3 tests for both TCs and collects results. 
+ """ + setup_test_environment(cfg, set_tc_mapping) + bandwidths = run_bandwidth_test() + bw_data = calculate_bandwidth_percentages(bandwidths) + test_name = "with TC mapping" if set_tc_mapping else "without TC mapping" + print_bandwidth_results(bw_data, test_name) + + verify_total_bandwidth(bw_data, cfg.bw_validator) + + return check_bandwidth_distribution(bw_data, cfg.bw_validator) + + +def test_no_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is not split 80/20 without traffic class mapping. + """ + pass_bw_msg = "Bandwidth is NOT distributed as 80/20 without TC mapping" + fail_bw_msg = "Bandwidth matched 80/20 split without TC mapping" + is_mlx5 = "driver: mlx5" in ethtool(f"-i {cfg.ifname}").stdout + + if run_bandwidth_distribution_test(cfg, set_tc_mapping=False): + if is_mlx5: + raise KsftXfailEx(fail_bw_msg) + raise KsftFailEx(fail_bw_msg) + if is_mlx5: + raise KsftFailEx("mlx5 behavior changed:" + pass_bw_msg) + ksft_pr(pass_bw_msg) + + +def test_tc_mapping_bandwidth(cfg): + """ + Verifies that bandwidth is correctly split 80/20 between TC3 and TC4 + when traffic class mapping is set. + """ + if run_bandwidth_distribution_test(cfg, set_tc_mapping=True): + ksft_pr("Bandwidth is distributed as 80/20 with TC mapping") + else: + raise KsftFailEx("Bandwidth did not match 80/20 split with TC mapping") + + +def main() -> None: + """ + Main entry point for running the test cases. + """ + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + cfg.devnl = DevlinkFamily() + + cfg.pci = os.path.basename( + os.path.realpath(f"/sys/class/net/{cfg.ifname}/device") + ) + if not cfg.pci: + raise KsftSkipEx("Could not get PCI address of the interface") + cfg.require_cmd("iperf3", local=True, remote=True) + + cfg.bw_validator = BandwidthValidator() + + cases = [test_no_tc_mapping_bandwidth, test_tc_mapping_bandwidth] + + ksft_run(cases=cases, args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 7947650210a0..baa2f24240ba 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -51,15 +51,14 @@ def check_tx(cfg) -> None: @ksft_disruptive def check_tx_chunks(cfg) -> None: - cfg.require_ipver("6") require_devmem(cfg) port = rand_port() - listen_cmd = f"socat -U - TCP6-LISTEN:{port}" + listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" with bkg(listen_cmd, exit_wait=True) as socat: wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr_v['6']} -p {port} -z 3", host=cfg.remote, shell=True) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 9c03fd777f3d..712c806508b5 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -3,37 +3,37 @@ import re from os import path -from lib.py import ksft_run, ksft_exit +from lib.py import ksft_run, ksft_exit, KsftSkipEx from lib.py import NetDrvEpEnv from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen def _get_current_settings(cfg): - output = ethtool(f"-g {cfg.ifname}", json=True, host=cfg.remote)[0] + output = ethtool(f"-g {cfg.ifname}", json=True)[0] return (output['rx'], 
output['hds-thresh']) def _get_combined_channels(cfg): - output = ethtool(f"-l {cfg.ifname}", host=cfg.remote).stdout + output = ethtool(f"-l {cfg.ifname}").stdout values = re.findall(r'Combined:\s+(\d+)', output) return int(values[1]) def _create_rss_ctx(cfg, chan): - output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout + output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1").stdout values = re.search(r'New RSS context is (\d+)', output).group(1) ctx_id = int(values) - return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote)) + return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}")) def _set_flow_rule(cfg, port, chan): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}").stdout values = re.search(r'ID (\d+)', output).group(1) return int(values) def _set_flow_rule_rss(cfg, port, ctx_id): - output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout + output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}").stdout values = re.search(r'ID (\d+)', output).group(1) return int(values) @@ -47,26 +47,26 @@ def test_zcrx(cfg) -> None: (rx_ring, hds_thresh) = _get_current_settings(cfg) port = rand_port() - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") - ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) - defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(port, proto="tcp", host=cfg.remote) - cmd(tx_cmd) + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) def test_zcrx_oneshot(cfg) -> None: @@ -78,26 +78,26 @@ def test_zcrx_oneshot(cfg) -> None: (rx_ring, hds_thresh) = _get_current_settings(cfg) port = rand_port() - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") - ethtool(f"-G {cfg.ifname} 
hds-thresh 0", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) - defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(port, proto="tcp", host=cfg.remote) - cmd(tx_cmd) + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 4096 -z 16384" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) def test_zcrx_rss(cfg) -> None: @@ -109,27 +109,27 @@ def test_zcrx_rss(cfg) -> None: (rx_ring, hds_thresh) = _get_current_settings(cfg) port = rand_port() - ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split on") + defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto") - ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} hds-thresh 0") + defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}") - ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) - defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} rx 64") + defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}") - ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) - defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote) + ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}") + defer(ethtool, f"-X {cfg.ifname} default") (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1) flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id) - defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" - tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840" - with bkg(rx_cmd, host=cfg.remote, exit_wait=True): - wait_port_listen(port, proto="tcp", host=cfg.remote) - cmd(tx_cmd) + rx_cmd = f"{cfg.bin_local} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}" + tx_cmd = f"{cfg.bin_remote} -c -h {cfg.addr_v['6']} -p {port} -l 12840" + with bkg(rx_cmd, exit_wait=True): + wait_port_listen(port, proto="tcp") + cmd(tx_cmd, host=cfg.remote) def main() -> None: diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py index b582885786f5..1462a339a74b 100644 --- 
a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -7,8 +7,25 @@ KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve() try: sys.path.append(KSFT_DIR.as_posix()) + from net.lib.py import * from drivers.net.lib.py import * + + # Import one by one to avoid pylint false positives + from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ + NlError, RtnlFamily, DevlinkFamily + from net.lib.py import CmdExitFailure + from net.lib.py import bkg, cmd, defer, ethtool, fd_read_timeout, ip, \ + rand_port, tool, wait_port_listen + from net.lib.py import fd_read_timeout + from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx + from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ + ksft_setup + from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ + ksft_ne, ksft_not_in, ksft_raises, ksft_true + from net.lib.py import NetNSEnter + from drivers.net.lib.py import GenerateTraffic + from drivers.net.lib.py import NetDrvEnv, NetDrvEpEnv except ModuleNotFoundError as e: ksft_pr("Failed importing `net` library from kernel sources") ksft_pr(str(e)) diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 02e4d3d7ded2..72f828021f83 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -526,12 +526,10 @@ static struct netdev_queue_id *create_queues(void) struct netdev_queue_id *queues; size_t i = 0; - queues = calloc(num_queues, sizeof(*queues)); + queues = netdev_queue_id_alloc(num_queues); for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; + netdev_queue_id_set_type(&queues[i], NETDEV_QUEUE_TYPE_RX); + netdev_queue_id_set_id(&queues[i], start_queue + i); } return queues; @@ -852,7 +850,6 @@ static int do_client(struct memory_buffer *mem) ssize_t line_size = 0; struct cmsghdr *cmsg; char *line = NULL; - unsigned long mid; size_t len = 0; int socket_fd; __u32 ddmabuf; diff --git a/tools/testing/selftests/drivers/net/hw/rss_api.py b/tools/testing/selftests/drivers/net/hw/rss_api.py new file mode 100755 index 000000000000..19847f3d4a00 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_api.py @@ -0,0 +1,476 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +API level tests for RSS (mostly Netlink vs IOCTL). 
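For orientation, a hedged sketch of the Netlink call pattern these tests exercise, mirroring the rss_get/rss_set usage in the cases below (the interface name is illustrative; passing "indir": None restores the default table):

    import socket
    from lib.py import EthtoolFamily

    ethnl = EthtoolFamily()
    ifindex = socket.if_nametoindex("eth0")  # illustrative interface name

    # Read the current RSS indirection table via ethtool Netlink.
    rss = ethnl.rss_get({"header": {"dev-index": ifindex}})
    print(rss.get("indir"))

    # Steer everything to RX queue 1, then restore the driver default spread.
    ethnl.rss_set({"header": {"dev-index": ifindex}, "indir": [1]})
    ethnl.rss_set({"header": {"dev-index": ifindex}, "indir": None})
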
+""" + +import errno +import glob +import random +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_is, ksft_ne, ksft_raises +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import defer, ethtool, CmdExitFailure +from lib.py import EthtoolFamily, NlError +from lib.py import NetDrvEnv + + +def _require_2qs(cfg): + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + return qcnt + + +def _ethtool_create(cfg, act, opts): + output = ethtool(f"{act} {cfg.ifname} {opts}").stdout + # Output will be something like: "New RSS context is 1" or + # "Added rule with ID 7", we want the integer from the end + return int(output.split()[-1]) + + +def _ethtool_get_cfg(cfg, fl_type, to_nl=False): + descr = ethtool(f"-n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + if to_nl: + converter = { + "IP SA": "ip-src", + "IP DA": "ip-dst", + "L4 bytes 0 & 1 [TCP/UDP src port]": "l4-b-0-1", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "l4-b-2-3", + } + + ret = set() + else: + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + } + + ret = "" + + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + if to_nl: + ret.add(converter[line]) + else: + ret += converter[line] + return ret + + +def test_rxfh_nl_set_fail(cfg): + """ + Test error path of Netlink SET. + """ + _require_2qs(cfg) + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-name": "lo"}, + "indir": None}) + + with ksft_raises(NlError): + ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [100000]}) + ntf = next(ethnl.poll_ntf(duration=0.2), None) + ksft_is(ntf, None) + + +def test_rxfh_nl_set_indir(cfg): + """ + Test setting indirection table via Netlink. + """ + qcnt = _require_2qs(cfg) + + # Test some SETs with a value + reset = defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, "indir": None}) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + # Test reset back to default + reset.exec() + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(set(rss.get("indir", [-1])), set(range(qcnt))) + + +def test_rxfh_nl_set_indir_ctx(cfg): + """ + Test setting indirection table for a custom context via Netlink. 
+ """ + _require_2qs(cfg) + + # Get setting for ctx 0, we'll make sure they don't get clobbered + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + + # Create context + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id, "indir": [0, 1]}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}, + "context": ctx_id}) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + ctx0 = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(ctx0, dflt) + + # Make sure we can't set the queue count below max queue used + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 0 rx 1") + with ksft_raises(CmdExitFailure): + ethtool(f"-L {cfg.ifname} combined 1 rx 0") + + +def test_rxfh_indir_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified. + """ + _require_2qs(cfg) + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} weight 0 1") + reset = defer(ethtool, f"-X {cfg.ifname} default") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after reset") + ksft_eq(ntf["name"], "rss-ntf") + ksft_is(ntf["msg"].get("context"), None) + ksft_ne(set(ntf["msg"]["indir"]), {1}) + + +def test_rxfh_indir_ctx_ntf(cfg): + """ + Check that Netlink notifications are generated when RSS indirection + table was modified on an additional RSS context. + """ + _require_2qs(cfg) + + ctx_id = _ethtool_create(cfg, "-X", "context new") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} weight 0 1") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"].get("context"), ctx_id) + ksft_eq(set(ntf["msg"]["indir"]), {1}) + + +def test_rxfh_nl_set_key(cfg): + """ + Test setting hashing key via Netlink. 
+ """ + + dflt = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + defer(cfg.ethnl.rss_set, + {"header": {"dev-index": cfg.ifindex}, + "hkey": dflt["hkey"], "indir": None}) + + # Empty key should error out + with ksft_raises(NlError) as cm: + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": None}) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.hkey') + + # Set key to random + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + + # Set key to random and indir tbl to something at once + mod = random.randbytes(len(dflt["hkey"])) + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "indir": [0, 1], "hkey": mod}) + rss = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(rss.get("hkey", [-1]), mod) + ksft_eq(set(rss.get("indir", [-1])), {0, 1}) + + +def test_rxfh_fields(cfg): + """ + Test reading Rx Flow Hash over Netlink. + """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + ethnl = EthtoolFamily() + + cfg_nl = ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + one = _ethtool_get_cfg(cfg, fl_type, to_nl=True) + ksft_eq(one, cfg_nl["flow-hash"][fl_type], + comment="Config for " + fl_type) + + +def test_rxfh_fields_set(cfg): + """ Test configuring Rx Flow Hash over Netlink. """ + + flow_types = ["tcp4", "tcp6", "udp4", "udp6"] + + # Collect current settings + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # symmetric hashing prevents some of the configs below + if cfg_old.get("input-xfrm"): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": {}}) + + for fl_type in flow_types: + cur = _ethtool_get_cfg(cfg, fl_type) + if cur == "sdfn": + change_nl = {"ip-src", "ip-dst"} + change_ic = "sd" + else: + change_nl = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + change_ic = "sdfn" + + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {fl_type: change_nl} + }) + reset = defer(ethtool, f"--disable-netlink -N {cfg.ifname} " + f"rx-flow-hash {fl_type} {cur}") + + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(change_nl, cfg_nl["flow-hash"][fl_type], + comment=f"Config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(change_ic, cfg_ic, + comment=f"Config for {fl_type} over IOCTL") + + reset.exec() + cfg_nl = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + ksft_eq(cfg_old["flow-hash"][fl_type], cfg_nl["flow-hash"][fl_type], + comment=f"Un-config for {fl_type} over Netlink") + cfg_ic = _ethtool_get_cfg(cfg, fl_type) + ksft_eq(cur, cfg_ic, comment=f"Un-config for {fl_type} over IOCTL") + + # Try to set multiple at once, the defer was already installed at the start + change = {"ip-src"} + if change == cfg_old["flow-hash"]["tcp4"]: + change = {"ip-dst"} + cfg.ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {x: change for x in flow_types} + }) + + cfg_nl = cfg.ethnl.rss_get({"header": 
{"dev-index": cfg.ifindex}}) + for fl_type in flow_types: + ksft_eq(change, cfg_nl["flow-hash"][fl_type], + comment=f"multi-config for {fl_type} over Netlink") + + +def test_rxfh_fields_set_xfrm(cfg): + """ Test changing Rx Flow Hash vs xfrm_input at once. """ + + def set_rss(cfg, xfrm, fh): + cfg.ethnl.rss_set({"header": {"dev-index": cfg.ifindex}, + "input-xfrm": xfrm, "flow-hash": fh}) + + # Install the reset handler + cfg_old = cfg.ethnl.rss_get({"header": {"dev-index": cfg.ifindex}}) + # symmetric hashing is config-order-sensitive make sure we leave + # symmetric mode, or make the flow-hash sym-compatible first + changes = [{"flow-hash": cfg_old["flow-hash"],}, + {"input-xfrm": cfg_old.get("input-xfrm", {}),}] + if cfg_old.get("input-xfrm"): + changes = list(reversed(changes)) + for old in changes: + defer(cfg.ethnl.rss_set, {"header": {"dev-index": cfg.ifindex},} | old) + + # Make sure we start with input-xfrm off, and tcp4 config non-sym + set_rss(cfg, {}, {}) + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + # Setting sym and fixing tcp4 config not expected to pass right now + with ksft_raises(NlError): + set_rss(cfg, {"sym-xor"}, {"tcp4": {"ip-src", "ip-dst"}}) + # One at a time should work, hopefully + set_rss(cfg, 0, {"tcp4": {"ip-src", "ip-dst"}}) + no_support = False + try: + set_rss(cfg, {"sym-xor"}, {}) + except NlError: + try: + set_rss(cfg, {"sym-or-xor"}, {}) + except NlError: + no_support = True + if no_support: + raise KsftSkipEx("no input-xfrm supported") + # Disabling two at once should not work either without kernel changes + with ksft_raises(NlError): + set_rss(cfg, {}, {"tcp4": {"ip-src"}}) + + +def test_rxfh_fields_ntf(cfg): + """ Test Rx Flow Hash notifications. """ + + cur = _ethtool_get_cfg(cfg, "tcp4") + if cur == "sdfn": + change = {"ip-src", "ip-dst"} + else: + change = {"l4-b-0-1", "l4-b-2-3", "ip-src", "ip-dst"} + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + ethnl.rss_set({ + "header": {"dev-index": cfg.ifindex}, + "flow-hash": {"tcp4": change} + }) + reset = defer(ethtool, + f"--disable-netlink -N {cfg.ifname} rx-flow-hash tcp4 {cur}") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after IOCTL change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_eq(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + reset.exec() + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("No notification received after Netlink change") + ksft_eq(ntf["name"], "rss-ntf") + ksft_ne(ntf["msg"]["flow-hash"]["tcp4"], change) + ksft_eq(next(ethnl.poll_ntf(duration=0.01), None), None) + + +def test_rss_ctx_add(cfg): + """ Test creating an additional RSS context via Netlink """ + + _require_2qs(cfg) + + # Test basic creation + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + d = defer(ethtool, f"-X {cfg.ifname} context {ctx.get('context')} delete") + ksft_ne(ctx.get("context", 0), 0) + ksft_ne(set(ctx.get("indir", [0])), {0}, + comment="Driver should init the indirection table") + + # Try requesting the ID we just got allocated + with ksft_raises(NlError) as cm: + ctx = cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx.get("context"), + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete") + d.exec() + ksft_eq(cm.exception.nl_msg.error, -errno.EBUSY) + + # Test creating with a specified RSS table, and context ID + ctx_id = ctx.get("context") + ctx = 
cfg.ethnl.rss_create_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx_id, + "indir": [1], + }) + ethtool(f"-X {cfg.ifname} context {ctx.get('context')} delete") + ksft_eq(ctx.get("context"), ctx_id) + ksft_eq(set(ctx.get("indir", [0])), {1}) + + +def test_rss_ctx_ntf(cfg): + """ Test notifications for creating additional RSS contexts """ + + ethnl = EthtoolFamily() + ethnl.ntf_subscribe("monitor") + + # Create / delete via Netlink + ctx = cfg.ethnl.rss_create_act({"header": {"dev-index": cfg.ifindex}}) + cfg.ethnl.rss_delete_act({ + "header": {"dev-index": cfg.ifindex}, + "context": ctx["context"], + }) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + ksft_eq(ctx, ntf["msg"]) + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[NL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + # Create / deleve via IOCTL + ctx_id = _ethtool_create(cfg, "--disable-netlink -X", "context new") + ethtool(f"--disable-netlink -X {cfg.ifname} context {ctx_id} delete") + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context creation") + ksft_eq(ntf["name"], "rss-create-ntf") + + ntf = next(ethnl.poll_ntf(duration=0.2), None) + if ntf is None: + raise KsftFailEx("[IOCTL] No notification after context deletion") + ksft_eq(ntf["name"], "rss-delete-ntf") + + +def main() -> None: + """ Ksft boiler plate main """ + + with NetDrvEnv(__file__, nsim_test=False) as cfg: + cfg.ethnl = EthtoolFamily() + ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ca60ae325c22..7bb552f8b182 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -747,6 +747,62 @@ def test_rss_ntuple_addition(cfg): 'noise' : (0,) }) +def test_rss_default_context_rule(cfg): + """ + Allocate a port, direct this port to context 0, then create a new RSS + context and steer all TCP traffic to it (context 1). Verify that: + * Traffic to the specific port continues to use queues of the main + context (0/1). + * Traffic to any other TCP port is redirected to the new context + (queues 2/3). + """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 4: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 4") + ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except Exception as exc: + raise KsftSkipEx("Not enough queues for the test") from exc + + # Use queues 0 and 1 for the main context + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + + # Create a new RSS context that uses queues 2 and 3 + ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + # Generic low-priority rule: redirect all TCP traffic to the new context. + # Give it an explicit higher location number (lower priority). + flow_generic = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} context {ctx_id} loc 1" + ethtool(f"-N {cfg.ifname} {flow_generic}") + defer(ethtool, f"-N {cfg.ifname} delete 1") + + # Specific high-priority rule for a random port that should stay on context 0. 
+ # Assign loc 0 so it is evaluated before the generic rule. + port_main = rand_port() + flow_main = f"flow-type tcp{cfg.addr_ipver} dst-ip {cfg.addr} dst-port {port_main} context 0 loc 0" + ethtool(f"-N {cfg.ifname} {flow_main}") + defer(ethtool, f"-N {cfg.ifname} delete 0") + + _ntuple_rule_check(cfg, 1, ctx_id) + + # Verify that traffic matching the specific rule still goes to queues 0/1 + _send_traffic_check(cfg, port_main, "context 0", + { 'target': (0, 1), + 'empty' : (2, 3) }) + + # And that traffic for any other port is steered to the new context + port_other = rand_port() + _send_traffic_check(cfg, port_other, f"context {ctx_id}", + { 'target': (2, 3), + 'noise' : (0, 1) }) + + def main() -> None: with NetDrvEpEnv(__file__, nsim_test=False) as cfg: cfg.context_cnt = None @@ -760,7 +816,8 @@ def main() -> None: test_rss_context_overlap, test_rss_context_overlap2, test_rss_context_out_of_order, test_rss_context4_create_with_cfg, test_flow_add_context_missing, - test_delete_rss_context_busy, test_rss_ntuple_addition], + test_delete_rss_context_busy, test_rss_ntuple_addition, + test_rss_default_context_rule], args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py index f439c434ba36..72880e388478 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py +++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py @@ -32,16 +32,16 @@ def test_rss_input_xfrm(cfg, ipver): if multiprocessing.cpu_count() < 2: raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash") - cfg.require_cmd("socat", remote=True) + cfg.require_cmd("socat", local=False, remote=True) if not hasattr(socket, "SO_INCOMING_CPU"): raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") - input_xfrm = cfg.ethnl.rss_get( - {'header': {'dev-name': cfg.ifname}}).get('input_xfrm') + rss = cfg.ethnl.rss_get({'header': {'dev-name': cfg.ifname}}) + input_xfrm = set(filter(lambda x: 'sym' in x, rss.get('input-xfrm', {}))) # Check for symmetric xor/or-xor - if not input_xfrm or (input_xfrm != 1 and input_xfrm != 2): + if not input_xfrm: raise KsftSkipEx("Symmetric RSS hash not requested") cpus = set() diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index 3370827409aa..c13dd5efa27a 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -34,7 +34,7 @@ def tcp_sock_get_retrans(sock): def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): - cfg.require_cmd("socat", remote=True) + cfg.require_cmd("socat", local=False, remote=True) port = rand_port() listen_cmd = f"socat -{ipver} -t 2 -u TCP-LISTEN:{port},reuseport /dev/null,ignoreeof" @@ -102,7 +102,7 @@ def build_tunnel(cfg, outer_ipver, tun_info): remote_addr = cfg.remote_addr_v[outer_ipver] tun_type = tun_info[0] - tun_arg = tun_info[2] + tun_arg = tun_info[1] ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}") defer(ip, f"link del {tun_type}-ksft") ip(f"link set dev {tun_type}-ksft up") @@ -119,15 +119,30 @@ def build_tunnel(cfg, outer_ipver, tun_info): return remote_v4, remote_v6 +def restore_wanted_features(cfg): + features_cmd = "" + for feature in cfg.hw_features: + setting = "on" if feature in cfg.wanted_features else "off" + features_cmd += f" {feature} {setting}" + try: + ethtool(f"-K {cfg.ifname} {features_cmd}") + except Exception as e: + 
ksft_pr(f"WARNING: failure restoring wanted features: {e}") + + def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): """Construct specific tests from the common template.""" def f(cfg): cfg.require_ipver(outer_ipver) + defer(restore_wanted_features, cfg) if not cfg.have_stat_super_count and \ not cfg.have_stat_wire_count: raise KsftSkipEx(f"Device does not support LSO queue stats") + if feature not in cfg.hw_features: + raise KsftSkipEx(f"Device does not support {feature}") + ipver = outer_ipver if tun: remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun) @@ -136,36 +151,21 @@ def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None): remote_v4 = cfg.remote_addr_v["4"] remote_v6 = cfg.remote_addr_v["6"] - tun_partial = tun and tun[1] - # Tunnel which can silently fall back to gso-partial - has_gso_partial = tun and 'tx-gso-partial' in cfg.features - - # For TSO4 via partial we need mangleid - if ipver == "4" and feature in cfg.partial_features: - ksft_pr("Testing with mangleid enabled") - if 'tx-tcp-mangleid-segmentation' not in cfg.features: - ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") - defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") - # First test without the feature enabled. ethtool(f"-K {cfg.ifname} {feature} off") - if has_gso_partial: - ethtool(f"-K {cfg.ifname} tx-gso-partial off") run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False) - # Now test with the feature enabled. - # For compatible tunnels only - just GSO partial, not specific feature. - if has_gso_partial: + ethtool(f"-K {cfg.ifname} tx-gso-partial off") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off") + if feature in cfg.partial_features: ethtool(f"-K {cfg.ifname} tx-gso-partial on") - run_one_stream(cfg, ipver, remote_v4, remote_v6, - should_lso=tun_partial) + if ipver == "4": + ksft_pr("Testing with mangleid enabled") + ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on") # Full feature enabled. 
- if feature in cfg.features: - ethtool(f"-K {cfg.ifname} {feature} on") - run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) - else: - raise KsftXfailEx(f"Device does not support {feature}") + ethtool(f"-K {cfg.ifname} {feature} on") + run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True) f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver return f @@ -176,23 +176,39 @@ def query_nic_features(cfg) -> None: cfg.have_stat_super_count = False cfg.have_stat_wire_count = False - cfg.features = set() features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) - for f in features["active"]["bits"]["bit"]: - cfg.features.add(f["name"]) + + cfg.wanted_features = set() + for f in features["wanted"]["bits"]["bit"]: + cfg.wanted_features.add(f["name"]) + + cfg.hw_features = set() + hw_all_features_cmd = "" + for f in features["hw"]["bits"]["bit"]: + if f.get("value", False): + feature = f["name"] + cfg.hw_features.add(feature) + hw_all_features_cmd += f" {feature} on" + try: + ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}") + except Exception as e: + ksft_pr(f"WARNING: failure enabling all hw features: {e}") + ksft_pr("partial gso feature detection may be impacted") # Check which features are supported via GSO partial cfg.partial_features = set() - if 'tx-gso-partial' in cfg.features: + if 'tx-gso-partial' in cfg.hw_features: ethtool(f"-K {cfg.ifname} tx-gso-partial off") no_partial = set() features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}}) for f in features["active"]["bits"]["bit"]: no_partial.add(f["name"]) - cfg.partial_features = cfg.features - no_partial + cfg.partial_features = cfg.hw_features - no_partial ethtool(f"-K {cfg.ifname} tx-gso-partial on") + restore_wanted_features(cfg) + stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True) if stats: if 'tx-hw-gso-packets' in stats[0]: @@ -211,13 +227,14 @@ def main() -> None: query_nic_features(cfg) test_info = ( - # name, v4/v6 ethtool_feature tun:(type, partial, args) - ("", "4", "tx-tcp-segmentation", None), - ("", "6", "tx-tcp6-segmentation", None), - ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")), - ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")), - ("gre", "4", "tx-gre-segmentation", ("gre", False, "")), - ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")), + # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions) + ("", "4", "tx-tcp-segmentation", None), + ("", "6", "tx-tcp6-segmentation", None), + ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))), + ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))), + ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))), + ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))), + ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))), ) cases = [] @@ -227,11 +244,13 @@ def main() -> None: if info[1] and outer_ipver != info[1]: continue - cases.append(test_builder(info[0], cfg, outer_ipver, info[2], - tun=info[3], inner_ipver="4")) if info[3]: - cases.append(test_builder(info[0], cfg, outer_ipver, info[2], - tun=info[3], inner_ipver="6")) + cases += [ + test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver) + for inner_ipver in info[3][2] + ] + else: + cases.append(test_builder(info[0], cfg, 
outer_ipver, info[2], None, outer_ipver)) ksft_run(cases=cases, args=(cfg, )) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 401e70f7f136..8711c67ad658 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -7,7 +7,21 @@ KSFT_DIR = (Path(__file__).parent / "../../../..").resolve() try: sys.path.append(KSFT_DIR.as_posix()) + from net.lib.py import * + + # Import one by one to avoid pylint false positives + from net.lib.py import EthtoolFamily, NetdevFamily, NetshaperFamily, \ + NlError, RtnlFamily, DevlinkFamily + from net.lib.py import CmdExitFailure + from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ + fd_read_timeout, ip, rand_port, tool, wait_port_listen + from net.lib.py import fd_read_timeout + from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx + from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ + ksft_setup + from net.lib.py import ksft_eq, ksft_ge, ksft_in, ksft_is, ksft_lt, \ + ksft_ne, ksft_not_in, ksft_raises, ksft_true except ModuleNotFoundError as e: ksft_pr("Failed importing `net` library from kernel sources") ksft_pr(str(e)) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 3bccddf8cbc5..1b8bd648048f 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -259,7 +259,7 @@ class NetDrvEpEnv(NetDrvEnvBase): if not self._require_cmd(comm, "local"): raise KsftSkipEx("Test requires command: " + comm) if remote: - if not self._require_cmd(comm, "remote"): + if not self._require_cmd(comm, "remote", host=self.remote): raise KsftSkipEx("Test requires (remote) command: " + comm) def wait_hw_stats_settle(self): diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py index d9c10613ae67..c4e808407cc4 100644 --- a/tools/testing/selftests/drivers/net/lib/py/load.py +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -1,21 +1,21 @@ # SPDX-License-Identifier: GPL-2.0 +import re import time from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen class GenerateTraffic: def __init__(self, env, port=None): - env.require_cmd("iperf3", remote=True) + env.require_cmd("iperf3", local=True, remote=True) self.env = env - if port is None: - port = rand_port() - self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True) - wait_port_listen(port) + self.port = rand_port() if port is None else port + self._iperf_server = cmd(f"iperf3 -s -1 -p {self.port}", background=True) + wait_port_listen(self.port) time.sleep(0.1) - self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400", + self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {self.port} -t 86400", background=True, host=env.remote) # Wait for traffic to ramp up @@ -56,3 +56,16 @@ class GenerateTraffic: ksft_pr(">> Server:") ksft_pr(self._iperf_server.stdout) ksft_pr(self._iperf_server.stderr) + self._wait_client_stopped() + + def _wait_client_stopped(self, sleep=0.005, timeout=5): + end = time.monotonic() + timeout + + live_port_pattern = re.compile(fr":{self.port:04X} 0[^6] ") + + while time.monotonic() < end: + data = cmd("cat /proc/net/tcp*", host=self.env.remote).stdout + if not live_port_pattern.search(data): + return + time.sleep(sleep) + raise Exception(f"Waiting for client to stop 
timed out after {timeout}s") diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 29b01b8e2215..b6071e80ebbb 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -11,9 +11,11 @@ set -euo pipefail LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") SRCIF="" # to be populated later -SRCIP=192.0.2.1 +SRCIP4="192.0.2.1" +SRCIP6="fc00::1" DSTIF="" # to be populated later -DSTIP=192.0.2.2 +DSTIP4="192.0.2.2" +DSTIP6="fc00::2" PORT="6666" MSG="netconsole selftest" @@ -80,7 +82,23 @@ function configure_ip() { ip link set "${SRCIF}" up } +function select_ipv4_or_ipv6() +{ + local VERSION=${1} + + if [[ "$VERSION" == "ipv6" ]] + then + DSTIP="${DSTIP6}" + SRCIP="${SRCIP6}" + else + DSTIP="${DSTIP4}" + SRCIP="${SRCIP4}" + fi +} + function set_network() { + local IP_VERSION=${1:-"ipv4"} + # setup_ns function is coming from lib.sh setup_ns NAMESPACE @@ -91,10 +109,13 @@ function set_network() { # Link both interfaces back to back link_ifaces + select_ipv4_or_ipv6 "${IP_VERSION}" configure_ip } function create_dynamic_target() { + local FORMAT=${1:-"extended"} + DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') @@ -106,7 +127,33 @@ function create_dynamic_target() { echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + if [ "${FORMAT}" == "basic" ] + then + # Basic target does not support release + echo 0 > "${NETCONS_PATH}"/release + echo 0 > "${NETCONS_PATH}"/extended + elif [ "${FORMAT}" == "extended" ] + then + echo 1 > "${NETCONS_PATH}"/extended + fi + echo 1 > "${NETCONS_PATH}"/enabled + + # This will make sure that the kernel was able to + # load the netconsole driver configuration. The console message + # gets more organized/sequential as well. + sleep 1 +} + +# Generate the command line argument for netconsole following: +# netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr] +function create_cmdline_str() { + DSTMAC=$(ip netns exec "${NAMESPACE}" \ + ip link show "${DSTIF}" | awk '/ether/ {print $2}') + SRCPORT="1514" + TGTPORT="6666" + + echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\"" } # Do not append the release to the header of the message @@ -116,16 +163,9 @@ function disable_release_append() { echo 1 > "${NETCONS_PATH}"/enabled } -function cleanup() { +function do_cleanup() { local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" - # delete netconsole dynamic reconfiguration - echo 0 > "${NETCONS_PATH}"/enabled - # Remove all the keys that got created during the selftest - find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete - # Remove the configfs entry - rmdir "${NETCONS_PATH}" - # Delete netdevsim devices echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL" echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL" @@ -137,6 +177,17 @@ function cleanup() { echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk } +function cleanup() { + # delete netconsole dynamic reconfiguration + echo 0 > "${NETCONS_PATH}"/enabled + # Remove all the keys that got created during the selftest + find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete + # Remove the configfs entry + rmdir "${NETCONS_PATH}" + + do_cleanup +} + function set_user_data() { if [[ ! 
-d "${NETCONS_PATH}""/userdata" ]] then @@ -152,18 +203,24 @@ function set_user_data() { function listen_port_and_save_to() { local OUTPUT=${1} + local IPVERSION=${2:-"ipv4"} + + if [ "${IPVERSION}" == "ipv4" ] + then + SOCAT_MODE="UDP-LISTEN" + else + SOCAT_MODE="UDP6-LISTEN" + fi + # Just wait for 2 seconds timeout 2 ip netns exec "${NAMESPACE}" \ - socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}" + socat "${SOCAT_MODE}":"${PORT}",fork "${OUTPUT}" } -function validate_result() { +# Only validate that the message arrived properly +function validate_msg() { local TMPFILENAME="$1" - # TMPFILENAME will contain something like: - # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM - # key=value - # Check if the file exists if [ ! -f "$TMPFILENAME" ]; then echo "FAIL: File was not generated." >&2 @@ -175,17 +232,32 @@ function validate_result() { cat "${TMPFILENAME}" >&2 exit "${ksft_fail}" fi +} - if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then - echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 - cat "${TMPFILENAME}" >&2 - exit "${ksft_fail}" +# Validate the message and userdata +function validate_result() { + local TMPFILENAME="$1" + + # TMPFILENAME will contain something like: + # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM + # key=value + + validate_msg "${TMPFILENAME}" + + # userdata is not supported on basic format target, + # thus, do not validate it. + if [ "${FORMAT}" != "basic" ]; + then + if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then + echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2 + cat "${TMPFILENAME}" >&2 + exit "${ksft_fail}" + fi fi # Delete the file once it is validated, otherwise keep it # for debugging purposes rm "${TMPFILENAME}" - exit "${ksft_pass}" } function check_for_dependencies() { @@ -209,6 +281,11 @@ function check_for_dependencies() { exit "${ksft_skip}" fi + if [ ! -f /proc/net/if_inet6 ]; then + echo "SKIP: IPv6 not configured. Check if CONFIG_IPV6 is enabled" >&2 + exit "${ksft_skip}" + fi + if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2 exit "${ksft_skip}" @@ -224,8 +301,15 @@ function check_for_dependencies() { exit "${ksft_skip}" fi - if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then - echo "SKIP: IPs already in use. Skipping it" >&2 + REGEXP4="inet.*(${SRCIP4}|${DSTIP4})" + REGEXP6="inet.*(${SRCIP6}|${DSTIP6})" + if ip addr list | grep -E "${REGEXP4}" 2> /dev/null; then + echo "SKIP: IPv4s already in use. Skipping it" >&2 + exit "${ksft_skip}" + fi + + if ip addr list | grep -E "${REGEXP6}" 2> /dev/null; then + echo "SKIP: IPv6s already in use. 
Skipping it" >&2 exit "${ksft_skip}" fi } @@ -239,10 +323,41 @@ function check_for_taskset() { # This is necessary if running multiple tests in a row function pkill_socat() { - PROCESS_NAME="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + PROCESS_NAME4="socat UDP-LISTEN:6666,fork ${OUTPUT_FILE}" + PROCESS_NAME6="socat UDP6-LISTEN:6666,fork ${OUTPUT_FILE}" # socat runs under timeout(1), kill it if it is still alive # do not fail if socat doesn't exist anymore set +e - pkill -f "${PROCESS_NAME}" + pkill -f "${PROCESS_NAME4}" + pkill -f "${PROCESS_NAME6}" set -e } + +# Check if netconsole was compiled as a module, otherwise exit +function check_netconsole_module() { + if modinfo netconsole | grep filename: | grep -q builtin + then + echo "SKIP: netconsole should be compiled as a module" >&2 + exit "${ksft_skip}" + fi +} + +# A wrapper to translate protocol version to udp version +function wait_for_port() { + local NAMESPACE=${1} + local PORT=${2} + IP_VERSION=${3} + + if [ "${IP_VERSION}" == "ipv6" ] + then + PROTOCOL="udp6" + else + PROTOCOL="udp" + fi + + wait_local_port_listen "${NAMESPACE}" "${PORT}" "${PROTOCOL}" + # even after the port is open, let's wait 1 second before writing + # otherwise the packet could be missed, and the test will fail. Happens + # more frequently on IPv6 + sleep 1 +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh index 899b6892603f..d7505b933aef 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh @@ -51,7 +51,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do fi ${current_test}_setup_prepare - setup_wait $num_netifs + setup_wait_n $num_netifs # Update target in case occupancy of a certain resource changed # following the test setup. target=$(${current_test}_get_target "$should_fail") diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh index 482ebb744eba..7b98cdd0580d 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh @@ -55,7 +55,7 @@ for current_test in ${TESTS:-$ALL_TESTS}; do continue fi ${current_test}_setup_prepare - setup_wait $num_netifs + setup_wait_n $num_netifs # Update target in case occupancy of a certain resource # changed following the test setup. 
target=$(${current_test}_get_target "$should_fail") diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py index 356bac46ba04..d05eddcad539 100755 --- a/tools/testing/selftests/drivers/net/napi_id.py +++ b/tools/testing/selftests/drivers/net/napi_id.py @@ -7,10 +7,10 @@ from lib.py import bkg, cmd, rand_port, NetNSEnter def test_napi_id(cfg) -> None: port = rand_port() - listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}" + listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr} {port}" with bkg(listen_cmd, ksft_wait=3) as server: - cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True) + cmd(f"echo a | socat - TCP:{cfg.baddr}:{port}", host=cfg.remote, shell=True) ksft_eq(0, server.ret) diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c index eecd610c2109..7f49ca6c8637 100644 --- a/tools/testing/selftests/drivers/net/napi_id_helper.c +++ b/tools/testing/selftests/drivers/net/napi_id_helper.c @@ -7,41 +7,58 @@ #include <unistd.h> #include <arpa/inet.h> #include <sys/socket.h> +#include <netdb.h> #include "../../net/lib/ksft.h" int main(int argc, char *argv[]) { - struct sockaddr_in address; + struct sockaddr_storage address; + struct addrinfo *result; + struct addrinfo hints; unsigned int napi_id; - unsigned int port; + socklen_t addr_len; socklen_t optlen; char buf[1024]; int opt = 1; + int family; int server; int client; int ret; - server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; + hints.ai_socktype = SOCK_STREAM; + hints.ai_flags = AI_PASSIVE; + + ret = getaddrinfo(argv[1], argv[2], &hints, &result); + if (ret != 0) { + fprintf(stderr, "getaddrinfo: %s\n", gai_strerror(ret)); + return 1; + } + + family = result->ai_family; + addr_len = result->ai_addrlen; + + server = socket(family, SOCK_STREAM, IPPROTO_TCP); if (server < 0) { perror("socket creation failed"); + freeaddrinfo(result); if (errno == EAFNOSUPPORT) return -1; return 1; } - port = atoi(argv[2]); - if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) { perror("setsockopt"); + freeaddrinfo(result); return 1; } - address.sin_family = AF_INET; - inet_pton(AF_INET, argv[1], &address.sin_addr); - address.sin_port = htons(port); + memcpy(&address, result->ai_addr, result->ai_addrlen); + freeaddrinfo(result); - if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) { + if (bind(server, (struct sockaddr *)&address, addr_len) < 0) { perror("bind failed"); return 1; } diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh index fe765da498e8..a3446b569976 100755 --- a/tools/testing/selftests/drivers/net/netcons_basic.sh +++ b/tools/testing/selftests/drivers/net/netcons_basic.sh @@ -32,21 +32,42 @@ check_for_dependencies echo "6 5" > /proc/sys/kernel/printk # Remove the namespace, interfaces and netconsole target on exit trap cleanup EXIT -# Create one namespace and two interfaces -set_network -# Create a dynamic target for netconsole -create_dynamic_target -# Set userdata "key" with the "value" value -set_user_data -# Listed for netconsole port inside the namespace and destination interface -listen_port_and_save_to "${OUTPUT_FILE}" & -# Wait for socat to start and listen to the port. 
-wait_local_port_listen "${NAMESPACE}" "${PORT}" udp -# Send the message -echo "${MSG}: ${TARGET}" > /dev/kmsg -# Wait until socat saves the file to disk -busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" - -# Make sure the message was received in the dst part -# and exit -validate_result "${OUTPUT_FILE}" + +# Run the test twice, with different format modes +for FORMAT in "basic" "extended" +do + for IP_VERSION in "ipv6" "ipv4" + do + echo "Running with target mode: ${FORMAT} (${IP_VERSION})" + # Create one namespace and two interfaces + set_network "${IP_VERSION}" + # Create a dynamic target for netconsole + create_dynamic_target "${FORMAT}" + # Only set userdata for extended format + if [ "$FORMAT" == "extended" ] + then + # Set userdata "key" with the "value" value + set_user_data + fi + # Listed for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${NAMESPACE}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat + cleanup + echo "${FORMAT} : ${IP_VERSION} : Test passed" >&2 + done +done + +trap - EXIT +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh new file mode 100755 index 000000000000..ad2fb8b1c463 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# This is a selftest to test cmdline arguments on netconsole. +# It exercises loading of netconsole from cmdline instead of the dynamic +# reconfiguration. This includes parsing the long netconsole= line and all the +# flow through init_netconsole(). +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +check_netconsole_module + +modprobe netdevsim 2> /dev/null || true +rmmod netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +# check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace and network interfaces +trap do_cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create the command line for netconsole, with the configuration from the +# function above +CMDLINE="$(create_cmdline_str)" + +# Load the module, with the cmdline set +modprobe netconsole "${CMDLINE}" + +# Listed for netconsole port inside the namespace and destination interface +listen_port_and_save_to "${OUTPUT_FILE}" & +# Wait for socat to start and listen to the port. 
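# Editorial note (not part of the patch): with the defaults from lib_netcons.sh
# (SRCPORT=1514, TGTPORT=6666, SRCIP4=192.0.2.1, DSTIP4=192.0.2.2), create_cmdline_str
# expands to something like the following, where the source interface and destination
# MAC are hypothetical run-time values:
#   netconsole="+1514@192.0.2.1/<srcif>,6666@192.0.2.2/<dstmac>"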
+wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +# Send the message +echo "${MSG}: ${TARGET}" > /dev/kmsg +# Wait until socat saves the file to disk +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +# Make sure the message was received in the dst part +# and exit +validate_msg "${OUTPUT_FILE}" + +exit "${ksft_pass}" diff --git a/tools/testing/selftests/drivers/net/netcons_sysdata.sh b/tools/testing/selftests/drivers/net/netcons_sysdata.sh index a737e377bf08..baf69031089e 100755 --- a/tools/testing/selftests/drivers/net/netcons_sysdata.sh +++ b/tools/testing/selftests/drivers/net/netcons_sysdata.sh @@ -53,6 +53,17 @@ function set_release() { echo 1 > "${NETCONS_PATH}/userdata/release_enabled" } +# Enable the msgid to be appended to sysdata +function set_msgid() { + if [[ ! -f "${NETCONS_PATH}/userdata/msgid_enabled" ]] + then + echo "Not able to enable msgid sysdata append. Configfs not available in ${NETCONS_PATH}/userdata/msgid_enabled" >&2 + exit "${ksft_skip}" + fi + + echo 1 > "${NETCONS_PATH}/userdata/msgid_enabled" +} + # Disable the sysdata cpu_nr feature function unset_cpu_nr() { echo 0 > "${NETCONS_PATH}/userdata/cpu_nr_enabled" @@ -67,6 +78,10 @@ function unset_release() { echo 0 > "${NETCONS_PATH}/userdata/release_enabled" } +function unset_msgid() { + echo 0 > "${NETCONS_PATH}/userdata/msgid_enabled" +} + # Test if MSG contains sysdata function validate_sysdata() { # OUTPUT_FILE will contain something like: @@ -74,6 +89,7 @@ function validate_sysdata() { # userdatakey=userdatavalue # cpu=X # taskname=<taskname> + # msgid=<id> # Echo is what this test uses to create the message. See runtest() # function @@ -104,6 +120,12 @@ function validate_sysdata() { exit "${ksft_fail}" fi + if ! grep -q "msgid=[0-9]\+$" "${OUTPUT_FILE}"; then + echo "FAIL: 'msgid=<id>' not found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + rm "${OUTPUT_FILE}" pkill_socat } @@ -155,6 +177,12 @@ function validate_no_sysdata() { exit "${ksft_fail}" fi + if grep -q "msgid=" "${OUTPUT_FILE}"; then + echo "FAIL: 'msgid= found in ${OUTPUT_FILE}" >&2 + cat "${OUTPUT_FILE}" >&2 + exit "${ksft_fail}" + fi + rm "${OUTPUT_FILE}" } @@ -206,6 +234,7 @@ set_cpu_nr # Enable taskname to be appended to sysdata set_taskname set_release +set_msgid runtest # Make sure the message was received in the dst part # and exit @@ -235,6 +264,7 @@ MSG="Test #3 from CPU${CPU}" unset_cpu_nr unset_taskname unset_release +unset_msgid runtest # At this time, cpu= shouldn't be present in the msg validate_no_sysdata diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index b5ea2526f23c..030762b203d7 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -40,6 +40,8 @@ fw_flash_test() return fi + echo "10"> $DEBUGFS_DIR/fw_update_flash_chunk_time_ms + devlink dev flash $DL_HANDLE file $DUMMYFILE check_err $? "Failed to flash with status updates on" @@ -608,6 +610,46 @@ rate_attr_parent_check() check_err $? "Unexpected parent attr value $api_value != $parent" } +rate_attr_tc_bw_check() +{ + local handle=$1 + local tc_bw=$2 + local debug_file=$3 + + local tc_bw_str="" + for bw in $tc_bw; do + local tc=${bw%%:*} + local value=${bw##*:} + tc_bw_str="$tc_bw_str $tc:$value" + done + tc_bw_str=${tc_bw_str# } + + rate_attr_set "$handle" tc-bw "$tc_bw_str" + check_err $? 
"Failed to set tc-bw values" + + for bw in $tc_bw; do + local tc=${bw%%:*} + local value=${bw##*:} + local debug_value + debug_value=$(cat "$debug_file"/tc"${tc}"_bw) + check_err $? "Failed to read tc-bw value from debugfs for tc$tc" + [ "$debug_value" == "$value" ] + check_err $? "Unexpected tc-bw debug value for tc$tc: $debug_value != $value" + done + + for bw in $tc_bw; do + local tc=${bw%%:*} + local expected_value=${bw##*:} + local api_value + api_value=$(rate_attr_get "$handle" tc_"$tc") + if [ "$api_value" = "null" ]; then + api_value=0 + fi + [ "$api_value" == "$expected_value" ] + check_err $? "Unexpected tc-bw value for tc$tc: $api_value != $expected_value" + done +} + rate_node_add() { local handle=$1 @@ -649,6 +691,13 @@ rate_test() rate=$(($rate+100)) done + local tc_bw="0:0 1:40 2:0 3:0 4:0 5:0 6:60 7:0" + for r_obj in $leafs + do + rate_attr_tc_bw_check "$r_obj" "$tc_bw" \ + "$DEBUGFS_DIR"/ports/"${r_obj##*/}" + done + local node1_name='group1' local node1="$DL_HANDLE/$node1_name" rate_node_add "$node1" @@ -666,6 +715,12 @@ rate_test() rate_attr_tx_rate_check $node1 tx_max $node_tx_max \ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max + + local tc_bw="0:20 1:0 2:0 3:0 4:0 5:20 6:60 7:0" + rate_attr_tc_bw_check $node1 "$tc_bw" \ + "$DEBUGFS_DIR"/rate_nodes/"${node1##*/}" + + rate_node_del "$node1" check_err $? "Failed to delete node $node1" local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w` diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh index 1bb46ec435d4..7f32b5600925 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/peer.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh @@ -1,7 +1,8 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0-only -source ../../../net/lib.sh +lib_dir=$(dirname $0)/../../../net +source $lib_dir/lib.sh NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index 92c2f0376c08..4c859ecdad94 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -266,7 +266,6 @@ for port in 0 1; do echo $NSIM_ID > /sys/bus/netdevsim/new_device else echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep echo 1 > $NSIM_DEV_SYS/new_port fi NSIM_NETDEV=`get_netdev_name old_netdevs` @@ -350,23 +349,11 @@ old_netdevs=$(ls /sys/class/net) port=0 echo $NSIM_ID > /sys/bus/netdevsim/new_device echo 0 > $NSIM_DEV_SYS/del_port -echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep echo 0 > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` msg="create VxLANs" -exp0=( 0 0 0 0 ) # sleep is longer than out wait -new_vxlan vxlan0 10000 $NSIM_NETDEV - -modprobe -r vxlan -modprobe -r udp_tunnel - -msg="remove tunnels" -exp0=( 0 0 0 0 ) -check_tables - -msg="create VxLANs" -exp0=( 0 0 0 0 ) # sleep is longer than out wait +exp0=( `mke 10000 1` 0 0 0 ) new_vxlan vxlan0 10000 $NSIM_NETDEV exp0=( 0 0 0 0 ) @@ -428,7 +415,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -486,7 +472,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > 
$NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -543,7 +528,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -573,7 +557,6 @@ echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -634,7 +617,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -690,7 +672,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -750,7 +731,6 @@ echo 0 > $NSIM_DEV_SYS/del_port for port in 0 1; do if [ $port -ne 0 ]; then echo 1 > $NSIM_DEV_DFS/udp_ports_open_only - echo 1 > $NSIM_DEV_DFS/udp_ports_sleep fi echo $port > $NSIM_DEV_SYS/new_port @@ -809,7 +789,6 @@ echo $NSIM_ID > /sys/bus/netdevsim/new_device echo 0 > $NSIM_DEV_SYS/del_port echo 0 > $NSIM_DEV_DFS/udp_ports_open_only -echo 1 > $NSIM_DEV_DFS/udp_ports_sleep echo 1 > $NSIM_DEV_DFS/udp_ports_shared old_netdevs=$(ls /sys/class/net) diff --git a/tools/testing/selftests/drivers/net/netpoll_basic.py b/tools/testing/selftests/drivers/net/netpoll_basic.py new file mode 100755 index 000000000000..408bd54d6779 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netpoll_basic.py @@ -0,0 +1,396 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# Author: Breno Leitao <leitao@debian.org> +""" + This test aims to evaluate the netpoll polling mechanism (as in + netpoll_poll_dev()). It presents a complex scenario where the network + attempts to send a packet but fails, prompting it to poll the NIC from within + the netpoll TX side. + + This has been a crucial path in netpoll that was previously untested. Jakub + suggested using a single RX/TX queue, pushing traffic to the NIC, and then + sending netpoll messages (via netconsole) to trigger the poll. + + In parallel, bpftrace is used to detect if netpoll_poll_dev() was called. If + so, the test passes, otherwise it will be skipped. This test is very dependent on + the driver and environment, given we are trying to trigger a tricky scenario. +""" + +import errno +import logging +import os +import random +import string +import threading +import time +from typing import Optional + +from lib.py import ( + bpftrace, + CmdExitFailure, + defer, + ethtool, + GenerateTraffic, + ksft_exit, + ksft_pr, + ksft_run, + KsftFailEx, + KsftSkipEx, + NetDrvEpEnv, + KsftXfailEx, +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +NETCONSOLE_CONFIGFS_PATH: str = "/sys/kernel/config/netconsole" +NETCONS_REMOTE_PORT: int = 6666 +NETCONS_LOCAL_PORT: int = 1514 + +# Max number of netcons messages to send. 
Each iteration will setup +# netconsole and send MAX_WRITES messages +ITERATIONS: int = 20 +# Number of writes to /dev/kmsg per iteration +MAX_WRITES: int = 40 +# MAPS contains the information coming from bpftrace it will have only one +# key: "hits", which tells the number of times netpoll_poll_dev() was called +MAPS: dict[str, int] = {} +# Thread to run bpftrace in parallel +BPF_THREAD: Optional[threading.Thread] = None +# Time bpftrace will be running in parallel. +BPFTRACE_TIMEOUT: int = 10 + + +def ethtool_get_ringsize(interface_name: str) -> tuple[int, int]: + """ + Read the ringsize using ethtool. This will be used to restore it after the test + """ + try: + ethtool_result = ethtool(f"-g {interface_name}", json=True)[0] + rxs = ethtool_result["rx"] + txs = ethtool_result["tx"] + except (KeyError, IndexError) as exception: + raise KsftSkipEx( + f"Failed to read RX/TX ringsize: {exception}. Not going to mess with them." + ) from exception + + return rxs, txs + + +def ethtool_set_ringsize(interface_name: str, ring_size: tuple[int, int]) -> bool: + """Try to the number of RX and TX ringsize.""" + rxs = ring_size[0] + txs = ring_size[1] + + logging.debug("Setting ring size to %d/%d", rxs, txs) + try: + ethtool(f"-G {interface_name} rx {rxs} tx {txs}") + except CmdExitFailure: + # This might fail on real device, retry with a higher value, + # worst case, keep it as it is. + return False + + return True + + +def ethtool_get_queues_cnt(interface_name: str) -> tuple[int, int, int]: + """Read the number of RX, TX and combined queues using ethtool""" + + try: + ethtool_result = ethtool(f"-l {interface_name}", json=True)[0] + rxq = ethtool_result.get("rx", -1) + txq = ethtool_result.get("tx", -1) + combined = ethtool_result.get("combined", -1) + + except IndexError as exception: + raise KsftSkipEx( + f"Failed to read queues numbers: {exception}. Not going to mess with them." + ) from exception + + return rxq, txq, combined + + +def ethtool_set_queues_cnt(interface_name: str, queues: tuple[int, int, int]) -> None: + """Set the number of RX, TX and combined queues using ethtool""" + rxq, txq, combined = queues + + cmdline = f"-L {interface_name}" + + if rxq != -1: + cmdline += f" rx {rxq}" + if txq != -1: + cmdline += f" tx {txq}" + if combined != -1: + cmdline += f" combined {combined}" + + logging.debug("calling: ethtool %s", cmdline) + + try: + ethtool(cmdline) + except CmdExitFailure as exception: + raise KsftSkipEx( + f"Failed to configure RX/TX queues: {exception}. Ethtool not available?" 
+ ) from exception + + +def netcons_generate_random_target_name() -> str: + """Generate a random target name starting with 'netcons'""" + random_suffix = "".join(random.choices(string.ascii_lowercase + string.digits, k=8)) + return f"netcons_{random_suffix}" + + +def netcons_create_target( + config_data: dict[str, str], + target_name: str, +) -> None: + """Create a netconsole dynamic target against the interfaces""" + logging.debug("Using netconsole name: %s", target_name) + try: + os.makedirs(f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}", exist_ok=True) + logging.debug( + "Created target directory: %s/%s", NETCONSOLE_CONFIGFS_PATH, target_name + ) + except OSError as exception: + if exception.errno != errno.EEXIST: + raise KsftFailEx( + f"Failed to create netconsole target directory: {exception}" + ) from exception + + try: + for key, value in config_data.items(): + path = f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{key}" + logging.debug("Writing %s to %s", key, path) + with open(path, "w", encoding="utf-8") as file: + # Always convert to string to write to file + file.write(str(value)) + + # Read all configuration values for debugging purposes + for debug_key in config_data.keys(): + with open( + f"{NETCONSOLE_CONFIGFS_PATH}/{target_name}/{debug_key}", + "r", + encoding="utf-8", + ) as file: + content = file.read() + logging.debug( + "%s/%s/%s : %s", + NETCONSOLE_CONFIGFS_PATH, + target_name, + debug_key, + content.strip(), + ) + + except Exception as exception: + raise KsftFailEx( + f"Failed to configure netconsole target: {exception}" + ) from exception + + +def netcons_configure_target( + cfg: NetDrvEpEnv, interface_name: str, target_name: str +) -> None: + """Configure netconsole on the interface with the given target name""" + config_data = { + "extended": "1", + "dev_name": interface_name, + "local_port": NETCONS_LOCAL_PORT, + "remote_port": NETCONS_REMOTE_PORT, + "local_ip": cfg.addr, + "remote_ip": cfg.remote_addr, + "remote_mac": "00:00:00:00:00:00", # Not important for this test + "enabled": "1", + } + + netcons_create_target(config_data, target_name) + logging.debug( + "Created netconsole target: %s on interface %s", target_name, interface_name + ) + + +def netcons_delete_target(name: str) -> None: + """Delete a netconsole dynamic target""" + target_path = f"{NETCONSOLE_CONFIGFS_PATH}/{name}" + try: + if os.path.exists(target_path): + os.rmdir(target_path) + except OSError as exception: + raise KsftFailEx( + f"Failed to delete netconsole target: {exception}" + ) from exception + + +def netcons_load_module() -> None: + """Try to load the netconsole module""" + os.system("modprobe netconsole") + + +def bpftrace_call() -> None: + """Call bpftrace to find how many times netpoll_poll_dev() is called. + Output is saved in the global variable `maps`""" + + # This is going to update the global variable, that will be seen by the + # main function + global MAPS # pylint: disable=W0603 + + # This will be passed to bpftrace as in bpftrace -e "expr" + expr = "kprobe:netpoll_poll_dev { @hits = count(); }" + + MAPS = bpftrace(expr, timeout=BPFTRACE_TIMEOUT, json=True) + logging.debug("BPFtrace output: %s", MAPS) + + +def bpftrace_start(): + """Start a thread to call `call_bpf` in a parallel thread""" + global BPF_THREAD # pylint: disable=W0603 + + BPF_THREAD = threading.Thread(target=bpftrace_call) + BPF_THREAD.start() + if not BPF_THREAD.is_alive(): + raise KsftSkipEx("BPFtrace thread is not alive. 
Skipping test") + + +def bpftrace_stop() -> None: + """Stop the bpftrace thread""" + if BPF_THREAD: + BPF_THREAD.join() + + +def bpftrace_any_hit(join: bool) -> bool: + """Check if netpoll_poll_dev() was called by checking the global variable `MAPS`""" + if not BPF_THREAD: + raise KsftFailEx("BPFtrace didn't start") + + if BPF_THREAD.is_alive(): + if join: + # Wait for bpftrace to finish + BPF_THREAD.join() + else: + # bpftrace is still running, so we will not check the result yet + return False + + logging.debug("MAPS coming from bpftrace = %s", MAPS) + if "hits" not in MAPS.keys(): + raise KsftFailEx(f"bpftrace failed to run!?: {MAPS}") + + logging.debug("Got a total of %d hits", MAPS["hits"]) + return MAPS["hits"] > 0 + + +def do_netpoll_flush_monitored(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + # Start bpftrace in parallel, so it is watching + # netpoll_poll_dev() while we are sending netconsole messages + bpftrace_start() + defer(bpftrace_stop) + + do_netpoll_flush(cfg, ifname, target_name) + + if bpftrace_any_hit(join=True): + ksft_pr("netpoll_poll_dev() was called. Success") + return + + raise KsftXfailEx("netpoll_poll_dev() was not called during the test...") + + +def do_netpoll_flush(cfg: NetDrvEpEnv, ifname: str, target_name: str) -> None: + """Print messages to the console, trying to trigger a netpoll poll""" + netcons_configure_target(cfg, ifname, target_name) + retry = 0 + + for i in range(int(ITERATIONS)): + if not BPF_THREAD.is_alive() or bpftrace_any_hit(join=False): + # bpftrace is done, stop sending messages + break + + msg = f"netcons test #{i}" + with open("/dev/kmsg", "w", encoding="utf-8") as kmsg: + for j in range(MAX_WRITES): + try: + kmsg.write(f"{msg}-{j}\n") + except OSError as exception: + # in some cases, kmsg can be busy, so we will retry + time.sleep(1) + retry += 1 + if retry < 5: + logging.info("Failed to write to kmsg. Retrying") + # Just retry a few times + continue + raise KsftFailEx( + f"Failed to write to kmsg: {exception}" + ) from exception + + netcons_delete_target(target_name) + netcons_configure_target(cfg, ifname, target_name) + # Sleeping here gives a better chance of triggering netpoll_poll_dev(). + # This number is based on a few tests run while developing this test + time.sleep(0.4) + + +def configure_network(ifname: str) -> None: + """Configure ring size and queue numbers""" + + # Set defined queues to 1 to force congestion + prev_queues = ethtool_get_queues_cnt(ifname) + logging.debug("RX/TX/combined queues: %s", prev_queues) + # Only set the queues to 1 if they exist in the device, i.e., they are > 0 + ethtool_set_queues_cnt(ifname, tuple(1 if x > 0 else x for x in prev_queues)) + defer(ethtool_set_queues_cnt, ifname, prev_queues) + + # Try to set the ring size to some low value.
+ # Do not fail if the hardware does not accept the desired values + prev_ring_size = ethtool_get_ringsize(ifname) + for size in [(1, 1), (128, 128), (256, 256)]: + if ethtool_set_ringsize(ifname, size): + # hardware accepted the desired ringsize + logging.debug("Set RX/TX ringsize to: %s from %s", size, prev_ring_size) + break + defer(ethtool_set_ringsize, ifname, prev_ring_size) + + +def test_netpoll(cfg: NetDrvEpEnv) -> None: + """ + Test netpoll by sending traffic to the interface and then sending + netconsole messages to trigger a poll + """ + + ifname = cfg.ifname + configure_network(ifname) + target_name = netcons_generate_random_target_name() + traffic = None + + try: + traffic = GenerateTraffic(cfg) + do_netpoll_flush_monitored(cfg, ifname, target_name) + finally: + if traffic: + traffic.stop() + + # Remove the netconsole target (queue/ring settings are restored via defer) + netcons_delete_target(target_name) + + +def test_check_dependencies() -> None: + """Check if the dependencies are met""" + if not os.path.exists(NETCONSOLE_CONFIGFS_PATH): + raise KsftSkipEx( + f"Directory {NETCONSOLE_CONFIGFS_PATH} does not exist. CONFIG_NETCONSOLE_DYNAMIC might not be set." # pylint: disable=C0301 + ) + + +def main() -> None: + """Main function to run the test""" + netcons_load_module() + test_check_dependencies() + with NetDrvEpEnv(__file__) as cfg: + ksft_run( + [test_netpoll], + args=(cfg,), + ) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index e0f114612c1a..da3623c5e8a9 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -30,7 +30,7 @@ def _test_v6(cfg) -> None: cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["6"], host=cfg.remote) def _test_tcp(cfg) -> None: - cfg.require_cmd("socat", remote=True) + cfg.require_cmd("socat", local=False, remote=True) port = rand_port() listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT" diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py index efcc1e10575b..c2bb5d3f1ca1 100755 --- a/tools/testing/selftests/drivers/net/stats.py +++ b/tools/testing/selftests/drivers/net/stats.py @@ -1,12 +1,16 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +""" +Tests related to standard netdevice statistics. +""" + import errno import subprocess import time from lib.py import ksft_run, ksft_exit, ksft_pr from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises -from lib.py import KsftSkipEx, KsftXfailEx +from lib.py import KsftSkipEx, KsftFailEx from lib.py import ksft_disruptive from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError from lib.py import NetDrvEnv @@ -18,13 +22,16 @@ rtnl = RtnlFamily() def check_pause(cfg) -> None: - global ethnl + """ + Check that drivers which support Pause config also report standard + pause stats. + """ try: ethnl.pause_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftXfailEx("pause not supported by the device") + raise KsftSkipEx("pause not supported by the device") from e raise data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex, @@ -33,13 +40,16 @@ def check_pause(cfg) -> None: def check_fec(cfg) -> None: - global ethnl + """ + Check that drivers which support FEC config also report standard + FEC stats.
+ """ try: ethnl.fec_get({"header": {"dev-index": cfg.ifindex}}) except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftXfailEx("FEC not supported by the device") + raise KsftSkipEx("FEC not supported by the device") from e raise data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex, @@ -48,15 +58,17 @@ def check_fec(cfg) -> None: def pkt_byte_sum(cfg) -> None: - global netfam, rtnl + """ + Check that qstat and interface stats match in value. + """ def get_qstat(test): - global netfam stats = netfam.qstats_get({}, dump=True) if stats: for qs in stats: if qs["ifindex"]== test.ifindex: return qs + return None qstat = get_qstat(cfg) if qstat is None: @@ -77,15 +89,14 @@ def pkt_byte_sum(cfg) -> None: for _ in range(10): rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64'] if stat_cmp(rtstat, qstat) < 0: - raise Exception("RTNL stats are lower, fetched later") + raise KsftFailEx("RTNL stats are lower, fetched later") qstat = get_qstat(cfg) if stat_cmp(rtstat, qstat) > 0: - raise Exception("Qstats are lower, fetched later") + raise KsftFailEx("Qstats are lower, fetched later") def qstat_by_ifindex(cfg) -> None: - global netfam - global rtnl + """ Qstats Netlink API tests - querying by ifindex. """ # Construct a map ifindex -> [dump, by-index, dump] ifindexes = {} @@ -93,7 +104,7 @@ def qstat_by_ifindex(cfg) -> None: for entry in stats: ifindexes[entry['ifindex']] = [entry, None, None] - for ifindex in ifindexes.keys(): + for ifindex in ifindexes: entry = netfam.qstats_get({"ifindex": ifindex}, dump=True) ksft_eq(len(entry), 1) ifindexes[entry[0]['ifindex']][1] = entry[0] @@ -145,7 +156,7 @@ def qstat_by_ifindex(cfg) -> None: # Try to get stats for lowest unused ifindex but not 0 devs = rtnl.getlink({}, dump=True) - all_ifindexes = set([dev["ifi-index"] for dev in devs]) + all_ifindexes = set(dev["ifi-index"] for dev in devs) lowest = 2 while lowest in all_ifindexes: lowest += 1 @@ -158,18 +169,20 @@ def qstat_by_ifindex(cfg) -> None: @ksft_disruptive def check_down(cfg) -> None: + """ Test statistics (interface and qstat) are not impacted by ifdown """ + try: qstat = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] except NlError as e: if e.error == errno.EOPNOTSUPP: - raise KsftSkipEx("qstats not supported by the device") + raise KsftSkipEx("qstats not supported by the device") from e raise ip(f"link set dev {cfg.dev['ifname']} down") defer(ip, f"link set dev {cfg.dev['ifname']} up") qstat2 = netfam.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - for k, v in qstat.items(): + for k in qstat: ksft_ge(qstat2[k], qstat[k], comment=f"{k} went backwards on device down") # exercise per-queue API to make sure that "device down" state @@ -263,6 +276,8 @@ def procfs_downup_hammer(cfg) -> None: def main() -> None: + """ Ksft boiler plate main """ + with NetDrvEnv(__file__, queue_count=100) as cfg: ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex, check_down, procfs_hammer, procfs_downup_hammer], diff --git a/tools/testing/selftests/drivers/net/xdp.py b/tools/testing/selftests/drivers/net/xdp.py new file mode 100755 index 000000000000..1dd8bf3bf6c9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/xdp.py @@ -0,0 +1,658 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +This file contains tests to verify native XDP support in network drivers. +The tests utilize the BPF program `xdp_native.bpf.o` from the `selftests.net.lib` +directory, with each test focusing on a specific aspect of XDP functionality. 
+""" +import random +import string +from dataclasses import dataclass +from enum import Enum + +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ne, ksft_pr +from lib.py import KsftFailEx, NetDrvEpEnv, EthtoolFamily, NlError +from lib.py import bkg, cmd, rand_port, wait_port_listen +from lib.py import ip, bpftool, defer + + +class TestConfig(Enum): + """Enum for XDP configuration options.""" + MODE = 0 # Configures the BPF program for a specific test + PORT = 1 # Port configuration to communicate with the remote host + ADJST_OFFSET = 2 # Tail/Head adjustment offset for extension/shrinking + ADJST_TAG = 3 # Adjustment tag to annotate the start and end of extension + + +class XDPAction(Enum): + """Enum for XDP actions.""" + PASS = 0 # Pass the packet up to the stack + DROP = 1 # Drop the packet + TX = 2 # Route the packet to the remote host + TAIL_ADJST = 3 # Adjust the tail of the packet + HEAD_ADJST = 4 # Adjust the head of the packet + + +class XDPStats(Enum): + """Enum for XDP statistics.""" + RX = 0 # Count of valid packets received for testing + PASS = 1 # Count of packets passed up to the stack + DROP = 2 # Count of packets dropped + TX = 3 # Count of incoming packets routed to the remote host + ABORT = 4 # Count of packets that were aborted + + +@dataclass +class BPFProgInfo: + """Data class to store information about a BPF program.""" + name: str # Name of the BPF program + file: str # BPF program object file + xdp_sec: str = "xdp" # XDP section name (e.g., "xdp" or "xdp.frags") + mtu: int = 1500 # Maximum Transmission Unit, default is 1500 + + +def _exchg_udp(cfg, port, test_string): + """ + Exchanges UDP packets between a local and remote host using the socat tool. + + Args: + cfg: Configuration object containing network settings. + port: Port number to use for the UDP communication. + test_string: String that the remote host will send. + + Returns: + The string received by the test host. + """ + cfg.require_cmd("socat", remote=True) + + rx_udp_cmd = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" + tx_udp_cmd = f"echo -n {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + + with bkg(rx_udp_cmd, exit_wait=True) as nc: + wait_port_listen(port, proto="udp") + cmd(tx_udp_cmd, host=cfg.remote, shell=True) + + return nc.stdout.strip() + + +def _test_udp(cfg, port, size=256): + """ + Tests UDP packet exchange between a local and remote host. + + Args: + cfg: Configuration object containing network settings. + port: Port number to use for the UDP communication. + size: The length of the test string to be exchanged, default is 256 characters. + + Returns: + bool: True if the received string matches the sent string, False otherwise. + """ + test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(size)) + recvd_str = _exchg_udp(cfg, port, test_str) + + return recvd_str == test_str + + +def _load_xdp_prog(cfg, bpf_info): + """ + Loads an XDP program onto a network interface. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + + Returns: + dict: A dictionary containing the XDP program ID, name, and associated map IDs. 
+ """ + abs_path = cfg.net_lib_dir / bpf_info.file + prog_info = {} + + cmd(f"ip link set dev {cfg.remote_ifname} mtu {bpf_info.mtu}", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) + + cmd( + f"ip link set dev {cfg.ifname} mtu {bpf_info.mtu} xdp obj {abs_path} sec {bpf_info.xdp_sec}", + shell=True + ) + defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") + + xdp_info = ip(f"-d link show dev {cfg.ifname}", json=True)[0] + prog_info["id"] = xdp_info["xdp"]["prog"]["id"] + prog_info["name"] = xdp_info["xdp"]["prog"]["name"] + prog_id = prog_info["id"] + + map_ids = bpftool(f"prog show id {prog_id}", json=True)["map_ids"] + prog_info["maps"] = {} + for map_id in map_ids: + name = bpftool(f"map show id {map_id}", json=True)["name"] + prog_info["maps"][name] = map_id + + return prog_info + + +def format_hex_bytes(value): + """ + Helper function that converts an integer into a formatted hexadecimal byte string. + + Args: + value: An integer representing the number to be converted. + + Returns: + A string representing hexadecimal equivalent of value, with bytes separated by spaces. + """ + hex_str = value.to_bytes(4, byteorder='little', signed=True) + return ' '.join(f'{byte:02x}' for byte in hex_str) + + +def _set_xdp_map(map_name, key, value): + """ + Updates an XDP map with a given key-value pair using bpftool. + + Args: + map_name: The name of the XDP map to update. + key: The key to update in the map, formatted as a hexadecimal string. + value: The value to associate with the key, formatted as a hexadecimal string. + """ + key_formatted = format_hex_bytes(key) + value_formatted = format_hex_bytes(value) + bpftool( + f"map update name {map_name} key hex {key_formatted} value hex {value_formatted}" + ) + + +def _get_stats(xdp_map_id): + """ + Retrieves and formats statistics from an XDP map. + + Args: + xdp_map_id: The ID of the XDP map from which to retrieve statistics. + + Returns: + A dictionary containing formatted packet statistics for various XDP actions. + The keys are based on the XDPStats Enum values. + + Raises: + KsftFailEx: If the stats retrieval fails. + """ + stats_dump = bpftool(f"map dump id {xdp_map_id}", json=True) + if not stats_dump: + raise KsftFailEx(f"Failed to get stats for map {xdp_map_id}") + + stats_formatted = {} + for key in range(0, 5): + val = stats_dump[key]["formatted"]["value"] + if stats_dump[key]["formatted"]["key"] == XDPStats.RX.value: + stats_formatted[XDPStats.RX.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.PASS.value: + stats_formatted[XDPStats.PASS.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.DROP.value: + stats_formatted[XDPStats.DROP.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.TX.value: + stats_formatted[XDPStats.TX.value] = val + elif stats_dump[key]["formatted"]["key"] == XDPStats.ABORT.value: + stats_formatted[XDPStats.ABORT.value] = val + + return stats_formatted + + +def _test_pass(cfg, bpf_info, msg_sz): + """ + Tests the XDP_PASS action by exchanging UDP packets. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + msg_sz: Size of the test message to send. 
+ """ + + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.PASS.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + ksft_eq(_test_udp(cfg, port, msg_sz), True, "UDP packet exchange failed") + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero") + ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.PASS.value], "RX and PASS stats mismatch") + + +def test_xdp_native_pass_sb(cfg): + """ + Tests the XDP_PASS action for the single-buffer case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + _test_pass(cfg, bpf_info, 256) + + +def test_xdp_native_pass_mb(cfg): + """ + Tests the XDP_PASS action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + _test_pass(cfg, bpf_info, 8000) + + +def _test_drop(cfg, bpf_info, msg_sz): + """ + Tests the XDP_DROP action by exchanging UDP packets. + + Args: + cfg: Configuration object containing network settings. + bpf_info: BPFProgInfo object containing information about the BPF program. + msg_sz: Size of the test message to send. + """ + + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.DROP.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + ksft_eq(_test_udp(cfg, port, msg_sz), False, "UDP packet exchange should fail") + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + ksft_ne(stats[XDPStats.RX.value], 0, "RX stats should not be zero") + ksft_eq(stats[XDPStats.RX.value], stats[XDPStats.DROP.value], "RX and DROP stats mismatch") + + +def test_xdp_native_drop_sb(cfg): + """ + Tests the XDP_DROP action for a single-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog", "xdp_native.bpf.o", "xdp", 1500) + + _test_drop(cfg, bpf_info, 256) + + +def test_xdp_native_drop_mb(cfg): + """ + Tests the XDP_DROP action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings. + """ + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + _test_drop(cfg, bpf_info, 8000) + + +def test_xdp_native_tx_mb(cfg): + """ + Tests the XDP_TX action for a multi-buff case. + + Args: + cfg: Configuration object containing network settings.
+ """ + cfg.require_cmd("socat", remote=True) + + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + prog_info = _load_xdp_prog(cfg, bpf_info) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TX.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + test_string = ''.join(random.choice(string.ascii_lowercase) for _ in range(8000)) + rx_udp = f"socat -{cfg.addr_ipver} -T 2 -u UDP-RECV:{port},reuseport STDOUT" + tx_udp = f"echo {test_string} | socat -t 2 -u STDIN UDP:{cfg.baddr}:{port}" + + with bkg(rx_udp, host=cfg.remote, exit_wait=True) as rnc: + wait_port_listen(port, proto="udp", host=cfg.remote) + cmd(tx_udp, host=cfg.remote, shell=True) + + stats = _get_stats(prog_info['maps']['map_xdp_stats']) + + ksft_eq(rnc.stdout.strip(), test_string, "UDP packet exchange failed") + ksft_eq(stats[XDPStats.TX.value], 1, "TX stats mismatch") + + +def _validate_res(res, offset_lst, pkt_sz_lst): + """ + Validates the result of a test. + + Args: + res: The result of the test, which should be a dictionary with a "status" key. + + Raises: + KsftFailEx: If the test fails to pass any combination of offset and packet size. + """ + if "status" not in res: + raise KsftFailEx("Missing 'status' key in result dictionary") + + # Validate that not a single case was successful + if res["status"] == "fail": + if res["offset"] == offset_lst[0] and res["pkt_sz"] == pkt_sz_lst[0]: + raise KsftFailEx(f"{res['reason']}") + + # Get the previous offset and packet size to report the successful run + tmp_idx = offset_lst.index(res["offset"]) + prev_offset = offset_lst[tmp_idx - 1] + if tmp_idx == 0: + tmp_idx = pkt_sz_lst.index(res["pkt_sz"]) + prev_pkt_sz = pkt_sz_lst[tmp_idx - 1] + else: + prev_pkt_sz = res["pkt_sz"] + + # Use these values for error reporting + ksft_pr( + f"Failed run: pkt_sz {res['pkt_sz']}, offset {res['offset']}. " + f"Last successful run: pkt_sz {prev_pkt_sz}, offset {prev_offset}. " + f"Reason: {res['reason']}" + ) + + +def _check_for_failures(recvd_str, stats): + """ + Checks for common failures while adjusting headroom or tailroom. + + Args: + recvd_str: The string received from the remote host after sending a test string. + stats: A dictionary containing formatted packet statistics for various XDP actions. + + Returns: + str: A string describing the failure reason if a failure is detected, otherwise None. + """ + + # Any adjustment failure result in an abort hence, we track this counter + if stats[XDPStats.ABORT.value] != 0: + return "Adjustment failed" + + # Since we are using aggregate stats for a single test across all offsets and packet sizes + # we can't use RX stats only to track data exchange failure without taking a previous + # snapshot. An easier way is to simply check for non-zero length of received string. + if len(recvd_str) == 0: + return "Data exchange failed" + + # Check for RX and PASS stats mismatch. Ideally, they should be equal for a successful run + if stats[XDPStats.RX.value] != stats[XDPStats.PASS.value]: + return "RX stats mismatch" + + return None + + +def _test_xdp_native_tail_adjst(cfg, pkt_sz_lst, offset_lst): + """ + Tests the XDP tail adjustment functionality. + + This function loads the appropriate XDP program based on the provided + program name and configures the XDP map for tail adjustment. It then + validates the tail adjustment by sending and receiving UDP packets + with specified packet sizes and offsets. + + Args: + cfg: Configuration object containing network settings. 
+ prog: Name of the XDP program to load. + pkt_sz_lst: List of packet sizes to test. + offset_lst: List of offsets to validate support for tail adjustment. + + Returns: + dict: A dictionary with test status and failure details if applicable. + """ + port = rand_port() + bpf_info = BPFProgInfo("xdp_prog_frags", "xdp_native.bpf.o", "xdp.frags", 9000) + + prog_info = _load_xdp_prog(cfg, bpf_info) + + # Configure the XDP map for tail adjustment + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.TAIL_ADJST.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + for offset in offset_lst: + tag = format(random.randint(65, 90), "02x") + + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + if offset > 0: + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) + + for pkt_sz in pkt_sz_lst: + test_str = "".join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) + recvd_str = _exchg_udp(cfg, port, test_str) + stats = _get_stats(prog_info["maps"]["map_xdp_stats"]) + + failure = _check_for_failures(recvd_str, stats) + if failure is not None: + return { + "status": "fail", + "reason": failure, + "offset": offset, + "pkt_sz": pkt_sz, + } + + # Validate data content based on offset direction + expected_data = None + if offset > 0: + expected_data = test_str + (offset * chr(int(tag, 16))) + else: + expected_data = test_str[0:pkt_sz + offset] + + if recvd_str != expected_data: + return { + "status": "fail", + "reason": "Data mismatch", + "offset": offset, + "pkt_sz": pkt_sz, + } + + return {"status": "pass"} + + +def test_xdp_native_adjst_tail_grow_data(cfg): + """ + Tests the XDP tail adjustment by growing packet data. + + Args: + cfg: Configuration object containing network settings. + """ + pkt_sz_lst = [512, 1024, 2048] + offset_lst = [1, 16, 32, 64, 128, 256] + res = _test_xdp_native_tail_adjst( + cfg, + pkt_sz_lst, + offset_lst, + ) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def test_xdp_native_adjst_tail_shrnk_data(cfg): + """ + Tests the XDP tail adjustment by shrinking packet data. + + Args: + cfg: Configuration object containing network settings. + """ + pkt_sz_lst = [512, 1024, 2048] + offset_lst = [-16, -32, -64, -128, -256] + res = _test_xdp_native_tail_adjst( + cfg, + pkt_sz_lst, + offset_lst, + ) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def get_hds_thresh(cfg): + """ + Retrieves the header data split (HDS) threshold for a network interface. + + Args: + cfg: Configuration object containing network settings. + + Returns: + The HDS threshold value. If the threshold is not supported or an error occurs, + a default value of 1500 is returned. + """ + netnl = cfg.netnl + hds_thresh = 1500 + + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + if 'hds-thresh' not in rings: + ksft_pr(f'hds-thresh not supported. Using default: {hds_thresh}') + return hds_thresh + hds_thresh = rings['hds-thresh'] + except NlError as e: + ksft_pr(f"Failed to get rings: {e}. Using default: {hds_thresh}") + + return hds_thresh + + +def _test_xdp_native_head_adjst(cfg, prog, pkt_sz_lst, offset_lst): + """ + Tests the XDP head adjustment action for a multi-buffer case. + + Args: + cfg: Configuration object containing network settings. + netnl: Network namespace or link object (not used in this function). + + This function sets up the packet size and offset lists, then performs + the head adjustment test by sending and receiving UDP packets. 
+ """ + cfg.require_cmd("socat", remote=True) + + prog_info = _load_xdp_prog(cfg, BPFProgInfo(prog, "xdp_native.bpf.o", "xdp.frags", 9000)) + port = rand_port() + + _set_xdp_map("map_xdp_setup", TestConfig.MODE.value, XDPAction.HEAD_ADJST.value) + _set_xdp_map("map_xdp_setup", TestConfig.PORT.value, port) + + hds_thresh = get_hds_thresh(cfg) + for offset in offset_lst: + for pkt_sz in pkt_sz_lst: + # The "head" buffer must contain at least the Ethernet header + # after we eat into it. We send large-enough packets, but if HDS + # is enabled head will only contain headers. Don't try to eat + # more than 28 bytes (UDPv4 + eth hdr left: (14 + 20 + 8) - 14) + l2_cut_off = 28 if cfg.addr_ipver == 4 else 48 + if pkt_sz > hds_thresh and offset > l2_cut_off: + ksft_pr( + f"Failed run: pkt_sz ({pkt_sz}) > HDS threshold ({hds_thresh}) and " + f"offset {offset} > {l2_cut_off}" + ) + return {"status": "pass"} + + test_str = ''.join(random.choice(string.ascii_lowercase) for _ in range(pkt_sz)) + tag = format(random.randint(65, 90), '02x') + + _set_xdp_map("map_xdp_setup", + TestConfig.ADJST_OFFSET.value, + offset) + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_TAG.value, int(tag, 16)) + _set_xdp_map("map_xdp_setup", TestConfig.ADJST_OFFSET.value, offset) + + recvd_str = _exchg_udp(cfg, port, test_str) + + # Check for failures around adjustment and data exchange + failure = _check_for_failures(recvd_str, _get_stats(prog_info['maps']['map_xdp_stats'])) + if failure is not None: + return { + "status": "fail", + "reason": failure, + "offset": offset, + "pkt_sz": pkt_sz + } + + # Validate data content based on offset direction + expected_data = None + if offset < 0: + expected_data = chr(int(tag, 16)) * (0 - offset) + test_str + else: + expected_data = test_str[offset:] + + if recvd_str != expected_data: + return { + "status": "fail", + "reason": "Data mismatch", + "offset": offset, + "pkt_sz": pkt_sz + } + + return {"status": "pass"} + + +def test_xdp_native_adjst_head_grow_data(cfg): + """ + Tests the XDP headroom growth support. + + Args: + cfg: Configuration object containing network settings. + + This function sets up the packet size and offset lists, then calls the + _test_xdp_native_head_adjst_mb function to perform the actual test. The + test is passed if the headroom is successfully extended for given packet + sizes and offsets. + """ + pkt_sz_lst = [512, 1024, 2048] + + # Negative values result in headroom shrinking, resulting in growing of payload + offset_lst = [-16, -32, -64, -128, -256] + res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def test_xdp_native_adjst_head_shrnk_data(cfg): + """ + Tests the XDP headroom shrinking support. + + Args: + cfg: Configuration object containing network settings. + + This function sets up the packet size and offset lists, then calls the + _test_xdp_native_head_adjst_mb function to perform the actual test. The + test is passed if the headroom is successfully shrunk for given packet + sizes and offsets. + """ + pkt_sz_lst = [512, 1024, 2048] + + # Positive values result in headroom growing, resulting in shrinking of payload + offset_lst = [16, 32, 64, 128, 256] + res = _test_xdp_native_head_adjst(cfg, "xdp_prog_frags", pkt_sz_lst, offset_lst) + + _validate_res(res, offset_lst, pkt_sz_lst) + + +def main(): + """ + Main function to execute the XDP tests. + + This function runs a series of tests to validate the XDP support for + both the single and multi-buffer. 
It uses the NetDrvEpEnv context + manager to manage the network driver environment and the ksft_run + function to execute the tests. + """ + with NetDrvEpEnv(__file__) as cfg: + cfg.netnl = EthtoolFamily() + ksft_run( + [ + test_xdp_native_pass_sb, + test_xdp_native_pass_mb, + test_xdp_native_drop_sb, + test_xdp_native_drop_mb, + test_xdp_native_tx_mb, + test_xdp_native_adjst_tail_grow_data, + test_xdp_native_adjst_tail_shrnk_data, + test_xdp_native_adjst_head_grow_data, + test_xdp_native_adjst_head_shrnk_data, + ], + args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index 7afa58e2bb20..fcbdb1297e24 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -3,3 +3,4 @@ dnotify_test devpts_pts file_stressor anon_inode_test +kernfs_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index b02326193fee..73d4650af1a5 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test +TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/kernfs_test.c b/tools/testing/selftests/filesystems/kernfs_test.c new file mode 100644 index 000000000000..16538b3b318e --- /dev/null +++ b/tools/testing/selftests/filesystems/kernfs_test.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ + +#include <fcntl.h> +#include <stdio.h> +#include <sys/stat.h> +#include <sys/xattr.h> + +#include "../kselftest_harness.h" +#include "wrappers.h" + +TEST(kernfs_listxattr) +{ + int fd; + + /* Read-only file that can never have any extended attributes set. */ + fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ASSERT_EQ(flistxattr(fd, NULL, 0), 0); + EXPECT_EQ(close(fd), 0); +} + +TEST(kernfs_getxattr) +{ + int fd; + char buf[1]; + + /* Read-only file that can never have any extended attributes set. 
*/ + fd = open("/sys/kernel/warn_count", O_RDONLY | O_CLOEXEC); + ASSERT_GE(fd, 0); + ASSERT_LT(fgetxattr(fd, "user.foo", buf, sizeof(buf)), 0); + ASSERT_EQ(errno, ENODATA); + EXPECT_EQ(close(fd), 0); +} + +TEST_HARNESS_MAIN + diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc index 73f6c6fcecab..2506f464811b 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc @@ -16,6 +16,18 @@ ocnt=`cat enabled_functions | wc -l` echo "f:myevent1 $PLACE" >> dynamic_events +echo "f:myevent2 $PLACE%return" >> dynamic_events + +# add another event +echo "f:myevent3 $PLACE2" >> dynamic_events + +grep -q myevent1 dynamic_events +grep -q myevent2 dynamic_events +grep -q myevent3 dynamic_events +test -d events/fprobes/myevent1 +test -d events/fprobes/myevent2 + +echo 1 > events/fprobes/myevent1/enable # Make sure the event is attached and is the only one grep -q $PLACE enabled_functions cnt=`cat enabled_functions | wc -l` @@ -23,29 +35,22 @@ if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi -echo "f:myevent2 $PLACE%return" >> dynamic_events - +echo 1 > events/fprobes/myevent2/enable # It should till be the only attached function cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi -# add another event -echo "f:myevent3 $PLACE2" >> dynamic_events - +echo 1 > events/fprobes/myevent3/enable +# If the function is different, the attached function should be increased grep -q $PLACE2 enabled_functions cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi -grep -q myevent1 dynamic_events -grep -q myevent2 dynamic_events -grep -q myevent3 dynamic_events -test -d events/fprobes/myevent1 -test -d events/fprobes/myevent2 - +echo 0 > events/fprobes/myevent2/enable echo "-:myevent2" >> dynamic_events grep -q myevent1 dynamic_events @@ -57,6 +62,7 @@ if [ $cnt -ne $((ocnt + 2)) ]; then exit_fail fi +echo 0 > events/fprobes/enable echo > dynamic_events # Should have none left @@ -67,12 +73,14 @@ fi echo "f:myevent4 $PLACE" >> dynamic_events +echo 1 > events/fprobes/myevent4/enable # Should only have one enabled cnt=`cat enabled_functions | wc -l` if [ $cnt -ne $((ocnt + 1)) ]; then exit_fail fi +echo 0 > events/fprobes/enable echo > dynamic_events # Should have none left diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index b7c8f29c09a9..65916bb55dfb 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -14,11 +14,35 @@ fail() { #msg exit_fail } +# As reading trace can last forever, simply look for 3 different +# events then exit out of reading the file. If there's not 3 different +# events, then the test has failed. 
+check_unique() { + cat trace | grep -v '^#' | awk ' + BEGIN { cnt = 0; } + { + for (i = 0; i < cnt; i++) { + if (event[i] == $5) { + break; + } + } + if (i == cnt) { + event[cnt++] = $5; + if (cnt > 2) { + exit; + } + } + } + END { + printf "%d", cnt; + }' +} + echo 'sched:*' > set_event yield -count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`check_unique` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi @@ -29,7 +53,7 @@ echo 1 > events/sched/enable yield -count=`head -n 100 trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` +count=`check_unique` if [ $count -lt 3 ]; then fail "at least fork, exec and exit events should be recorded" fi diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc index 4b994b6df5ac..ed81eaf2afd6 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-glob.tc @@ -29,7 +29,7 @@ ftrace_filter_check 'schedule*' '^schedule.*$' ftrace_filter_check '*pin*lock' '.*pin.*lock$' # filter by start*mid* -ftrace_filter_check 'mutex*try*' '^mutex.*try.*' +ftrace_filter_check 'mutex*unl*' '^mutex.*unl.*' # Advanced full-glob matching feature is recently supported. # Skip the tests if we are sure the kernel does not support it. diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index 7b24ae89594a..776ad658f75e 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -11,3 +11,4 @@ futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock futex_waitv +futex_numa diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c index 20a9d3ecf743..a9ecfb2d3932 100644 --- a/tools/testing/selftests/futex/functional/futex_numa_mpol.c +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -144,7 +144,7 @@ int main(int argc, char *argv[]) struct futex32_numa *futex_numa; int mem_size, i; void *futex_ptr; - char c; + int c; while ((c = getopt(argc, argv, "chv:")) != -1) { switch (c) { @@ -210,6 +210,10 @@ int main(int argc, char *argv[]) ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask, sizeof(nodemask) * 8, 0); if (ret == 0) { + ret = numa_set_mempolicy_home_node(futex_ptr, mem_size, i, 0); + if (ret != 0) + ksft_exit_fail_msg("Failed to set home node: %m, %d\n", errno); + ksft_print_msg("Node %d test\n", i); futex_numa->futex = 0; futex_numa->numa = FUTEX_NO_NODE; @@ -220,8 +224,8 @@ int main(int argc, char *argv[]) if (0) test_futex_mpol(futex_numa, 0); if (futex_numa->numa != i) { - ksft_test_result_fail("Returned NUMA node is %d expected %d\n", - futex_numa->numa, i); + ksft_exit_fail_msg("Returned NUMA node is %d expected %d\n", + futex_numa->numa, i); } } } diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c index 2dca18fefedc..aea001ac4946 100644 --- a/tools/testing/selftests/futex/functional/futex_priv_hash.c +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -26,14 +26,12 @@ static int counter; #ifndef PR_FUTEX_HASH #define PR_FUTEX_HASH 78 # define PR_FUTEX_HASH_SET_SLOTS 1 -# define FH_FLAG_IMMUTABLE (1ULL << 0) # define PR_FUTEX_HASH_GET_SLOTS 2 -# define PR_FUTEX_HASH_GET_IMMUTABLE 3 
#endif -static int futex_hash_slots_set(unsigned int slots, int flags) +static int futex_hash_slots_set(unsigned int slots) { - return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags); + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, 0); } static int futex_hash_slots_get(void) @@ -41,16 +39,11 @@ static int futex_hash_slots_get(void) return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); } -static int futex_hash_immutable_get(void) -{ - return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE); -} - static void futex_hash_slots_set_verify(int slots) { int ret; - ret = futex_hash_slots_set(slots, 0); + ret = futex_hash_slots_set(slots); if (ret != 0) { ksft_test_result_fail("Failed to set slots to %d: %m\n", slots); ksft_finished(); @@ -64,13 +57,13 @@ static void futex_hash_slots_set_verify(int slots) ksft_test_result_pass("SET and GET slots %d passed\n", slots); } -static void futex_hash_slots_set_must_fail(int slots, int flags) +static void futex_hash_slots_set_must_fail(int slots) { int ret; - ret = futex_hash_slots_set(slots, flags); - ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n", - slots, flags); + ret = futex_hash_slots_set(slots); + ksft_test_result(ret < 0, "futex_hash_slots_set(%d)\n", + slots); } static void *thread_return_fn(void *arg) @@ -111,6 +104,30 @@ static void join_max_threads(void) } } +#define SEC_IN_NSEC 1000000000 +#define MSEC_IN_NSEC 1000000 + +static void futex_dummy_op(void) +{ + pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; + struct timespec timeout; + int ret; + + pthread_mutex_lock(&lock); + clock_gettime(CLOCK_REALTIME, &timeout); + timeout.tv_nsec += 100 * MSEC_IN_NSEC; + if (timeout.tv_nsec >= SEC_IN_NSEC) { + timeout.tv_nsec -= SEC_IN_NSEC; + timeout.tv_sec++; + } + ret = pthread_mutex_timedlock(&lock, &timeout); + if (ret == 0) + ksft_exit_fail_msg("Successfully locked an already locked mutex.\n"); + + if (ret != ETIMEDOUT) + ksft_exit_fail_msg("pthread_mutex_timedlock() did not timeout: %d.\n", ret); +} + static void usage(char *prog) { printf("Usage: %s\n", prog); @@ -128,18 +145,14 @@ int main(int argc, char *argv[]) { int futex_slots1, futex_slotsn, online_cpus; pthread_mutexattr_t mutex_attr_pi; - int use_global_hash = 0; - int ret; - char c; + int ret, retry = 20; + int c; - while ((c = getopt(argc, argv, "cghv:")) != -1) { + while ((c = getopt(argc, argv, "chv:")) != -1) { switch (c) { case 'c': log_color(1); break; - case 'g': - use_global_hash = 1; - break; case 'h': usage(basename(argv[0])); exit(0); @@ -154,7 +167,7 @@ int main(int argc, char *argv[]) } ksft_print_header(); - ksft_set_plan(22); + ksft_set_plan(21); ret = pthread_mutexattr_init(&mutex_attr_pi); ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT); @@ -167,10 +180,6 @@ int main(int argc, char *argv[]) if (ret != 0) ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret); - ret = futex_hash_immutable_get(); - if (ret != 0) - ksft_exit_fail_msg("futex_hash_immutable_get() failed: %d, %m\n", ret); - ksft_test_result_pass("Basic get slots and immutable status.\n"); ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL); if (ret != 0) @@ -208,8 +217,24 @@ int main(int argc, char *argv[]) */ ksft_print_msg("Online CPUs: %d\n", online_cpus); if (online_cpus > 16) { +retry_getslots: futex_slotsn = futex_hash_slots_get(); if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) { + retry--; + /* + * Auto scaling on thread creation can be slightly delayed + * because it waits for a RCU grace period twice. 
The new + * private hash is assigned upon the first futex operation + * after grace period. + * To cover all this for testing purposes the function + * below will acquire a lock and acquire it again with a + * 100ms timeout which must timeout. This ensures we + * sleep for 100ms and issue a futex operation. + */ + if (retry > 0) { + futex_dummy_op(); + goto retry_getslots; + } ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n", futex_slots1, futex_slotsn); ksft_exit_fail_msg(test_msg_auto_inc); @@ -227,7 +252,7 @@ int main(int argc, char *argv[]) futex_hash_slots_set_verify(32); futex_hash_slots_set_verify(16); - ret = futex_hash_slots_set(15, 0); + ret = futex_hash_slots_set(15); ksft_test_result(ret < 0, "Use 15 slots\n"); futex_hash_slots_set_verify(2); @@ -245,28 +270,23 @@ int main(int argc, char *argv[]) ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n", ret); - futex_hash_slots_set_must_fail(1 << 29, 0); + futex_hash_slots_set_must_fail(1 << 29); + futex_hash_slots_set_verify(4); /* - * Once the private hash has been made immutable or global hash has been requested, - * then this requested can not be undone. + * Once the global hash has been requested, then this requested can not + * be undone. */ - if (use_global_hash) { - ret = futex_hash_slots_set(0, 0); - ksft_test_result(ret == 0, "Global hash request\n"); - } else { - ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE); - ksft_test_result(ret == 0, "Immutable resize to 4\n"); - } + ret = futex_hash_slots_set(0); + ksft_test_result(ret == 0, "Global hash request\n"); if (ret != 0) goto out; - futex_hash_slots_set_must_fail(4, 0); - futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE); - futex_hash_slots_set_must_fail(8, 0); - futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE); - futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE); - futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(4); + futex_hash_slots_set_must_fail(8); + futex_hash_slots_set_must_fail(8); + futex_hash_slots_set_must_fail(0); + futex_hash_slots_set_must_fail(6); ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); if (ret != 0) { @@ -277,14 +297,7 @@ int main(int argc, char *argv[]) join_max_threads(); ret = futex_hash_slots_get(); - if (use_global_hash) { - ksft_test_result(ret == 0, "Continue to use global hash\n"); - } else { - ksft_test_result(ret == 4, "Continue to use the 4 hash buckets\n"); - } - - ret = futex_hash_immutable_get(); - ksft_test_result(ret == 1, "Hash reports to be immutable\n"); + ksft_test_result(ret == 0, "Continue to use global hash\n"); out: ksft_finished(); diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index ea79662405bc..1f625b39948a 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -4,6 +4,7 @@ * * Copyright 2021 Collabora Ltd. 
*/ +#include <linux/time_types.h> #include <stdint.h> #define u64_to_ptr(x) ((void *)(uintptr_t)(x)) @@ -65,7 +66,12 @@ struct futex32_numa { static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters, unsigned long flags, struct timespec *timo, clockid_t clockid) { - return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); + struct __kernel_timespec ts = { + .tv_sec = timo->tv_sec, + .tv_nsec = timo->tv_nsec, + }; + + return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, &ts, clockid); } /* diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h index ddbcfc9b7bac..7a5fd1d5355e 100644 --- a/tools/testing/selftests/futex/include/futextest.h +++ b/tools/testing/selftests/futex/include/futextest.h @@ -47,6 +47,17 @@ typedef volatile u_int32_t futex_t; FUTEX_PRIVATE_FLAG) #endif +/* + * SYS_futex is expected from system C library, in glibc some 32-bit + * architectures (e.g. RV32) are using 64-bit time_t, therefore it doesn't have + * SYS_futex defined but just SYS_futex_time64. Define SYS_futex as + * SYS_futex_time64 in this situation to ensure the compilation and the + * compatibility. + */ +#if !defined(SYS_futex) && defined(SYS_futex_time64) +#define SYS_futex SYS_futex_time64 +#endif + /** * futex() - SYS_futex syscall wrapper * @uaddr: address of first futex diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index b1f40857307d..38c51158adf8 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -135,6 +135,7 @@ CONFIG_NET_EMATCH=y CONFIG_NETFILTER_NETLINK_LOG=y CONFIG_NETFILTER_NETLINK_QUEUE=y CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y CONFIG_NETFILTER_XT_MATCH_BPF=y CONFIG_NETFILTER_XT_MATCH_COMMENT=y diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py index 66daf7e5975c..eb4e15a0e53b 100644 --- a/tools/testing/selftests/hid/tests/test_mouse.py +++ b/tools/testing/selftests/hid/tests/test_mouse.py @@ -439,6 +439,68 @@ class BadResolutionMultiplierMouse(ResolutionMultiplierMouse): return 32 # EPIPE +class BadReportDescriptorMouse(BaseMouse): + """ + This "device" was one autogenerated by syzbot. There are a lot of issues in + it, and the most problematic is that it declares features that have no + size. + + This leads to report->size being set to 0 and can mess up with usbhid + internals. Fortunately, uhid merely passes the incoming buffer, without + touching it so a buffer of size 0 will be translated to [] without + triggering a kernel oops. + + Because the report descriptor is wrong, no input are created, and we need + to tweak a little bit the parameters to make it look correct. 
+ """ + + # fmt: off + report_descriptor = [ + 0x96, 0x01, 0x00, # Report Count (1) 0 + 0x06, 0x01, 0x00, # Usage Page (Generic Desktop) 3 + # 0x03, 0x00, 0x00, 0x00, 0x00, # Ignored by the kernel somehow + 0x2a, 0x90, 0xa0, # Usage Maximum (41104) 6 + 0x27, 0x00, 0x00, 0x00, 0x00, # Logical Maximum (0) 9 + 0xb3, 0x81, 0x3e, 0x25, 0x03, # Feature (Cnst,Arr,Abs,Vol) 14 + 0x1b, 0xdd, 0xe8, 0x40, 0x50, # Usage Minimum (1346431197) 19 + 0x3b, 0x5d, 0x8c, 0x3d, 0xda, # Designator Index 24 + ] + # fmt: on + + def __init__( + self, rdesc=report_descriptor, name=None, input_info=(3, 0x045E, 0x07DA) + ): + super().__init__(rdesc, name, input_info) + self.high_resolution_report_called = False + + def get_evdev(self, application=None): + assert self._input_nodes is None + return ( + "Ok" # should be a list or None, but both would fail, so abusing the system + ) + + def next_sync_events(self, application=None): + # there are no evdev nodes, so no events + return [] + + def is_ready(self): + # we wait for the SET_REPORT command to come + return self.high_resolution_report_called + + def set_report(self, req, rnum, rtype, data): + if rtype != self.UHID_FEATURE_REPORT: + raise InvalidHIDCommunication(f"Unexpected report type: {rtype}") + if rnum != 0x0: + raise InvalidHIDCommunication(f"Unexpected report number: {rnum}") + + if len(data) != 1: + raise InvalidHIDCommunication(f"Unexpected data: {data}, expected '[0]'") + + self.high_resolution_report_called = True + + return 0 + + class ResolutionMultiplierHWheelMouse(TwoWheelMouse): # fmt: off report_descriptor = [ @@ -975,3 +1037,11 @@ class TestMiMouse(TestWheelMouse): # assert below print out the real error pass assert remaining == [] + + +class TestBadReportDescriptorMouse(base.BaseTestCase.TestUhid): + def create_device(self): + return BadReportDescriptorMouse() + + def assertName(self, uhdev): + pass diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 1a8e85afe9aa..1926ef6b40ab 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -54,6 +54,8 @@ static __attribute__((constructor)) void setup_sizes(void) mfd_buffer = memfd_mmap(BUFFER_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, &mfd); + assert(mfd_buffer != MAP_FAILED); + assert(mfd > 0); } FIXTURE(iommufd) @@ -1746,13 +1748,15 @@ TEST_F(iommufd_mock_domain, all_aligns) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* @@ -1798,13 +1802,15 @@ TEST_F(iommufd_mock_domain, all_aligns_copy) unsigned int end; uint8_t *buf; int prot = PROT_READ | PROT_WRITE; - int mfd; + int mfd = -1; if (variant->file) buf = memfd_mmap(buf_size, prot, MAP_SHARED, &mfd); else buf = mmap(0, buf_size, prot, self->mmap_flags, -1, 0); ASSERT_NE(MAP_FAILED, buf); + if (variant->file) + ASSERT_GT(mfd, 0); check_refs(buf, buf_size, 0); /* @@ -2008,6 +2014,7 @@ FIXTURE_VARIANT(iommufd_dirty_tracking) FIXTURE_SETUP(iommufd_dirty_tracking) { + size_t mmap_buffer_size; unsigned long size; int mmap_flags; void *vrc; @@ -2022,22 +2029,33 @@ FIXTURE_SETUP(iommufd_dirty_tracking) self->fd = open("/dev/iommu", O_RDWR); ASSERT_NE(-1, self->fd); - rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size); - if (rc || !self->buffer) { - SKIP(return, "Skipping 
buffer_size=%lu due to errno=%d", - variant->buffer_size, rc); - } - mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED; + mmap_buffer_size = variant->buffer_size; if (variant->hugepages) { /* * MAP_POPULATE will cause the kernel to fail mmap if THPs are * not available. */ mmap_flags |= MAP_HUGETLB | MAP_POPULATE; + + /* + * Allocation must be aligned to the HUGEPAGE_SIZE, because the + * following mmap() will automatically align the length to be a + * multiple of the underlying huge page size. Failing to do the + * same at this allocation will result in a memory overwrite by + * the mmap(). + */ + if (mmap_buffer_size < HUGEPAGE_SIZE) + mmap_buffer_size = HUGEPAGE_SIZE; + } + + rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, mmap_buffer_size); + if (rc || !self->buffer) { + SKIP(return, "Skipping buffer_size=%lu due to errno=%d", + mmap_buffer_size, rc); } assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0); - vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE, + vrc = mmap(self->buffer, mmap_buffer_size, PROT_READ | PROT_WRITE, mmap_flags, -1, 0); assert(vrc == self->buffer); @@ -2066,8 +2084,8 @@ FIXTURE_SETUP(iommufd_dirty_tracking) FIXTURE_TEARDOWN(iommufd_dirty_tracking) { - munmap(self->buffer, variant->buffer_size); - munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE)); + free(self->buffer); + free(self->bitmap); teardown_iommufd(self->fd, _metadata); } diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 72f6636e5d90..6e967b58acfd 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -60,13 +60,18 @@ static inline void *memfd_mmap(size_t length, int prot, int flags, int *mfd_p) { int mfd_flags = (flags & MAP_HUGETLB) ? 
MFD_HUGETLB : 0; int mfd = memfd_create("buffer", mfd_flags); + void *buf = MAP_FAILED; if (mfd <= 0) return MAP_FAILED; if (ftruncate(mfd, length)) - return MAP_FAILED; + goto out; *mfd_p = mfd; - return mmap(0, length, prot, flags, mfd, 0); + buf = mmap(0, length, prot, flags, mfd, 0); +out: + if (buf == MAP_FAILED) + close(mfd); + return buf; } /* diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c index e9dbb84c100a..5e36aeeb9901 100644 --- a/tools/testing/selftests/ipc/msgque.c +++ b/tools/testing/selftests/ipc/msgque.c @@ -39,26 +39,26 @@ int restore_queue(struct msgque_data *msgque) fd = open("/proc/sys/kernel/msg_next_id", O_WRONLY); if (fd == -1) { - printf("Failed to open /proc/sys/kernel/msg_next_id\n"); + ksft_test_result_fail("Failed to open /proc/sys/kernel/msg_next_id\n"); return -errno; } sprintf(buf, "%d", msgque->msq_id); ret = write(fd, buf, strlen(buf)); if (ret != strlen(buf)) { - printf("Failed to write to /proc/sys/kernel/msg_next_id\n"); + ksft_test_result_fail("Failed to write to /proc/sys/kernel/msg_next_id\n"); return -errno; } id = msgget(msgque->key, msgque->mode | IPC_CREAT | IPC_EXCL); if (id == -1) { - printf("Failed to create queue\n"); + ksft_test_result_fail("Failed to create queue\n"); return -errno; } if (id != msgque->msq_id) { - printf("Restored queue has wrong id (%d instead of %d)\n", - id, msgque->msq_id); + ksft_test_result_fail("Restored queue has wrong id (%d instead of %d)\n" + , id, msgque->msq_id); ret = -EFAULT; goto destroy; } @@ -66,7 +66,7 @@ int restore_queue(struct msgque_data *msgque) for (i = 0; i < msgque->qnum; i++) { if (msgsnd(msgque->msq_id, &msgque->messages[i].mtype, msgque->messages[i].msize, IPC_NOWAIT) != 0) { - printf("msgsnd failed (%m)\n"); + ksft_test_result_fail("msgsnd failed (%m)\n"); ret = -errno; goto destroy; } @@ -90,23 +90,22 @@ int check_and_destroy_queue(struct msgque_data *msgque) if (ret < 0) { if (errno == ENOMSG) break; - printf("Failed to read IPC message: %m\n"); + ksft_test_result_fail("Failed to read IPC message: %m\n"); ret = -errno; goto err; } if (ret != msgque->messages[cnt].msize) { - printf("Wrong message size: %d (expected %d)\n", ret, - msgque->messages[cnt].msize); + ksft_test_result_fail("Wrong message size: %d (expected %d)\n", ret, msgque->messages[cnt].msize); ret = -EINVAL; goto err; } if (message.mtype != msgque->messages[cnt].mtype) { - printf("Wrong message type\n"); + ksft_test_result_fail("Wrong message type\n"); ret = -EINVAL; goto err; } if (memcmp(message.mtext, msgque->messages[cnt].mtext, ret)) { - printf("Wrong message content\n"); + ksft_test_result_fail("Wrong message content\n"); ret = -EINVAL; goto err; } @@ -114,7 +113,7 @@ int check_and_destroy_queue(struct msgque_data *msgque) } if (cnt != msgque->qnum) { - printf("Wrong message number\n"); + ksft_test_result_fail("Wrong message number\n"); ret = -EINVAL; goto err; } @@ -139,7 +138,7 @@ int dump_queue(struct msgque_data *msgque) if (ret < 0) { if (errno == EINVAL) continue; - printf("Failed to get stats for IPC queue with id %d\n", + ksft_test_result_fail("Failed to get stats for IPC queue with id %d\n", kern_id); return -errno; } @@ -150,7 +149,7 @@ int dump_queue(struct msgque_data *msgque) msgque->messages = malloc(sizeof(struct msg1) * ds.msg_qnum); if (msgque->messages == NULL) { - printf("Failed to get stats for IPC queue\n"); + ksft_test_result_fail("Failed to get stats for IPC queue\n"); return -ENOMEM; } @@ -162,7 +161,7 @@ int dump_queue(struct msgque_data *msgque) ret = 
msgrcv(msgque->msq_id, &msgque->messages[i].mtype, MAX_MSG_SIZE, i, IPC_NOWAIT | MSG_COPY); if (ret < 0) { - printf("Failed to copy IPC message: %m (%d)\n", errno); + ksft_test_result_fail("Failed to copy IPC message: %m (%d)\n", errno); return -errno; } msgque->messages[i].msize = ret; @@ -178,7 +177,7 @@ int fill_msgque(struct msgque_data *msgque) memcpy(msgbuf.mtext, TEST_STRING, sizeof(TEST_STRING)); if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(TEST_STRING), IPC_NOWAIT) != 0) { - printf("First message send failed (%m)\n"); + ksft_test_result_fail("First message send failed (%m)\n"); return -errno; } @@ -186,7 +185,7 @@ int fill_msgque(struct msgque_data *msgque) memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING)); if (msgsnd(msgque->msq_id, &msgbuf.mtype, sizeof(ANOTHER_TEST_STRING), IPC_NOWAIT) != 0) { - printf("Second message send failed (%m)\n"); + ksft_test_result_fail("Second message send failed (%m)\n"); return -errno; } return 0; @@ -202,44 +201,44 @@ int main(int argc, char **argv) msgque.key = ftok(argv[0], 822155650); if (msgque.key == -1) { - printf("Can't make key: %d\n", -errno); + ksft_test_result_fail("Can't make key: %d\n", -errno); ksft_exit_fail(); } msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666); if (msgque.msq_id == -1) { err = -errno; - printf("Can't create queue: %d\n", err); + ksft_test_result_fail("Can't create queue: %d\n", err); goto err_out; } err = fill_msgque(&msgque); if (err) { - printf("Failed to fill queue: %d\n", err); + ksft_test_result_fail("Failed to fill queue: %d\n", err); goto err_destroy; } err = dump_queue(&msgque); if (err) { - printf("Failed to dump queue: %d\n", err); + ksft_test_result_fail("Failed to dump queue: %d\n", err); goto err_destroy; } err = check_and_destroy_queue(&msgque); if (err) { - printf("Failed to check and destroy queue: %d\n", err); + ksft_test_result_fail("Failed to check and destroy queue: %d\n", err); goto err_out; } err = restore_queue(&msgque); if (err) { - printf("Failed to restore queue: %d\n", err); + ksft_test_result_fail("Failed to restore queue: %d\n", err); goto err_destroy; } err = check_and_destroy_queue(&msgque); if (err) { - printf("Failed to test queue: %d\n", err); + ksft_test_result_fail("Failed to test queue: %d\n", err); goto err_out; } ksft_exit_pass(); diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile index e3000ccb9a5d..874cfdd3b75b 100644 --- a/tools/testing/selftests/kexec/Makefile +++ b/tools/testing/selftests/kexec/Makefile @@ -12,7 +12,7 @@ include ../../../scripts/Makefile.arch ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86) TEST_PROGS += test_kexec_jump.sh -test_kexec_jump.sh: $(OUTPUT)/test_kexec_jump +TEST_GEN_PROGS := test_kexec_jump endif include ../lib.mk diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index 38b95998e1e6..f6fe7a07a0a2 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -59,6 +59,7 @@ TEST_PROGS_x86 += x86/nx_huge_pages_test.sh TEST_GEN_PROGS_COMMON = demand_paging_test TEST_GEN_PROGS_COMMON += dirty_log_test TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += irqfd_test TEST_GEN_PROGS_COMMON += kvm_binary_stats_test TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus TEST_GEN_PROGS_COMMON += kvm_page_table_test @@ -134,6 +135,7 @@ TEST_GEN_PROGS_x86 += x86/amx_test TEST_GEN_PROGS_x86 += x86/max_vcpuid_cap_test TEST_GEN_PROGS_x86 += x86/triple_fault_event_test TEST_GEN_PROGS_x86 += 
x86/recalc_apic_map_test +TEST_GEN_PROGS_x86 += x86/aperfmperf_test TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test TEST_GEN_PROGS_x86 += dirty_log_perf_test @@ -156,7 +158,7 @@ TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions TEST_GEN_PROGS_arm64 += arm64/host_sve TEST_GEN_PROGS_arm64 += arm64/hypercalls -TEST_GEN_PROGS_arm64 += arm64/mmio_abort +TEST_GEN_PROGS_arm64 += arm64/external_aborts TEST_GEN_PROGS_arm64 += arm64/page_fault_test TEST_GEN_PROGS_arm64 += arm64/psci_test TEST_GEN_PROGS_arm64 += arm64/set_id_regs diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c index da7196fd1b23..c9de66537ec3 100644 --- a/tools/testing/selftests/kvm/access_tracking_perf_test.c +++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c @@ -596,11 +596,8 @@ int main(int argc, char *argv[]) if (ret) return ret; } else { - page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR); - __TEST_REQUIRE(page_idle_fd >= 0, - "Couldn't open /sys/kernel/mm/page_idle/bitmap. " - "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); - + page_idle_fd = __open_path_or_exit("/sys/kernel/mm/page_idle/bitmap", O_RDWR, + "Is CONFIG_IDLE_PAGE_TRACKING enabled?"); close(page_idle_fd); puts("Using page_idle for aging"); diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c index acb2cb596332..cf8fb67104f1 100644 --- a/tools/testing/selftests/kvm/arch_timer.c +++ b/tools/testing/selftests/kvm/arch_timer.c @@ -98,16 +98,11 @@ static uint32_t test_get_pcpu(void) static int test_migrate_vcpu(unsigned int vcpu_idx) { int ret; - cpu_set_t cpuset; uint32_t new_pcpu = test_get_pcpu(); - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu); - ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx], - sizeof(cpuset), &cpuset); + ret = __pin_task_to_cpu(pt_vcpu_run[vcpu_idx], new_pcpu); /* Allow the error where the vCPU thread is already finished */ TEST_ASSERT(ret == 0 || ret == ESRCH, diff --git a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c index a36a7e2db434..ce74d069cb7b 100644 --- a/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c +++ b/tools/testing/selftests/kvm/arm64/arch_timer_edge_cases.c @@ -22,7 +22,8 @@ #include "gic.h" #include "vgic.h" -static const uint64_t CVAL_MAX = ~0ULL; +/* Depends on counter width. */ +static uint64_t CVAL_MAX; /* tval is a signed 32-bit int. */ static const int32_t TVAL_MAX = INT32_MAX; static const int32_t TVAL_MIN = INT32_MIN; @@ -30,8 +31,8 @@ static const int32_t TVAL_MIN = INT32_MIN; /* After how much time we say there is no IRQ. */ static const uint32_t TIMEOUT_NO_IRQ_US = 50000; -/* A nice counter value to use as the starting one for most tests. */ -static const uint64_t DEF_CNT = (CVAL_MAX / 2); +/* Counter value to use as the starting one for most tests. Set to CVAL_MAX/2 */ +static uint64_t DEF_CNT; /* Number of runs. 
*/ static const uint32_t NR_TEST_ITERS_DEF = 5; @@ -191,8 +192,8 @@ static void set_tval_irq(enum arch_timer timer, uint64_t tval_cycles, { atomic_set(&shared_data.handled, 0); atomic_set(&shared_data.spurious, 0); - timer_set_ctl(timer, ctl); timer_set_tval(timer, tval_cycles); + timer_set_ctl(timer, ctl); } static void set_xval_irq(enum arch_timer timer, uint64_t xval, uint32_t ctl, @@ -732,12 +733,6 @@ static void test_move_counters_ahead_of_timers(enum arch_timer timer) test_set_cnt_after_tval(timer, 0, tval, (uint64_t) tval + 1, wm); } - - for (i = 0; i < ARRAY_SIZE(sleep_method); i++) { - sleep_method_t sm = sleep_method[i]; - - test_set_cnt_after_cval_no_irq(timer, 0, DEF_CNT, CVAL_MAX, sm); - } } /* @@ -849,17 +844,17 @@ static void guest_code(enum arch_timer timer) GUEST_DONE(); } +static cpu_set_t default_cpuset; + static uint32_t next_pcpu(void) { uint32_t max = get_nprocs(); uint32_t cur = sched_getcpu(); uint32_t next = cur; - cpu_set_t cpuset; + cpu_set_t cpuset = default_cpuset; TEST_ASSERT(max > 1, "Need at least two physical cpus"); - sched_getaffinity(0, sizeof(cpuset), &cpuset); - do { next = (next + 1) % CPU_SETSIZE; } while (!CPU_ISSET(next, &cpuset)); @@ -867,25 +862,6 @@ static uint32_t next_pcpu(void) return next; } -static void migrate_self(uint32_t new_pcpu) -{ - int ret; - cpu_set_t cpuset; - pthread_t thread; - - thread = pthread_self(); - - CPU_ZERO(&cpuset); - CPU_SET(new_pcpu, &cpuset); - - pr_debug("Migrating from %u to %u\n", sched_getcpu(), new_pcpu); - - ret = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset); - - TEST_ASSERT(ret == 0, "Failed to migrate to pCPU: %u; ret: %d\n", - new_pcpu, ret); -} - static void kvm_set_cntxct(struct kvm_vcpu *vcpu, uint64_t cnt, enum arch_timer timer) { @@ -912,7 +888,7 @@ static void handle_sync(struct kvm_vcpu *vcpu, struct ucall *uc) sched_yield(); break; case USERSPACE_MIGRATE_SELF: - migrate_self(next_pcpu()); + pin_self_to_cpu(next_pcpu()); break; default: break; @@ -924,7 +900,7 @@ static void test_run(struct kvm_vm *vm, struct kvm_vcpu *vcpu) struct ucall uc; /* Start on CPU 0 */ - migrate_self(0); + pin_self_to_cpu(0); while (true) { vcpu_run(vcpu); @@ -959,6 +935,8 @@ static void test_init_timer_irq(struct kvm_vm *vm, struct kvm_vcpu *vcpu) pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq); } +static int gic_fd; + static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, enum arch_timer timer) { @@ -973,8 +951,18 @@ static void test_vm_create(struct kvm_vm **vm, struct kvm_vcpu **vcpu, vcpu_args_set(*vcpu, 1, timer); test_init_timer_irq(*vm, *vcpu); - vgic_v3_setup(*vm, 1, 64); + gic_fd = vgic_v3_setup(*vm, 1, 64); + __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3"); + sync_global_to_guest(*vm, test_args); + sync_global_to_guest(*vm, CVAL_MAX); + sync_global_to_guest(*vm, DEF_CNT); +} + +static void test_vm_cleanup(struct kvm_vm *vm) +{ + close(gic_fd); + kvm_vm_free(vm); } static void test_print_help(char *name) @@ -986,7 +974,7 @@ static void test_print_help(char *name) pr_info("\t-b: Test both physical and virtual timers (default: true)\n"); pr_info("\t-l: Delta (in ms) used for long wait time test (default: %u)\n", LONG_WAIT_TEST_MS); - pr_info("\t-l: Delta (in ms) used for wait times (default: %u)\n", + pr_info("\t-w: Delta (in ms) used for wait times (default: %u)\n", WAIT_TEST_MS); pr_info("\t-p: Test physical timer (default: true)\n"); pr_info("\t-v: Test virtual timer (default: true)\n"); @@ -1035,6 +1023,17 @@ static bool parse_args(int argc, char *argv[]) 
return false; } +static void set_counter_defaults(void) +{ + const uint64_t MIN_ROLLOVER_SECS = 40ULL * 365 * 24 * 3600; + uint64_t freq = read_sysreg(CNTFRQ_EL0); + uint64_t width = ilog2(MIN_ROLLOVER_SECS * freq); + + width = clamp(width, 56, 64); + CVAL_MAX = GENMASK_ULL(width - 1, 0); + DEF_CNT = CVAL_MAX / 2; +} + int main(int argc, char *argv[]) { struct kvm_vcpu *vcpu; @@ -1046,16 +1045,19 @@ int main(int argc, char *argv[]) if (!parse_args(argc, argv)) exit(KSFT_SKIP); + sched_getaffinity(0, sizeof(default_cpuset), &default_cpuset); + set_counter_defaults(); + if (test_args.test_virtual) { test_vm_create(&vm, &vcpu, VIRTUAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } if (test_args.test_physical) { test_vm_create(&vm, &vcpu, PHYSICAL); test_run(vm, vcpu); - kvm_vm_free(vm); + test_vm_cleanup(vm); } return 0; diff --git a/tools/testing/selftests/kvm/arm64/debug-exceptions.c b/tools/testing/selftests/kvm/arm64/debug-exceptions.c index c7fb55c9135b..e34963956fbc 100644 --- a/tools/testing/selftests/kvm/arm64/debug-exceptions.c +++ b/tools/testing/selftests/kvm/arm64/debug-exceptions.c @@ -140,7 +140,7 @@ static void enable_os_lock(void) static void enable_monitor_debug_exceptions(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); @@ -223,7 +223,7 @@ void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr, static void install_ss(void) { - uint32_t mdscr; + uint64_t mdscr; asm volatile("msr daifclr, #8"); diff --git a/tools/testing/selftests/kvm/arm64/external_aborts.c b/tools/testing/selftests/kvm/arm64/external_aborts.c new file mode 100644 index 000000000000..062bf84cced1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/external_aborts.c @@ -0,0 +1,330 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * external_abort - Tests for userspace external abort injection + * + * Copyright (c) 2024 Google LLC + */ +#include "processor.h" +#include "test_util.h" + +#define MMIO_ADDR 0x8000000ULL +#define EXPECTED_SERROR_ISS (ESR_ELx_ISV | 0x1d1ed) + +static u64 expected_abort_pc; + +static void expect_sea_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); + GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); + + GUEST_DONE(); +} + +static void unexpected_dabt_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); +} + +static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, + handler_fn dabt_handler) +{ + struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); + + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(*vcpu); + vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); + + virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); + + return vm; +} + +static void vcpu_inject_sea(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.ext_dabt_pending = true; + vcpu_events_set(vcpu, &events); +} + +static bool vcpu_has_ras(struct kvm_vcpu *vcpu) +{ + u64 pfr0 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1)); + + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, pfr0); +} + +static bool guest_has_ras(void) +{ + return SYS_FIELD_GET(ID_AA64PFR0_EL1, RAS, read_sysreg(id_aa64pfr0_el1)); +} + +static void vcpu_inject_serror(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_events events = {}; + + events.exception.serror_pending = true; + if (vcpu_has_ras(vcpu)) { + 
events.exception.serror_has_esr = true; + events.exception.serror_esr = EXPECTED_SERROR_ISS; + } + + vcpu_events_set(vcpu, &events); +} + +static void __vcpu_run_expect(struct kvm_vcpu *vcpu, unsigned int cmd) +{ + struct ucall uc; + + vcpu_run(vcpu); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + default: + if (uc.cmd == cmd) + return; + + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } +} + +static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_DONE); +} + +static void vcpu_run_expect_sync(struct kvm_vcpu *vcpu) +{ + __vcpu_run_expect(vcpu, UCALL_SYNC); +} + +extern char test_mmio_abort_insn; + +static noinline void test_mmio_abort_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); + + asm volatile("test_mmio_abort_insn:\n\t" + "ldr x0, [%0]\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +extern char test_mmio_nisv_insn; + +static void test_mmio_nisv_guest(void) +{ + WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); + + asm volatile("test_mmio_nisv_insn:\n\t" + "ldr x0, [%0], #8\n\t" + : : "r" (MMIO_ADDR) : "x0", "memory"); + + GUEST_FAIL("MMIO instruction should not retire"); +} + +/* + * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace + * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. + */ +static void test_mmio_nisv(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + unexpected_dabt_handler); + + TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); + TEST_ASSERT_EQ(errno, ENOSYS); + + kvm_vm_free(vm); +} + +/* + * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA + * reaches the guest. 
+ */ +static void test_mmio_nisv_abort(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, + expect_sea_handler); + struct kvm_run *run = vcpu->run; + + vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); + TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void unexpected_serror_handler(struct ex_regs *regs) +{ + GUEST_FAIL("Took unexpected SError exception"); +} + +static void test_serror_masked_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + isb(); + + GUEST_DONE(); +} + +static void test_serror_masked(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_masked_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, unexpected_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void expect_serror_handler(struct ex_regs *regs) +{ + u64 esr = read_sysreg(esr_el1); + + GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_SERROR); + if (guest_has_ras()) + GUEST_ASSERT_EQ(ESR_ELx_ISS(esr), EXPECTED_SERROR_ISS); + + GUEST_DONE(); +} + +static void test_serror_guest(void) +{ + GUEST_ASSERT(read_sysreg(isr_el1) & ISR_EL1_A); + + local_serror_enable(); + isb(); + local_serror_disable(); + + GUEST_FAIL("Should've taken pending SError exception"); +} + +static void test_serror(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_serror_emulated_guest(void) +{ + GUEST_ASSERT(!(read_sysreg(isr_el1) & ISR_EL1_A)); + + local_serror_enable(); + GUEST_SYNC(0); + local_serror_disable(); + + GUEST_FAIL("Should've taken unmasked SError exception"); +} + +static void test_serror_emulated(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_serror_emulated_guest, + unexpected_dabt_handler); + + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_serror_handler); + + vcpu_run_expect_sync(vcpu); + vcpu_inject_serror(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +static void test_mmio_ease_guest(void) +{ + sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE); + isb(); + + test_mmio_abort_guest(); +} + +/* + * Test that KVM doesn't complete MMIO emulation when userspace has made an + * external abort pending for the instruction. + */ +static void test_mmio_ease(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_ease_guest, + unexpected_dabt_handler); + struct kvm_run *run = vcpu->run; + u64 pfr1; + + pfr1 = vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR1_EL1)); + if (!SYS_FIELD_GET(ID_AA64PFR1_EL1, DF2, pfr1)) { + pr_debug("Skipping %s\n", __func__); + return; + } + + /* + * SCTLR2_ELx.EASE changes the exception vector to the SError vector but + * doesn't further modify the exception context (e.g. ESR_ELx, FAR_ELx). 
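+	 *
+	 * As a sketch, the guest opt-in (see test_mmio_ease_guest() above) is
+	 * nothing more than:
+	 *
+	 *	sysreg_clear_set_s(SYS_SCTLR2_EL1, 0, SCTLR2_EL1_EASE);
+	 *	isb();
+	 *
+	 * which is why the SEA handler is installed on the SError vector below.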
+ */ + vm_install_exception_handler(vm, VECTOR_ERROR_CURRENT, expect_sea_handler); + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); + TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); + TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); + TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); + + vcpu_inject_sea(vcpu); + vcpu_run_expect_done(vcpu); + kvm_vm_free(vm); +} + +int main(void) +{ + test_mmio_abort(); + test_mmio_nisv(); + test_mmio_nisv_abort(); + test_serror(); + test_serror_masked(); + test_serror_emulated(); + test_mmio_ease(); +} diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index d01798b6b3b4..011fad95dd02 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -15,6 +15,12 @@ #include "test_util.h" #include "processor.h" +#define SYS_REG(r) ARM64_SYS_REG(sys_reg_Op0(SYS_ ## r), \ + sys_reg_Op1(SYS_ ## r), \ + sys_reg_CRn(SYS_ ## r), \ + sys_reg_CRm(SYS_ ## r), \ + sys_reg_Op2(SYS_ ## r)) + struct feature_id_reg { __u64 reg; __u64 id_reg; @@ -22,37 +28,43 @@ struct feature_id_reg { __u64 feat_min; }; -static struct feature_id_reg feat_id_regs[] = { - { - ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 0, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 8, - 1 - }, - { - ARM64_SYS_REG(3, 0, 10, 2, 4), /* POR_EL1 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 - }, - { - ARM64_SYS_REG(3, 3, 10, 2, 4), /* POR_EL0 */ - ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */ - 16, - 1 +#define FEAT(id, f, v) \ + .id_reg = SYS_REG(id), \ + .feat_shift = id ## _ ## f ## _SHIFT, \ + .feat_min = id ## _ ## f ## _ ## v + +#define REG_FEAT(r, id, f, v) \ + { \ + .reg = SYS_REG(r), \ + FEAT(id, f, v) \ } + +static struct feature_id_reg feat_id_regs[] = { + REG_FEAT(TCR2_EL1, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(TCR2_EL2, ID_AA64MMFR3_EL1, TCRX, IMP), + REG_FEAT(PIRE0_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIRE0_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL1, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(PIR_EL2, ID_AA64MMFR3_EL1, S1PIE, IMP), + REG_FEAT(POR_EL1, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL0, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(POR_EL2, ID_AA64MMFR3_EL1, S1POE, IMP), + REG_FEAT(HCRX_EL2, ID_AA64MMFR1_EL1, HCX, IMP), + REG_FEAT(HFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGITR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HDFGWTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HAFGRTR_EL2, ID_AA64MMFR0_EL1, FGT, IMP), + REG_FEAT(HFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HFGITR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGRTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), + REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), + REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), }; bool filter_reg(__u64 reg) @@ -469,6 +481,7 @@ static __u64 base_regs[] = { ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 
0, 1), /* ACTLR_EL1 */ ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */ + KVM_ARM64_SYS_REG(SYS_SCTLR2_EL1), ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */ @@ -686,6 +699,62 @@ static __u64 pauth_generic_regs[] = { ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */ }; +static __u64 el2_regs[] = { + SYS_REG(VPIDR_EL2), + SYS_REG(VMPIDR_EL2), + SYS_REG(SCTLR_EL2), + SYS_REG(ACTLR_EL2), + SYS_REG(HCR_EL2), + SYS_REG(MDCR_EL2), + SYS_REG(CPTR_EL2), + SYS_REG(HSTR_EL2), + SYS_REG(HFGRTR_EL2), + SYS_REG(HFGWTR_EL2), + SYS_REG(HFGITR_EL2), + SYS_REG(HACR_EL2), + SYS_REG(ZCR_EL2), + SYS_REG(HCRX_EL2), + SYS_REG(TTBR0_EL2), + SYS_REG(TTBR1_EL2), + SYS_REG(TCR_EL2), + SYS_REG(TCR2_EL2), + SYS_REG(VTTBR_EL2), + SYS_REG(VTCR_EL2), + SYS_REG(VNCR_EL2), + SYS_REG(HDFGRTR2_EL2), + SYS_REG(HDFGWTR2_EL2), + SYS_REG(HFGRTR2_EL2), + SYS_REG(HFGWTR2_EL2), + SYS_REG(HDFGRTR_EL2), + SYS_REG(HDFGWTR_EL2), + SYS_REG(HAFGRTR_EL2), + SYS_REG(HFGITR2_EL2), + SYS_REG(SPSR_EL2), + SYS_REG(ELR_EL2), + SYS_REG(AFSR0_EL2), + SYS_REG(AFSR1_EL2), + SYS_REG(ESR_EL2), + SYS_REG(FAR_EL2), + SYS_REG(HPFAR_EL2), + SYS_REG(MAIR_EL2), + SYS_REG(PIRE0_EL2), + SYS_REG(PIR_EL2), + SYS_REG(POR_EL2), + SYS_REG(AMAIR_EL2), + SYS_REG(VBAR_EL2), + SYS_REG(CONTEXTIDR_EL2), + SYS_REG(TPIDR_EL2), + SYS_REG(CNTVOFF_EL2), + SYS_REG(CNTHCTL_EL2), + SYS_REG(CNTHP_CTL_EL2), + SYS_REG(CNTHP_CVAL_EL2), + SYS_REG(CNTHV_CTL_EL2), + SYS_REG(CNTHV_CVAL_EL2), + SYS_REG(SP_EL2), + SYS_REG(VDISR_EL2), + SYS_REG(VSESR_EL2), +}; + #define BASE_SUBLIST \ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), } #define VREGS_SUBLIST \ @@ -712,6 +781,14 @@ static __u64 pauth_generic_regs[] = { .regs = pauth_generic_regs, \ .regs_n = ARRAY_SIZE(pauth_generic_regs), \ } +#define EL2_SUBLIST \ + { \ + .name = "EL2", \ + .capability = KVM_CAP_ARM_EL2, \ + .feature = KVM_ARM_VCPU_HAS_EL2, \ + .regs = el2_regs, \ + .regs_n = ARRAY_SIZE(el2_regs), \ + } static struct vcpu_reg_list vregs_config = { .sublists = { @@ -761,6 +838,65 @@ static struct vcpu_reg_list pauth_pmu_config = { }, }; +static struct vcpu_reg_list el2_vregs_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_vregs_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_sve_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_sve_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + SVE_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + {0}, + }, +}; + +static struct vcpu_reg_list el2_pauth_pmu_config = { + .sublists = { + BASE_SUBLIST, + EL2_SUBLIST, + VREGS_SUBLIST, + PAUTH_SUBLIST, + PMU_SUBLIST, + {0}, + }, +}; + struct vcpu_reg_list *vcpu_configs[] = { &vregs_config, &vregs_pmu_config, @@ -768,5 +904,12 @@ struct vcpu_reg_list *vcpu_configs[] = { &sve_pmu_config, &pauth_config, &pauth_pmu_config, + + &el2_vregs_config, + &el2_vregs_pmu_config, + &el2_sve_config, + &el2_sve_pmu_config, + &el2_pauth_config, + &el2_pauth_pmu_config, }; int vcpu_configs_n = ARRAY_SIZE(vcpu_configs); diff --git a/tools/testing/selftests/kvm/arm64/mmio_abort.c b/tools/testing/selftests/kvm/arm64/mmio_abort.c deleted file mode 100644 index 8b7a80a51b1c..000000000000 
--- a/tools/testing/selftests/kvm/arm64/mmio_abort.c +++ /dev/null @@ -1,159 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * mmio_abort - Tests for userspace MMIO abort injection - * - * Copyright (c) 2024 Google LLC - */ -#include "processor.h" -#include "test_util.h" - -#define MMIO_ADDR 0x8000000ULL - -static u64 expected_abort_pc; - -static void expect_sea_handler(struct ex_regs *regs) -{ - u64 esr = read_sysreg(esr_el1); - - GUEST_ASSERT_EQ(regs->pc, expected_abort_pc); - GUEST_ASSERT_EQ(ESR_ELx_EC(esr), ESR_ELx_EC_DABT_CUR); - GUEST_ASSERT_EQ(esr & ESR_ELx_FSC_TYPE, ESR_ELx_FSC_EXTABT); - - GUEST_DONE(); -} - -static void unexpected_dabt_handler(struct ex_regs *regs) -{ - GUEST_FAIL("Unexpected data abort at PC: %lx\n", regs->pc); -} - -static struct kvm_vm *vm_create_with_dabt_handler(struct kvm_vcpu **vcpu, void *guest_code, - handler_fn dabt_handler) -{ - struct kvm_vm *vm = vm_create_with_one_vcpu(vcpu, guest_code); - - vm_init_descriptor_tables(vm); - vcpu_init_descriptor_tables(*vcpu); - vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT, ESR_ELx_EC_DABT_CUR, dabt_handler); - - virt_map(vm, MMIO_ADDR, MMIO_ADDR, 1); - - return vm; -} - -static void vcpu_inject_extabt(struct kvm_vcpu *vcpu) -{ - struct kvm_vcpu_events events = {}; - - events.exception.ext_dabt_pending = true; - vcpu_events_set(vcpu, &events); -} - -static void vcpu_run_expect_done(struct kvm_vcpu *vcpu) -{ - struct ucall uc; - - vcpu_run(vcpu); - switch (get_ucall(vcpu, &uc)) { - case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - break; - case UCALL_DONE: - break; - default: - TEST_FAIL("Unexpected ucall: %lu", uc.cmd); - } -} - -extern char test_mmio_abort_insn; - -static void test_mmio_abort_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_abort_insn); - - asm volatile("test_mmio_abort_insn:\n\t" - "ldr x0, [%0]\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that KVM doesn't complete MMIO emulation when userspace has made an - * external abort pending for the instruction. - */ -static void test_mmio_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_abort_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_MMIO); - TEST_ASSERT_EQ(run->mmio.phys_addr, MMIO_ADDR); - TEST_ASSERT_EQ(run->mmio.len, sizeof(unsigned long)); - TEST_ASSERT(!run->mmio.is_write, "Expected MMIO read"); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -extern char test_mmio_nisv_insn; - -static void test_mmio_nisv_guest(void) -{ - WRITE_ONCE(expected_abort_pc, (u64)&test_mmio_nisv_insn); - - asm volatile("test_mmio_nisv_insn:\n\t" - "ldr x0, [%0], #8\n\t" - : : "r" (MMIO_ADDR) : "x0", "memory"); - - GUEST_FAIL("MMIO instruction should not retire"); -} - -/* - * Test that the KVM_RUN ioctl fails for ESR_EL2.ISV=0 MMIO aborts if userspace - * hasn't enabled KVM_CAP_ARM_NISV_TO_USER. - */ -static void test_mmio_nisv(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - unexpected_dabt_handler); - - TEST_ASSERT(_vcpu_run(vcpu), "Expected nonzero return code from KVM_RUN"); - TEST_ASSERT_EQ(errno, ENOSYS); - - kvm_vm_free(vm); -} - -/* - * Test that ESR_EL2.ISV=0 MMIO aborts reach userspace and that an injected SEA - * reaches the guest. 
- */ -static void test_mmio_nisv_abort(void) -{ - struct kvm_vcpu *vcpu; - struct kvm_vm *vm = vm_create_with_dabt_handler(&vcpu, test_mmio_nisv_guest, - expect_sea_handler); - struct kvm_run *run = vcpu->run; - - vm_enable_cap(vm, KVM_CAP_ARM_NISV_TO_USER, 1); - - vcpu_run(vcpu); - TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_ARM_NISV); - TEST_ASSERT_EQ(run->arm_nisv.fault_ipa, MMIO_ADDR); - - vcpu_inject_extabt(vcpu); - vcpu_run_expect_done(vcpu); - kvm_vm_free(vm); -} - -int main(void) -{ - test_mmio_abort(); - test_mmio_nisv(); - test_mmio_nisv_abort(); -} diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 8f422bfdfcb9..d3bf9204409c 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -139,6 +139,7 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { }; static const struct reg_ftr_bits ftr_id_aa64pfr1_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, DF2, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, CSV2_frac, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, SSBS, ID_AA64PFR1_EL1_SSBS_NI), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR1_EL1, BT, 0), @@ -187,6 +188,14 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = { REG_FTR_END, }; +static const struct reg_ftr_bits ftr_id_aa64mmfr3_el1[] = { + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1POE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, S1PIE, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, SCTLRX, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR3_EL1, TCRX, 0), + REG_FTR_END, +}; + static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0), @@ -217,6 +226,7 @@ static struct test_feature_reg test_regs[] = { TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1), TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1), TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1), + TEST_REG(SYS_ID_AA64MMFR3_EL1, ftr_id_aa64mmfr3_el1), TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1), }; @@ -774,8 +784,8 @@ int main(void) ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) + ARRAY_SIZE(ftr_id_aa64pfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) + ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + - ARRAY_SIZE(ftr_id_aa64zfr0_el1) - ARRAY_SIZE(test_regs) + 3 + - MPAM_IDREG_TEST + MTE_IDREG_TEST; + ARRAY_SIZE(ftr_id_aa64mmfr3_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) - + ARRAY_SIZE(test_regs) + 3 + MPAM_IDREG_TEST + MTE_IDREG_TEST; ksft_set_plan(test_cnt); diff --git a/tools/testing/selftests/kvm/arm64/vgic_init.c b/tools/testing/selftests/kvm/arm64/vgic_init.c index b3b5fb0ff0a9..a8e0f46bc0ab 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_init.c +++ b/tools/testing/selftests/kvm/arm64/vgic_init.c @@ -9,17 +9,18 @@ #include <asm/kvm.h> #include <asm/kvm_para.h> +#include <arm64/gic_v3.h> + #include "test_util.h" #include "kvm_util.h" #include "processor.h" #include "vgic.h" +#include "gic_v3.h" #define NR_VCPUS 4 #define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset) -#define GICR_TYPER 0x8 - #define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2) #define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3) @@ -675,6 +676,44 @@ static void test_v3_its_region(void) vm_gic_destroy(&v); } +static void test_v3_nassgicap(void) +{ + struct kvm_vcpu *vcpus[NR_VCPUS]; + bool has_nassgicap; + struct vm_gic vm; + u32 typer2; + 
int ret; + + vm = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus); + kvm_device_attr_get(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + has_nassgicap = typer2 & GICD_TYPER2_nASSGIcap; + + typer2 |= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + if (has_nassgicap) + TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEVICE_ATTR_SET, ret)); + else + TEST_ASSERT(ret && errno == EINVAL, + "Enabled nASSGIcap even though it's unavailable"); + + typer2 &= ~GICD_TYPER2_nASSGIcap; + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + + kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + typer2 ^= GICD_TYPER2_nASSGIcap; + ret = __kvm_device_attr_set(vm.gic_fd, KVM_DEV_ARM_VGIC_GRP_DIST_REGS, + GICD_TYPER2, &typer2); + TEST_ASSERT(ret && errno == EBUSY, + "Changed nASSGIcap after initializing the VGIC"); + + vm_gic_destroy(&vm); +} + /* * Returns 0 if it's possible to create GIC device of a given type (V2 or V3). */ @@ -715,6 +754,220 @@ int test_kvm_device(uint32_t gic_dev_type) return 0; } +struct sr_def { + const char *name; + u32 encoding; +}; + +#define PACK_SR(r) \ + ((sys_reg_Op0(r) << 14) | \ + (sys_reg_Op1(r) << 11) | \ + (sys_reg_CRn(r) << 7) | \ + (sys_reg_CRm(r) << 3) | \ + (sys_reg_Op2(r))) + +#define SR(r) \ + { \ + .name = #r, \ + .encoding = r, \ + } + +static const struct sr_def sysregs_el1[] = { + SR(SYS_ICC_PMR_EL1), + SR(SYS_ICC_BPR0_EL1), + SR(SYS_ICC_AP0R0_EL1), + SR(SYS_ICC_AP0R1_EL1), + SR(SYS_ICC_AP0R2_EL1), + SR(SYS_ICC_AP0R3_EL1), + SR(SYS_ICC_AP1R0_EL1), + SR(SYS_ICC_AP1R1_EL1), + SR(SYS_ICC_AP1R2_EL1), + SR(SYS_ICC_AP1R3_EL1), + SR(SYS_ICC_BPR1_EL1), + SR(SYS_ICC_CTLR_EL1), + SR(SYS_ICC_SRE_EL1), + SR(SYS_ICC_IGRPEN0_EL1), + SR(SYS_ICC_IGRPEN1_EL1), +}; + +static const struct sr_def sysregs_el2[] = { + SR(SYS_ICH_AP0R0_EL2), + SR(SYS_ICH_AP0R1_EL2), + SR(SYS_ICH_AP0R2_EL2), + SR(SYS_ICH_AP0R3_EL2), + SR(SYS_ICH_AP1R0_EL2), + SR(SYS_ICH_AP1R1_EL2), + SR(SYS_ICH_AP1R2_EL2), + SR(SYS_ICH_AP1R3_EL2), + SR(SYS_ICH_HCR_EL2), + SR(SYS_ICC_SRE_EL2), + SR(SYS_ICH_VTR_EL2), + SR(SYS_ICH_VMCR_EL2), + SR(SYS_ICH_LR0_EL2), + SR(SYS_ICH_LR1_EL2), + SR(SYS_ICH_LR2_EL2), + SR(SYS_ICH_LR3_EL2), + SR(SYS_ICH_LR4_EL2), + SR(SYS_ICH_LR5_EL2), + SR(SYS_ICH_LR6_EL2), + SR(SYS_ICH_LR7_EL2), + SR(SYS_ICH_LR8_EL2), + SR(SYS_ICH_LR9_EL2), + SR(SYS_ICH_LR10_EL2), + SR(SYS_ICH_LR11_EL2), + SR(SYS_ICH_LR12_EL2), + SR(SYS_ICH_LR13_EL2), + SR(SYS_ICH_LR14_EL2), + SR(SYS_ICH_LR15_EL2), +}; + +static void test_sysreg_array(int gic, const struct sr_def *sr, int nr, + int (*check)(int, const struct sr_def *, const char *)) +{ + for (int i = 0; i < nr; i++) { + u64 val; + u64 attr; + int ret; + + /* Assume MPIDR_EL1.Aff*=0 */ + attr = PACK_SR(sr[i].encoding); + + /* + * The API is braindead. A register can be advertised as + * available, and yet not be readable or writable. + * ICC_APnR{1,2,3}_EL1 are examples of such non-sense, and + * ICH_APnR{1,2,3}_EL2 do follow suit for consistency. + * + * On the bright side, no known HW is implementing more than + * 5 bits of priority, so we're safe. Sort of... 
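+	 *
+	 * For each register, the expected flow below is, roughly:
+	 *
+	 *	__kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, attr);
+	 *	__kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, attr, &val);
+	 *	__kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, attr, &val);
+	 *
+	 * with check() deciding whether a failed read/write is acceptable for
+	 * the given register.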
+ */ + ret = __kvm_has_device_attr(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr); + TEST_ASSERT(ret == 0, "%s unavailable", sr[i].name); + + /* Check that we can write back what we read */ + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "read"), "%s unreadable", sr[i].name); + ret = __kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + attr, &val); + TEST_ASSERT(ret == 0 || !check(gic, &sr[i], "write"), "%s unwritable", sr[i].name); + } +} + +static u8 get_ctlr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICC_CTLR_EL1), &val); + TEST_ASSERT(ret == 0, "ICC_CTLR_EL1 unreadable"); + + pri = FIELD_GET(ICC_CTLR_EL1_PRI_BITS_MASK, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el1_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICC_AP0R1_EL1: + case SYS_ICC_AP1R1_EL1: + if (get_ctlr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICC_AP0R2_EL1: + case SYS_ICC_AP0R3_EL1: + case SYS_ICC_AP1R2_EL1: + case SYS_ICC_AP1R3_EL1: + if (get_ctlr_pribits(gic) == 7) + return 0; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static u8 get_vtr_pribits(int gic) +{ + int ret; + u64 val; + u8 pri; + + ret = __kvm_device_attr_get(gic, KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS, + PACK_SR(SYS_ICH_VTR_EL2), &val); + TEST_ASSERT(ret == 0, "ICH_VTR_EL2 unreadable"); + + pri = FIELD_GET(ICH_VTR_EL2_PRIbits, val) + 1; + TEST_ASSERT(pri >= 5 && pri <= 7, "Bad pribits %d", pri); + + return pri; +} + +static int check_unaccessible_el2_regs(int gic, const struct sr_def *sr, const char *what) +{ + switch (sr->encoding) { + case SYS_ICH_AP0R1_EL2: + case SYS_ICH_AP1R1_EL2: + if (get_vtr_pribits(gic) >= 6) + return -EINVAL; + break; + case SYS_ICH_AP0R2_EL2: + case SYS_ICH_AP0R3_EL2: + case SYS_ICH_AP1R2_EL2: + case SYS_ICH_AP1R3_EL2: + if (get_vtr_pribits(gic) == 7) + return -EINVAL; + break; + default: + return -EINVAL; + } + + pr_info("SKIP %s for %s\n", sr->name, what); + return 0; +} + +static void test_v3_sysregs(void) +{ + struct kvm_vcpu_init init = {}; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + u32 feat = 0; + int gic; + + if (kvm_check_cap(KVM_CAP_ARM_EL2)) + feat |= BIT(KVM_ARM_VCPU_HAS_EL2); + + vm = vm_create(1); + + vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init); + init.features[0] |= feat; + + vcpu = aarch64_vcpu_add(vm, 0, &init, NULL); + TEST_ASSERT(vcpu, "Can't create a vcpu?"); + + gic = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3); + TEST_ASSERT(gic >= 0, "No GIC???"); + + kvm_device_attr_set(gic, KVM_DEV_ARM_VGIC_GRP_CTRL, + KVM_DEV_ARM_VGIC_CTRL_INIT, NULL); + + test_sysreg_array(gic, sysregs_el1, ARRAY_SIZE(sysregs_el1), check_unaccessible_el1_regs); + if (feat) + test_sysreg_array(gic, sysregs_el2, ARRAY_SIZE(sysregs_el2), check_unaccessible_el2_regs); + else + pr_info("SKIP EL2 registers, not available\n"); + + close(gic); + kvm_vm_free(vm); +} + void run_tests(uint32_t gic_dev_type) { test_vcpus_then_vgic(gic_dev_type); @@ -730,6 +983,8 @@ void run_tests(uint32_t gic_dev_type) test_v3_last_bit_single_rdist(); test_v3_redist_ipa_range_check_at_vcpu_run(); test_v3_its_region(); + test_v3_sysregs(); + test_v3_nassgicap(); } } diff --git a/tools/testing/selftests/kvm/arm64/vgic_irq.c b/tools/testing/selftests/kvm/arm64/vgic_irq.c index 
f4ac28d53747..a09dd423c2d7 100644 --- a/tools/testing/selftests/kvm/arm64/vgic_irq.c +++ b/tools/testing/selftests/kvm/arm64/vgic_irq.c @@ -620,18 +620,12 @@ static void kvm_routing_and_irqfd_check(struct kvm_vm *vm, * that no actual interrupt was injected for those cases. */ - for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - fd[f] = eventfd(0, 0); - TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f])); - } + for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) + fd[f] = kvm_new_eventfd(); for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { - struct kvm_irqfd irqfd = { - .fd = fd[f], - .gsi = i - MIN_SPI, - }; assert(i <= (uint64_t)UINT_MAX); - vm_ioctl(vm, KVM_IRQFD, &irqfd); + kvm_assign_irqfd(vm, i - MIN_SPI, fd[f]); } for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) { diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config index 8835fed09e9f..96d874b239eb 100644 --- a/tools/testing/selftests/kvm/config +++ b/tools/testing/selftests/kvm/config @@ -1,5 +1,6 @@ CONFIG_KVM=y CONFIG_KVM_INTEL=y CONFIG_KVM_AMD=y +CONFIG_EVENTFD=y CONFIG_USERFAULTFD=y CONFIG_IDLE_PAGE_TRACKING=y diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index b0fc0f945766..255fed769a8a 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -254,6 +254,16 @@ static inline void local_irq_disable(void) asm volatile("msr daifset, #3" : : : "memory"); } +static inline void local_serror_enable(void) +{ + asm volatile("msr daifclr, #4" : : : "memory"); +} + +static inline void local_serror_disable(void) +{ + asm volatile("msr daifset, #4" : : : "memory"); +} + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index bee65ca08721..23a506d7eca3 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -18,8 +18,11 @@ #include <asm/atomic.h> #include <asm/kvm.h> +#include <sys/eventfd.h> #include <sys/ioctl.h> +#include <pthread.h> + #include "kvm_util_arch.h" #include "kvm_util_types.h" #include "sparsebit.h" @@ -253,6 +256,7 @@ struct vm_guest_mode_params { }; extern const struct vm_guest_mode_params vm_guest_mode_params[]; +int __open_path_or_exit(const char *path, int flags, const char *enoent_help); int open_path_or_exit(const char *path, int flags); int open_kvm_dev_path_or_exit(void); @@ -502,6 +506,45 @@ static inline int vm_get_stats_fd(struct kvm_vm *vm) return fd; } +static inline int __kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + struct kvm_irqfd irqfd = { + .fd = eventfd, + .gsi = gsi, + .flags = flags, + .resamplefd = -1, + }; + + return __vm_ioctl(vm, KVM_IRQFD, &irqfd); +} + +static inline void kvm_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd, + uint32_t flags) +{ + int ret = __kvm_irqfd(vm, gsi, eventfd, flags); + + TEST_ASSERT_VM_VCPU_IOCTL(!ret, KVM_IRQFD, ret, vm); +} + +static inline void kvm_assign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, 0); +} + +static inline void kvm_deassign_irqfd(struct kvm_vm *vm, uint32_t gsi, int eventfd) +{ + kvm_irqfd(vm, gsi, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +static inline int kvm_new_eventfd(void) +{ + int fd = eventfd(0, 0); + + TEST_ASSERT(fd >= 
0, __KVM_SYSCALL_ERROR("eventfd()", fd)); + return fd; +} + static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header) { ssize_t ret; @@ -1013,7 +1056,34 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm); void kvm_set_files_rlimit(uint32_t nr_vcpus); -void kvm_pin_this_task_to_pcpu(uint32_t pcpu); +int __pin_task_to_cpu(pthread_t task, int cpu); + +static inline void pin_task_to_cpu(pthread_t task, int cpu) +{ + int r; + + r = __pin_task_to_cpu(task, cpu); + TEST_ASSERT(!r, "Failed to set thread affinity to pCPU '%u'", cpu); +} + +static inline int pin_task_to_any_cpu(pthread_t task) +{ + int cpu = sched_getcpu(); + + pin_task_to_cpu(task, cpu); + return cpu; +} + +static inline void pin_self_to_cpu(int cpu) +{ + pin_task_to_cpu(pthread_self(), cpu); +} + +static inline int pin_self_to_any_cpu(void) +{ + return pin_task_to_any_cpu(pthread_self()); +} + void kvm_print_vcpu_pinning_help(void); void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], int nr_vcpus); diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index b11b5a53ebd5..2efb05c2f2fb 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1150,7 +1150,6 @@ do { \ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); void kvm_init_vm_address_properties(struct kvm_vm *vm); -bool vm_is_unrestricted_guest(struct kvm_vm *vm); struct ex_regs { uint64_t rax, rcx, rdx, rbx; @@ -1325,6 +1324,11 @@ static inline bool kvm_is_forced_emulation_enabled(void) return !!get_kvm_param_integer("force_emulation_prefix"); } +static inline bool kvm_is_unrestricted_guest_enabled(void) +{ + return get_kvm_intel_param_bool("unrestricted_guest"); +} + uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr, int *level); uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr); diff --git a/tools/testing/selftests/kvm/irqfd_test.c b/tools/testing/selftests/kvm/irqfd_test.c new file mode 100644 index 000000000000..7c301b4c7005 --- /dev/null +++ b/tools/testing/selftests/kvm/irqfd_test.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> +#include <stdint.h> +#include <sys/sysinfo.h> + +#include "kvm_util.h" + +static struct kvm_vm *vm1; +static struct kvm_vm *vm2; +static int __eventfd; +static bool done; + +/* + * KVM de-assigns based on eventfd *and* GSI, but requires unique eventfds when + * assigning (the API isn't symmetrical). Abuse the oddity and use a per-task + * GSI base to avoid false failures due to cross-task de-assign, i.e. so that + * the secondary doesn't de-assign the primary's eventfd and cause assign to + * unexpectedly succeed on the primary. + */ +#define GSI_BASE_PRIMARY 0x20 +#define GSI_BASE_SECONDARY 0x30 + +static void juggle_eventfd_secondary(struct kvm_vm *vm, int eventfd) +{ + int r, i; + + /* + * The secondary task can encounter EBADF since the primary can close + * the eventfd at any time. And because the primary can recreate the + * eventfd, at the safe fd in the file table, the secondary can also + * encounter "unexpected" success, e.g. if the close+recreate happens + * between the first and second assignments. The secondary's role is + * mostly to antagonize KVM, not to detect bugs. 
+ */ + for (i = 0; i < 2; i++) { + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY, eventfd, 0); + TEST_ASSERT(!r || errno == EBUSY || errno == EBADF, + "Wanted success, EBUSY, or EBADF, r = %d, errno = %d", + r, errno); + + /* De-assign should succeed unless the eventfd was closed. */ + r = __kvm_irqfd(vm, GSI_BASE_SECONDARY + i, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + TEST_ASSERT(!r || errno == EBADF, + "De-assign should succeed unless the fd was closed"); + } +} + +static void *secondary_irqfd_juggler(void *ign) +{ + while (!READ_ONCE(done)) { + juggle_eventfd_secondary(vm1, READ_ONCE(__eventfd)); + juggle_eventfd_secondary(vm2, READ_ONCE(__eventfd)); + } + + return NULL; +} + +static void juggle_eventfd_primary(struct kvm_vm *vm, int eventfd) +{ + int r1, r2; + + /* + * At least one of the assigns should fail. KVM disallows assigning a + * single eventfd to multiple GSIs (or VMs), so it's possible that both + * assignments can fail, too. + */ + r1 = __kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, 0); + TEST_ASSERT(!r1 || errno == EBUSY, + "Wanted success or EBUSY, r = %d, errno = %d", r1, errno); + + r2 = __kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, 0); + TEST_ASSERT(r1 || (r2 && errno == EBUSY), + "Wanted failure (EBUSY), r1 = %d, r2 = %d, errno = %d", + r1, r2, errno); + + /* + * De-assign should always succeed, even if the corresponding assign + * failed. + */ + kvm_irqfd(vm, GSI_BASE_PRIMARY, eventfd, KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm, GSI_BASE_PRIMARY + 1, eventfd, KVM_IRQFD_FLAG_DEASSIGN); +} + +int main(int argc, char *argv[]) +{ + pthread_t racing_thread; + int r, i; + + /* Create "full" VMs, as KVM_IRQFD requires an in-kernel IRQ chip. */ + vm1 = vm_create(1); + vm2 = vm_create(1); + + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + kvm_irqfd(vm1, 10, __eventfd, 0); + + r = __kvm_irqfd(vm1, 11, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + r = __kvm_irqfd(vm2, 12, __eventfd, 0); + TEST_ASSERT(r && errno == EBUSY, + "Wanted EBUSY, r = %d, errno = %d", r, errno); + + /* + * De-assign all eventfds, along with multiple eventfds that were never + * assigned. KVM's ABI is that de-assign is allowed so long as the + * eventfd itself is valid. 
+ */ + kvm_irqfd(vm1, 11, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 12, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 13, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 14, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + kvm_irqfd(vm1, 10, READ_ONCE(__eventfd), KVM_IRQFD_FLAG_DEASSIGN); + + close(__eventfd); + + pthread_create(&racing_thread, NULL, secondary_irqfd_juggler, vm2); + + for (i = 0; i < 10000; i++) { + WRITE_ONCE(__eventfd, kvm_new_eventfd()); + + juggle_eventfd_primary(vm1, __eventfd); + juggle_eventfd_primary(vm2, __eventfd); + close(__eventfd); + } + + WRITE_ONCE(done, true); + pthread_join(racing_thread, NULL); +} diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index a055343a7bf7..c3f5142b0a54 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -26,15 +26,27 @@ static uint32_t last_guest_seed; static int vcpu_mmap_sz(void); -int open_path_or_exit(const char *path, int flags) +int __open_path_or_exit(const char *path, int flags, const char *enoent_help) { int fd; fd = open(path, flags); - __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno)); - TEST_ASSERT(fd >= 0, "Failed to open '%s'", path); + if (fd < 0) + goto error; return fd; + +error: + if (errno == EACCES || errno == ENOENT) + ksft_exit_skip("- Cannot open '%s': %s. %s\n", + path, strerror(errno), + errno == EACCES ? "Root required?" : enoent_help); + TEST_FAIL("Failed to open '%s'", path); +} + +int open_path_or_exit(const char *path, int flags) +{ + return __open_path_or_exit(path, flags, ""); } /* @@ -48,7 +60,7 @@ int open_path_or_exit(const char *path, int flags) */ static int _open_kvm_dev_path_or_exit(int flags) { - return open_path_or_exit(KVM_DEV_PATH, flags); + return __open_path_or_exit(KVM_DEV_PATH, flags, "Is KVM loaded and enabled?"); } int open_kvm_dev_path_or_exit(void) @@ -64,6 +76,9 @@ static ssize_t get_module_param(const char *module_name, const char *param, ssize_t bytes_read; int fd, r; + /* Verify KVM is loaded, to provide a more helpful SKIP message. */ + close(open_kvm_dev_path_or_exit()); + r = snprintf(path, path_size, "/sys/module/%s/parameters/%s", module_name, param); TEST_ASSERT(r < path_size, @@ -605,15 +620,14 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm) return vm_vcpu_recreate(vm, 0); } -void kvm_pin_this_task_to_pcpu(uint32_t pcpu) +int __pin_task_to_cpu(pthread_t task, int cpu) { - cpu_set_t mask; - int r; + cpu_set_t cpuset; + + CPU_ZERO(&cpuset); + CPU_SET(cpu, &cpuset); - CPU_ZERO(&mask); - CPU_SET(pcpu, &mask); - r = sched_setaffinity(0, sizeof(mask), &mask); - TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu); + return pthread_setaffinity_np(task, sizeof(cpuset), &cpuset); } static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask) @@ -667,7 +681,7 @@ void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[], /* 2. Check if the main worker needs to be pinned. */ if (cpu) { - kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask)); + pin_self_to_cpu(parse_pcpu(cpu, &allowed_mask)); cpu = strtok(NULL, delim); } @@ -1716,7 +1730,18 @@ void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa) /* Create an interrupt controller chip for the specified VM. 
*/ void vm_create_irqchip(struct kvm_vm *vm) { - vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + int r; + + /* + * Allocate a fully in-kernel IRQ chip by default, but fall back to a + * split model (x86 only) if that fails (KVM x86 allows compiling out + * support for KVM_CREATE_IRQCHIP). + */ + r = __vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL); + if (r && errno == ENOTTY && kvm_has_cap(KVM_CAP_SPLIT_IRQCHIP)) + vm_enable_cap(vm, KVM_CAP_SPLIT_IRQCHIP, 24); + else + TEST_ASSERT_VM_VCPU_IOCTL(!r, KVM_CREATE_IRQCHIP, r, vm); vm->has_irqchip = true; } diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c index 313277486a1d..557c0a0a5658 100644 --- a/tools/testing/selftests/kvm/lib/memstress.c +++ b/tools/testing/selftests/kvm/lib/memstress.c @@ -265,7 +265,7 @@ static void *vcpu_thread_main(void *data) int vcpu_idx = vcpu->vcpu_idx; if (memstress_args.pin_vcpus) - kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); + pin_self_to_cpu(memstress_args.vcpu_to_pcpu[vcpu_idx]); WRITE_ONCE(vcpu->running, true); diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c index cfed9d26cc71..a99188f87a38 100644 --- a/tools/testing/selftests/kvm/lib/sparsebit.c +++ b/tools/testing/selftests/kvm/lib/sparsebit.c @@ -116,7 +116,7 @@ * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. * @@ -592,7 +592,7 @@ static struct node *node_split(struct sparsebit *s, sparsebit_idx_t idx) * * + A node with all mask bits set only occurs when the last bit * described by the previous node is not equal to this nodes - * starting index - 1. All such occurences of this condition are + * starting index - 1. All such occurrences of this condition are * avoided by moving the setting of the nodes mask bits into * the previous nodes num_after setting. */ diff --git a/tools/testing/selftests/kvm/lib/x86/processor.c b/tools/testing/selftests/kvm/lib/x86/processor.c index a92dc1dad085..d4c19ac885a9 100644 --- a/tools/testing/selftests/kvm/lib/x86/processor.c +++ b/tools/testing/selftests/kvm/lib/x86/processor.c @@ -1264,16 +1264,6 @@ done: return min(max_gfn, ht_gfn - 1); } -/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */ -bool vm_is_unrestricted_guest(struct kvm_vm *vm) -{ - /* Ensure that a KVM vendor-specific module is loaded. */ - if (vm == NULL) - close(open_kvm_dev_path_or_exit()); - - return get_kvm_intel_param_bool("unrestricted_guest"); -} - void kvm_selftest_arch_init(void) { host_cpu_is_intel = this_cpu_is_intel(); diff --git a/tools/testing/selftests/kvm/x86/aperfmperf_test.c b/tools/testing/selftests/kvm/x86/aperfmperf_test.c new file mode 100644 index 000000000000..8b15a13df939 --- /dev/null +++ b/tools/testing/selftests/kvm/x86/aperfmperf_test.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Test for KVM_X86_DISABLE_EXITS_APERFMPERF + * + * Copyright (C) 2025, Google LLC. + * + * Test the ability to disable VM-exits for rdmsr of IA32_APERF and + * IA32_MPERF. When these VM-exits are disabled, reads of these MSRs + * return the host's values. + * + * Note: Requires read access to /dev/cpu/<lpu>/msr to read host MSRs. 
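+ *
+ * For reference, a host-side read of one of these MSRs boils down to a
+ * pread() at the MSR's index (a sketch of what read_dev_msr() below does):
+ *
+ *	int fd = open("/dev/cpu/0/msr", O_RDONLY);
+ *	uint64_t aperf;
+ *	pread(fd, &aperf, sizeof(aperf), MSR_IA32_APERF);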
+ */ + +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <asm/msr-index.h> + +#include "kvm_util.h" +#include "processor.h" +#include "svm_util.h" +#include "test_util.h" +#include "vmx.h" + +#define NUM_ITERATIONS 10000 + +static int open_dev_msr(int cpu) +{ + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu); + return open_path_or_exit(path, O_RDONLY); +} + +static uint64_t read_dev_msr(int msr_fd, uint32_t msr) +{ + uint64_t data; + ssize_t rc; + + rc = pread(msr_fd, &data, sizeof(data), msr); + TEST_ASSERT(rc == sizeof(data), "Read of MSR 0x%x failed", msr); + + return data; +} + +static void guest_read_aperf_mperf(void) +{ + int i; + + for (i = 0; i < NUM_ITERATIONS; i++) + GUEST_SYNC2(rdmsr(MSR_IA32_APERF), rdmsr(MSR_IA32_MPERF)); +} + +#define L2_GUEST_STACK_SIZE 64 + +static void l2_guest_code(void) +{ + guest_read_aperf_mperf(); + GUEST_DONE(); +} + +static void l1_svm_code(struct svm_test_data *svm) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + struct vmcb *vmcb = svm->vmcb; + + generic_svm_setup(svm, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + run_guest(vmcb, svm->vmcb_gpa); +} + +static void l1_vmx_code(struct vmx_pages *vmx) +{ + unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; + + GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true); + GUEST_ASSERT_EQ(load_vmcs(vmx), true); + + prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]); + + /* + * Enable MSR bitmaps (the bitmap itself is allocated, zeroed, and set + * in the VMCS by prepare_vmcs()), as MSR exiting mandatory on Intel. + */ + vmwrite(CPU_BASED_VM_EXEC_CONTROL, + vmreadz(CPU_BASED_VM_EXEC_CONTROL) | CPU_BASED_USE_MSR_BITMAPS); + + GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_guest_code)); + GUEST_ASSERT(!vmlaunch()); +} + +static void guest_code(void *nested_test_data) +{ + guest_read_aperf_mperf(); + + if (this_cpu_has(X86_FEATURE_SVM)) + l1_svm_code(nested_test_data); + else if (this_cpu_has(X86_FEATURE_VMX)) + l1_vmx_code(nested_test_data); + else + GUEST_DONE(); + + TEST_FAIL("L2 should have signaled 'done'"); +} + +static void guest_no_aperfmperf(void) +{ + uint64_t msr_val; + uint8_t vector; + + vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + vector = rdmsr_safe(MSR_IA32_APERF, &msr_val); + GUEST_ASSERT(vector == GP_VECTOR); + + GUEST_DONE(); +} + +int main(int argc, char *argv[]) +{ + const bool has_nested = kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX); + uint64_t host_aperf_before, host_mperf_before; + vm_vaddr_t nested_test_data_gva; + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int msr_fd, cpu, i; + + /* Sanity check that APERF/MPERF are unsupported by default. */ + vm = vm_create_with_one_vcpu(&vcpu, guest_no_aperfmperf); + vcpu_run(vcpu); + TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE); + kvm_vm_free(vm); + + cpu = pin_self_to_any_cpu(); + + msr_fd = open_dev_msr(cpu); + + /* + * This test requires a non-standard VM initialization, because + * KVM_ENABLE_CAP cannot be used on a VM file descriptor after + * a VCPU has been created. 
+ */ + vm = vm_create(1); + + TEST_REQUIRE(vm_check_cap(vm, KVM_CAP_X86_DISABLE_EXITS) & + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vm_enable_cap(vm, KVM_CAP_X86_DISABLE_EXITS, + KVM_X86_DISABLE_EXITS_APERFMPERF); + + vcpu = vm_vcpu_add(vm, 0, guest_code); + + if (!has_nested) + nested_test_data_gva = NONCANONICAL; + else if (kvm_cpu_has(X86_FEATURE_SVM)) + vcpu_alloc_svm(vm, &nested_test_data_gva); + else + vcpu_alloc_vmx(vm, &nested_test_data_gva); + + vcpu_args_set(vcpu, 1, nested_test_data_gva); + + host_aperf_before = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_before = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + for (i = 0; i <= NUM_ITERATIONS * (1 + has_nested); i++) { + uint64_t host_aperf_after, host_mperf_after; + uint64_t guest_aperf, guest_mperf; + struct ucall uc; + + vcpu_run(vcpu); + TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_DONE: + goto done; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + case UCALL_SYNC: + guest_aperf = uc.args[0]; + guest_mperf = uc.args[1]; + + host_aperf_after = read_dev_msr(msr_fd, MSR_IA32_APERF); + host_mperf_after = read_dev_msr(msr_fd, MSR_IA32_MPERF); + + TEST_ASSERT(host_aperf_before < guest_aperf, + "APERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_aperf_before, guest_aperf); + TEST_ASSERT(guest_aperf < host_aperf_after, + "APERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_aperf, host_aperf_after); + TEST_ASSERT(host_mperf_before < guest_mperf, + "MPERF: host_before (0x%" PRIx64 ") >= guest (0x%" PRIx64 ")", + host_mperf_before, guest_mperf); + TEST_ASSERT(guest_mperf < host_mperf_after, + "MPERF: guest (0x%" PRIx64 ") >= host_after (0x%" PRIx64 ")", + guest_mperf, host_mperf_after); + + host_aperf_before = host_aperf_after; + host_mperf_before = host_mperf_after; + + break; + } + } + TEST_FAIL("Didn't receive UCALL_DONE\n"); +done: + kvm_vm_free(vm); + close(msr_fd); + + return 0; +} diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 390ae2d87493..0eb371c62ab8 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -74,6 +74,7 @@ int main(int argc, char *argv[]) int testcase; char test[80]; + TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); ksft_print_header(); diff --git a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c index 32b2794b78fe..8463a9956410 100644 --- a/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c +++ b/tools/testing/selftests/kvm/x86/userspace_msr_exit_test.c @@ -343,6 +343,12 @@ static void guest_code_permission_bitmap(void) data = test_rdmsr(MSR_GS_BASE); GUEST_ASSERT(data == MSR_GS_BASE); + /* Access the MSRs again to ensure KVM has disabled interception.*/ + data = test_rdmsr(MSR_FS_BASE); + GUEST_ASSERT(data != MSR_FS_BASE); + data = test_rdmsr(MSR_GS_BASE); + GUEST_ASSERT(data != MSR_GS_BASE); + GUEST_DONE(); } @@ -682,6 +688,8 @@ KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap) "Expected ucall state to be UCALL_SYNC."); vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs); run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE); + + vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow); run_guest_then_process_ucall_done(vcpu); } diff --git a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c 
b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c index 3fd6eceab46f..2cae86d9d5e2 100644 --- a/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c +++ b/tools/testing/selftests/kvm/x86/vmx_exception_with_invalid_guest_state.c @@ -110,7 +110,7 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; TEST_REQUIRE(host_cpu_is_intel); - TEST_REQUIRE(!vm_is_unrestricted_guest(NULL)); + TEST_REQUIRE(!kvm_is_unrestricted_guest_enabled()); vm = vm_create_with_one_vcpu(&vcpu, guest_code); get_set_sigalrm_vcpu(vcpu); diff --git a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c index 287829f850f7..23909b501ac2 100644 --- a/tools/testing/selftests/kvm/x86/xen_shinfo_test.c +++ b/tools/testing/selftests/kvm/x86/xen_shinfo_test.c @@ -547,15 +547,9 @@ int main(int argc, char *argv[]) int irq_fd[2] = { -1, -1 }; if (do_eventfd_tests) { - irq_fd[0] = eventfd(0, 0); - irq_fd[1] = eventfd(0, 0); + irq_fd[0] = kvm_new_eventfd(); + irq_fd[1] = kvm_new_eventfd(); - /* Unexpected, but not a KVM failure */ - if (irq_fd[0] == -1 || irq_fd[1] == -1) - do_evtchn_tests = do_eventfd_tests = false; - } - - if (do_eventfd_tests) { irq_routes.info.nr = 2; irq_routes.entries[0].gsi = 32; @@ -572,15 +566,8 @@ int main(int argc, char *argv[]) vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info); - struct kvm_irqfd ifd = { }; - - ifd.fd = irq_fd[0]; - ifd.gsi = 32; - vm_ioctl(vm, KVM_IRQFD, &ifd); - - ifd.fd = irq_fd[1]; - ifd.gsi = 33; - vm_ioctl(vm, KVM_IRQFD, &ifd); + kvm_assign_irqfd(vm, 32, irq_fd[0]); + kvm_assign_irqfd(vm, 33, irq_fd[1]); struct sigaction sa = { }; sa.sa_handler = handle_alrm; diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index 18a6014920b5..b16986aa6442 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -403,11 +403,12 @@ static int audit_init_filter_exe(struct audit_filter *filter, const char *path) /* It is assume that there is not already filtering rules. */ filter->record_type = AUDIT_EXE; if (!path) { - filter->exe_len = readlink("/proc/self/exe", filter->exe, - sizeof(filter->exe) - 1); - if (filter->exe_len < 0) + int ret = readlink("/proc/self/exe", filter->exe, + sizeof(filter->exe) - 1); + if (ret < 0) return -errno; + filter->exe_len = ret; return 0; } diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index cfc571afd0eb..46d02d49835a 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -7,6 +7,7 @@ #define _GNU_SOURCE #include <errno.h> +#include <fcntl.h> #include <limits.h> #include <linux/landlock.h> #include <pthread.h> diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index 73729382d40f..fa0f18ec62c4 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -1832,6 +1832,46 @@ TEST_F_FORK(layout1, release_inodes) ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY)); } +/* + * This test checks that a rule on a directory used as a mount point does not + * grant access to the mount covering it. It is a generalization of the bind + * mount case in layout3_fs.hostfs.release_inodes that tests hidden mount points. 
+ */ +TEST_F_FORK(layout1, covered_rule) +{ + const struct rule layer1[] = { + { + .path = dir_s3d2, + .access = LANDLOCK_ACCESS_FS_READ_DIR, + }, + {}, + }; + int ruleset_fd; + + /* Unmount to simplify FIXTURE_TEARDOWN. */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, umount(dir_s3d2)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + /* Creates a ruleset with the future hidden directory. */ + ruleset_fd = + create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1); + ASSERT_LE(0, ruleset_fd); + + /* Covers with a new mount point. */ + set_cap(_metadata, CAP_SYS_ADMIN); + ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2)); + clear_cap(_metadata, CAP_SYS_ADMIN); + + ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY)); + + enforce_ruleset(_metadata, ruleset_fd); + ASSERT_EQ(0, close(ruleset_fd)); + + /* Checks that access to the new mount point is denied. */ + ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY)); +} + enum relative_access { REL_OPEN, REL_CHDIR, diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config index 7afe05e8c4d7..bd09fdaf53e0 100644 --- a/tools/testing/selftests/lkdtm/config +++ b/tools/testing/selftests/lkdtm/config @@ -2,7 +2,7 @@ CONFIG_LKDTM=y CONFIG_DEBUG_LIST=y CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_FORTIFY_SOURCE=y -CONFIG_GCC_PLUGIN_STACKLEAK=y +CONFIG_KSTACK_ERASE=y CONFIG_HARDENED_USERCOPY=y CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y CONFIG_INIT_ON_FREE_DEFAULT_ON=y diff --git a/tools/testing/selftests/mm/config b/tools/testing/selftests/mm/config index a28baa536332..deba93379c80 100644 --- a/tools/testing/selftests/mm/config +++ b/tools/testing/selftests/mm/config @@ -8,3 +8,6 @@ CONFIG_GUP_TEST=y CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_MEM_SOFT_DIRTY=y CONFIG_ANON_VMA_NAME=y +CONFIG_FTRACE=y +CONFIG_PROFILING=y +CONFIG_UPROBES=y diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c index 8a97ac5176a4..29047d2e0c49 100644 --- a/tools/testing/selftests/mm/gup_longterm.c +++ b/tools/testing/selftests/mm/gup_longterm.c @@ -298,8 +298,11 @@ static void run_with_memfd(test_fn fn, const char *desc) log_test_start("%s ... 
with memfd", desc); fd = memfd_create("test", 0); - if (fd < 0) + if (fd < 0) { ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); + return; + } fn(fd, pagesize); close(fd); @@ -366,6 +369,8 @@ static void run_with_memfd_hugetlb(test_fn fn, const char *desc, fd = memfd_create("test", flags); if (fd < 0) { ksft_print_msg("memfd_create() failed (%s)\n", strerror(errno)); + log_test_result(KSFT_SKIP); + return; } fn(fd, hugetlbsize); diff --git a/tools/testing/selftests/mm/merge.c b/tools/testing/selftests/mm/merge.c index bbae66fc5038..cc26480098ae 100644 --- a/tools/testing/selftests/mm/merge.c +++ b/tools/testing/selftests/mm/merge.c @@ -470,7 +470,9 @@ TEST_F(merge, handle_uprobe_upon_merged_vma) ASSERT_GE(fd, 0); ASSERT_EQ(ftruncate(fd, page_size), 0); - ASSERT_EQ(read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type), 0); + if (read_sysfs("/sys/bus/event_source/devices/uprobe/type", &type) != 0) { + SKIP(goto out, "Failed to read uprobe sysfs file, skipping"); + } memset(&attr, 0, attr_sz); attr.size = attr_sz; @@ -491,6 +493,7 @@ TEST_F(merge, handle_uprobe_upon_merged_vma) ASSERT_NE(mremap(ptr2, page_size, page_size, MREMAP_MAYMOVE | MREMAP_FIXED, ptr1), MAP_FAILED); +out: close(fd); remove(probe_file); } diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings index a953c96aa16e..e2206265f67c 100644 --- a/tools/testing/selftests/mm/settings +++ b/tools/testing/selftests/mm/settings @@ -1 +1 @@ -timeout=180 +timeout=900 diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c index aa7400ed0e99..f0d9c035641d 100644 --- a/tools/testing/selftests/mm/split_huge_page_test.c +++ b/tools/testing/selftests/mm/split_huge_page_test.c @@ -31,6 +31,7 @@ uint64_t pmd_pagesize; #define INPUT_MAX 80 #define PID_FMT "%d,0x%lx,0x%lx,%d" +#define PID_FMT_OFFSET "%d,0x%lx,0x%lx,%d,%d" #define PATH_FMT "%s,0x%lx,0x%lx,%d" #define PFN_MASK ((1UL<<55)-1) @@ -483,7 +484,7 @@ void split_thp_in_pagecache_to_order_at(size_t fd_size, const char *fs_loc, write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order); else - write_debugfs(PID_FMT, getpid(), (uint64_t)addr, + write_debugfs(PID_FMT_OFFSET, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order, offset); for (i = 0; i < fd_size; i++) diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c index b380e102b22f..169dbd692bf5 100644 --- a/tools/testing/selftests/mm/virtual_address_range.c +++ b/tools/testing/selftests/mm/virtual_address_range.c @@ -77,8 +77,11 @@ static void validate_addr(char *ptr, int high_addr) { unsigned long addr = (unsigned long) ptr; - if (high_addr && addr < HIGH_ADDR_MARK) - ksft_exit_fail_msg("Bad address %lx\n", addr); + if (high_addr) { + if (addr < HIGH_ADDR_MARK) + ksft_exit_fail_msg("Bad address %lx\n", addr); + return; + } if (addr > HIGH_ADDR_MARK) ksft_exit_fail_msg("Bad address %lx\n", addr); diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 532bb732bc6d..47c293c2962f 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -34,6 +34,7 @@ reuseport_bpf_numa reuseport_dualstack rxtimestamp sctp_hello +scm_inq scm_pidfd scm_rights sk_bind_sendto_listen @@ -50,6 +51,7 @@ tap tcp_fastopen_backup_key tcp_inq tcp_mmap +tfo timestamping tls toeplitz diff --git a/tools/testing/selftests/net/Makefile 
b/tools/testing/selftests/net/Makefile index ea84b88bcb30..b31a71f2b372 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -27,6 +27,7 @@ TEST_PROGS += amt.sh TEST_PROGS += unicast_extensions.sh TEST_PROGS += udpgro_fwd.sh TEST_PROGS += udpgro_frglist.sh +TEST_PROGS += nat6to4.sh TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh @@ -40,6 +41,7 @@ TEST_PROGS += netns-name.sh TEST_PROGS += link_netns.py TEST_PROGS += nl_netdev.py TEST_PROGS += rtnetlink.py +TEST_PROGS += rtnetlink_notification.sh TEST_PROGS += srv6_end_dt46_l3vpn_test.sh TEST_PROGS += srv6_end_dt4_l3vpn_test.sh TEST_PROGS += srv6_end_dt6_l3vpn_test.sh @@ -61,6 +63,7 @@ TEST_PROGS += ip_local_port_range.sh TEST_PROGS += rps_default_mask.sh TEST_PROGS += big_tcp.sh TEST_PROGS += netns-sysctl.sh +TEST_PROGS += netdev-l2addr.sh TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any @@ -98,6 +101,7 @@ TEST_PROGS += test_vxlan_mdb.sh TEST_PROGS += test_bridge_neigh_suppress.sh TEST_PROGS += test_vxlan_nolocalbypass.sh TEST_PROGS += test_bridge_backup_port.sh +TEST_PROGS += test_neigh.sh TEST_PROGS += fdb_flush.sh fdb_notify.sh TEST_PROGS += fq_band_pktlimit.sh TEST_PROGS += vlan_hw_filter.sh @@ -109,6 +113,10 @@ TEST_GEN_PROGS += proc_net_pktgen TEST_PROGS += lwt_dst_cache_ref_loop.sh TEST_PROGS += skf_net_off.sh TEST_GEN_FILES += skf_net_off +TEST_GEN_FILES += tfo +TEST_PROGS += tfo_passive.sh +TEST_PROGS += broadcast_pmtu.sh +TEST_PROGS += ipv6_force_forwarding.sh # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 50584479540b..a4b61c6d0290 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect +TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 3ed3882a93b8..b5f474969917 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -210,7 +210,7 @@ static void __sendpair(struct __test_metadata *_metadata, static void __recvpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, const char *expected_buf, int expected_len, - int buf_len, int flags) + int buf_len, int flags, bool is_sender) { int i, ret[2], recv_errno[2], expected_errno = 0; char recv_buf[2][BUF_SZ] = {}; @@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata, errno = 0; for (i = 0; i < 2; i++) { - ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + int index = is_sender ? i * 2 : i * 2 + 1; + + ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags); recv_errno[i] = errno; } @@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, ASSERT_EQ(answ[0], answ[1]); } +static void __resetpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const FIXTURE_VARIANT(msg_oob) *variant, + bool reset) +{ + int i; + + for (i = 0; i < 2; i++) + close(self->fd[i * 2 + 1]); + + __recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1, + variant->peek ? 
MSG_PEEK : 0, true); +} + #define sendpair(buf, len, flags) \ __sendpair(_metadata, self, buf, len, flags) @@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, if (variant->peek) \ __recvpair(_metadata, self, \ expected_buf, expected_len, \ - buf_len, (flags) | MSG_PEEK); \ + buf_len, (flags) | MSG_PEEK, false); \ __recvpair(_metadata, self, \ - expected_buf, expected_len, buf_len, flags); \ + expected_buf, expected_len, \ + buf_len, flags, false); \ } while (0) #define epollpair(oob_remaining) \ @@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, #define setinlinepair() \ __setinlinepair(_metadata, self) +#define resetpair(reset) \ + __resetpair(_metadata, self, variant, reset) + #define tcp_incompliant \ for (self->tcp_compliant = false; \ self->tcp_compliant == false; \ @@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(true); +} + +TEST_F(msg_oob, non_oob_no_reset) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob) @@ -355,6 +390,19 @@ TEST_F(msg_oob, oob) recvpair("x", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } +} + +TEST_F(msg_oob, oob_reset) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + resetpair(true); } TEST_F(msg_oob, oob_drop) @@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead) @@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead) recvpair("hell", 4, 4, 0); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, oob_break) @@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break) recvpair("", -EAGAIN, 1, 0); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead_break) @@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break) recvpair("world", 5, 5, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_break_drop) @@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_break) @@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break) recvpair("ld", 2, 2, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop) @@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop_2) @@ -523,6 +587,8 @@ TEST_F(msg_oob, ex_oob_drop_2) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_oob) @@ -546,6 +612,54 @@ TEST_F(msg_oob, ex_oob_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); +} + +TEST_F(msg_oob, ex_oob_ex_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. 
*/ + } +} + +TEST_F(msg_oob, ex_oob_ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("z", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); } TEST_F(msg_oob, ex_oob_ahead_break) @@ -576,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break) recvpair("d", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, ex_oob_siocatmark) @@ -595,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_F(msg_oob, inline_oob) @@ -612,6 +732,8 @@ TEST_F(msg_oob, inline_oob) recvpair("x", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_break) @@ -633,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break) recvpair("o", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_ahead_break) @@ -661,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break) epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_break) @@ -686,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break) recvpair("rld", 3, 3, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_no_drop) @@ -707,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop) recvpair("y", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_drop) @@ -731,6 +861,8 @@ TEST_F(msg_oob, inline_ex_oob_drop) epollpair(false); siocatmarkpair(false); } + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_siocatmark) @@ -752,6 +884,8 @@ TEST_F(msg_oob, inline_ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. 
*/ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c new file mode 100644 index 000000000000..9d22561e7b8f --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <linux/sockios.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "../../kselftest_harness.h" + +#define NR_CHUNKS 100 +#define MSG_LEN 256 + +struct scm_inq { + struct cmsghdr cmsghdr; + int inq; +}; + +FIXTURE(scm_inq) +{ + int fd[2]; +}; + +FIXTURE_VARIANT(scm_inq) +{ + int type; +}; + +FIXTURE_VARIANT_ADD(scm_inq, stream) +{ + .type = SOCK_STREAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, dgram) +{ + .type = SOCK_DGRAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, seqpacket) +{ + .type = SOCK_SEQPACKET, +}; + +FIXTURE_SETUP(scm_inq) +{ + int err; + + err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(scm_inq) +{ + close(self->fd[0]); + close(self->fd[1]); +} + +static void send_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + char buf[MSG_LEN] = {}; + int i, ret; + + for (i = 0; i < NR_CHUNKS; i++) { + ret = send(self->fd[0], buf, sizeof(buf), 0); + ASSERT_EQ(sizeof(buf), ret); + } +} + +static void recv_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + struct msghdr msg = {}; + struct iovec iov = {}; + struct scm_inq cmsg; + char buf[MSG_LEN]; + int i, ret; + int inq; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = &cmsg; + msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq)); + + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + for (i = 0; i < NR_CHUNKS; i++) { + memset(buf, 0, sizeof(buf)); + memset(&cmsg, 0, sizeof(cmsg)); + + ret = recvmsg(self->fd[1], &msg, 0); + ASSERT_EQ(MSG_LEN, ret); + ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg)); + ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type); + + ret = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, ret); + ASSERT_EQ(cmsg.inq, inq); + } +} + +TEST_F(scm_inq, basic) +{ + int err, inq; + + err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int)); + if (variant->type != SOCK_STREAM) { + ASSERT_EQ(-ENOPROTOOPT, -errno); + return; + } + + ASSERT_EQ(0, err); + + err = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, err); + ASSERT_EQ(0, inq); + + send_chunks(_metadata, self); + recv_chunks(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index 7e534594167e..37e034874034 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -15,6 +15,7 @@ #include <sys/types.h> #include <sys/wait.h> +#include "../../pidfd/pidfd.h" #include "../../kselftest_harness.h" #define clean_errno() (errno == 0 ? 
"None" : strerror(errno)) @@ -26,6 +27,8 @@ #define SCM_PIDFD 0x04 #endif +#define CHILD_EXIT_CODE_OK 123 + static void child_die() { exit(1); @@ -126,16 +129,65 @@ out: return result; } +struct cmsg_data { + struct ucred *ucred; + int *pidfd; +}; + +static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res) +{ + struct cmsghdr *cmsg; + int data = 0; + + if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_PIDFD) { + if (cmsg->cmsg_len < sizeof(*res->pidfd)) { + log_err("CMSG parse: SCM_PIDFD wrong len"); + return 1; + } + + res->pidfd = (void *)CMSG_DATA(cmsg); + } + + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS) { + if (cmsg->cmsg_len < sizeof(*res->ucred)) { + log_err("CMSG parse: SCM_CREDENTIALS wrong len"); + return 1; + } + + res->ucred = (void *)CMSG_DATA(cmsg); + } + } + + if (!res->pidfd) { + log_err("CMSG parse: SCM_PIDFD not found"); + return 1; + } + + if (!res->ucred) { + log_err("CMSG parse: SCM_CREDENTIALS not found"); + return 1; + } + + return 0; +} + static int cmsg_check(int fd) { struct msghdr msg = { 0 }; - struct cmsghdr *cmsg; + struct cmsg_data res; struct iovec iov; - struct ucred *ucred = NULL; int data = 0; char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = { 0 }; - int *pidfd = NULL; pid_t parent_pid; int err; @@ -158,53 +210,99 @@ static int cmsg_check(int fd) return 1; } - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_PIDFD) { - if (cmsg->cmsg_len < sizeof(*pidfd)) { - log_err("CMSG parse: SCM_PIDFD wrong len"); - return 1; - } + /* send(pfd, "x", sizeof(char), 0) */ + if (data != 'x') { + log_err("recvmsg: data corruption"); + return 1; + } - pidfd = (void *)CMSG_DATA(cmsg); - } + if (parse_cmsg(&msg, &res)) { + log_err("CMSG parse: parse_cmsg() failed"); + return 1; + } - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_CREDENTIALS) { - if (cmsg->cmsg_len < sizeof(*ucred)) { - log_err("CMSG parse: SCM_CREDENTIALS wrong len"); - return 1; - } + /* pidfd from SCM_PIDFD should point to the parent process PID */ + parent_pid = + get_pid_from_fdinfo_file(*res.pidfd, "Pid:", sizeof("Pid:") - 1); + if (parent_pid != getppid()) { + log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + close(*res.pidfd); + return 1; + } - ucred = (void *)CMSG_DATA(cmsg); - } + close(*res.pidfd); + return 0; +} + +static int cmsg_check_dead(int fd, int expected_pid) +{ + int err; + struct msghdr msg = { 0 }; + struct cmsg_data res; + struct iovec iov; + int data = 0; + char control[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(int))] = { 0 }; + pid_t client_pid; + struct pidfd_info info = { + .mask = PIDFD_INFO_EXIT, + }; + + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + err = recvmsg(fd, &msg, 0); + if (err < 0) { + log_err("recvmsg"); + return 1; } - /* send(pfd, "x", sizeof(char), 0) */ - if (data != 'x') { + if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + /* send(cfd, "y", sizeof(char), 0) */ + if (data != 'y') { log_err("recvmsg: data corruption"); return 1; } - if (!pidfd) { - log_err("CMSG parse: SCM_PIDFD not found"); + 
if (parse_cmsg(&msg, &res)) { + log_err("CMSG parse: parse_cmsg() failed"); return 1; } - if (!ucred) { - log_err("CMSG parse: SCM_CREDENTIALS not found"); + /* + * pidfd from SCM_PIDFD should point to the client_pid. + * Let's read exit information and check if it's what + * we expect to see. + */ + if (ioctl(*res.pidfd, PIDFD_GET_INFO, &info)) { + log_err("%s: ioctl(PIDFD_GET_INFO) failed", __func__); + close(*res.pidfd); return 1; } - /* pidfd from SCM_PIDFD should point to the parent process PID */ - parent_pid = - get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1); - if (parent_pid != getppid()) { - log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + if (!(info.mask & PIDFD_INFO_EXIT)) { + log_err("%s: No exit information from ioctl(PIDFD_GET_INFO)", __func__); + close(*res.pidfd); return 1; } + err = WIFEXITED(info.exit_code) ? WEXITSTATUS(info.exit_code) : 1; + if (err != CHILD_EXIT_CODE_OK) { + log_err("%s: wrong exit_code %d != %d", __func__, err, CHILD_EXIT_CODE_OK); + close(*res.pidfd); + return 1; + } + + close(*res.pidfd); return 0; } @@ -291,6 +389,24 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract) memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name)); } +static int sk_enable_cred_pass(int sk) +{ + int on = 0; + + on = 1; + if (setsockopt(sk, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { + log_err("Failed to set SO_PASSCRED"); + return 1; + } + + if (setsockopt(sk, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { + log_err("Failed to set SO_PASSPIDFD"); + return 1; + } + + return 0; +} + static void client(FIXTURE_DATA(scm_pidfd) *self, const FIXTURE_VARIANT(scm_pidfd) *variant) { @@ -299,7 +415,6 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, struct ucred peer_cred; int peer_pidfd; pid_t peer_pid; - int on = 0; cfd = socket(AF_UNIX, variant->type, 0); if (cfd < 0) { @@ -322,14 +437,8 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } - on = 1; - if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { - log_err("Failed to set SO_PASSCRED"); - child_die(); - } - - if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { - log_err("Failed to set SO_PASSPIDFD"); + if (sk_enable_cred_pass(cfd)) { + log_err("sk_enable_cred_pass() failed"); child_die(); } @@ -340,6 +449,12 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } + /* send something to the parent so it can receive SCM_PIDFD too and validate it */ + if (send(cfd, "y", sizeof(char), 0) == -1) { + log_err("Failed to send(cfd, \"y\", sizeof(char), 0)"); + child_die(); + } + /* skip further for SOCK_DGRAM as it's not applicable */ if (variant->type == SOCK_DGRAM) return; @@ -398,7 +513,13 @@ TEST_F(scm_pidfd, test) close(self->server); close(self->startup_pipe[0]); client(self, variant); - exit(0); + + /* + * It's a bit unusual, but in case of success we return non-zero + * exit code (CHILD_EXIT_CODE_OK) and then we expect to read it + * from ioctl(PIDFD_GET_INFO) in cmsg_check_dead(). + */ + exit(CHILD_EXIT_CODE_OK); } close(self->startup_pipe[1]); @@ -421,9 +542,17 @@ TEST_F(scm_pidfd, test) ASSERT_NE(-1, err); } - close(pfd); waitpid(self->client_pid, &child_status, 0); - ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); + /* see comment before exit(CHILD_EXIT_CODE_OK) */ + ASSERT_EQ(CHILD_EXIT_CODE_OK, WIFEXITED(child_status) ? 
WEXITSTATUS(child_status) : 1); + + err = sk_enable_cred_pass(pfd); + ASSERT_EQ(0, err); + + err = cmsg_check_dead(pfd, self->client_pid); + ASSERT_EQ(0, err); + + close(pfd); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile new file mode 100644 index 000000000000..2546c45e42f7 --- /dev/null +++ b/tools/testing/selftests/net/bench/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_MODS_DIR := page_pool + +TEST_PROGS += test_bench_page_pool.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile new file mode 100644 index 000000000000..0549a16ba275 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/Makefile @@ -0,0 +1,17 @@ +BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= /lib/modules/$(shell uname -r)/build + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +obj-m += bench_page_pool.o +bench_page_pool-y += bench_page_pool_simple.o time_bench.o + +all: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c new file mode 100644 index 000000000000..cb6468adbda4 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmark module for page_pool. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/interrupt.h> +#include <linux/limits.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <net/page_pool/helpers.h> + +#include "time_bench.h" + +static int verbose = 1; +#define MY_POOL_SIZE 1024 + +/* Makes tests selectable. Useful for perf-record to analyze a single test. 
+ * Hint: Bash shells support writing binary number like: $((2#101010) + * + * # modprobe bench_page_pool_simple run_flags=$((2#100)) + */ +static unsigned long run_flags = 0xFFFFFFFF; +module_param(run_flags, ulong, 0); +MODULE_PARM_DESC(run_flags, "Limit which bench test that runs"); + +/* Count the bit number from the enum */ +enum benchmark_bit { + bit_run_bench_baseline, + bit_run_bench_no_softirq01, + bit_run_bench_no_softirq02, + bit_run_bench_no_softirq03, +}; + +#define bit(b) (1 << (b)) +#define enabled(b) ((run_flags & (bit(b)))) + +/* notice time_bench is limited to U32_MAX nr loops */ +static unsigned long loops = 10000000; +module_param(loops, ulong, 0); +MODULE_PARM_DESC(loops, "Specify loops bench will run"); + +/* Timing at the nanosec level, we need to know the overhead + * introduced by the for loop itself + */ +static int time_bench_for_loop(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + int i; + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + } + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +static int time_bench_atomic_inc(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + atomic_t cnt; + int i; + + atomic_set(&cnt, 0); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + atomic_inc(&cnt); + barrier(); /* avoid compiler to optimize this loop */ + } + loops_cnt = atomic_read(&cnt); + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +/* The ptr_ping in page_pool uses a spinlock. We need to know the minimum + * overhead of taking+releasing a spinlock, to know the cycles that can be saved + * by e.g. amortizing this via bulking. 
+ */ +static int time_bench_lock(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + spinlock_t lock; + int i; + + spin_lock_init(&lock); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + spin_lock(&lock); + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + spin_unlock(&lock); + } + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +/* Helper for filling some page's into ptr_ring */ +static void pp_fill_ptr_ring(struct page_pool *pp, int elems) +{ + /* GFP_ATOMIC needed when under run softirq */ + gfp_t gfp_mask = GFP_ATOMIC; + struct page **array; + int i; + + array = kcalloc(elems, sizeof(struct page *), gfp_mask); + + for (i = 0; i < elems; i++) + array[i] = page_pool_alloc_pages(pp, gfp_mask); + for (i = 0; i < elems; i++) + page_pool_put_page(pp, array[i], -1, false); + + kfree(array); +} + +enum test_type { type_fast_path, type_ptr_ring, type_page_allocator }; + +/* Depends on compile optimizing this function */ +static int time_bench_page_pool(struct time_bench_record *rec, void *data, + enum test_type type, const char *func) +{ + uint64_t loops_cnt = 0; + gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */ + int i, err; + + struct page_pool *pp; + struct page *page; + + struct page_pool_params pp_params = { + .order = 0, + .flags = 0, + .pool_size = MY_POOL_SIZE, + .nid = NUMA_NO_NODE, + .dev = NULL, /* Only use for DMA mapping */ + .dma_dir = DMA_BIDIRECTIONAL, + }; + + pp = page_pool_create(&pp_params); + if (IS_ERR(pp)) { + err = PTR_ERR(pp); + pr_warn("%s: Error(%d) creating page_pool\n", func, err); + goto out; + } + pp_fill_ptr_ring(pp, 64); + + if (in_serving_softirq()) + pr_warn("%s(): in_serving_softirq fast-path\n", func); + else + pr_warn("%s(): Cannot use page_pool fast-path\n", func); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + /* Common fast-path alloc that depend on in_serving_softirq() */ + page = page_pool_alloc_pages(pp, gfp_mask); + if (!page) + break; + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + + /* The benchmarks purpose it to test different return paths. + * Compiler should inline optimize other function calls out + */ + if (type == type_fast_path) { + /* Fast-path recycling e.g. 
XDP_DROP use-case */ + page_pool_recycle_direct(pp, page); + + } else if (type == type_ptr_ring) { + /* Normal return path */ + page_pool_put_page(pp, page, -1, false); + + } else if (type == type_page_allocator) { + /* Test if not pages are recycled, but instead + * returned back into systems page allocator + */ + get_page(page); /* cause no-recycling */ + page_pool_put_page(pp, page, -1, false); + put_page(page); + } else { + BUILD_BUG(); + } + } + time_bench_stop(rec, loops_cnt); +out: + page_pool_destroy(pp); + return loops_cnt; +} + +static int time_bench_page_pool01_fast_path(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_fast_path, __func__); +} + +static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_ptr_ring, __func__); +} + +static int time_bench_page_pool03_slow(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_page_allocator, __func__); +} + +static int run_benchmark_tests(void) +{ + uint32_t nr_loops = loops; + + /* Baseline tests */ + if (enabled(bit_run_bench_baseline)) { + time_bench_loop(nr_loops * 10, 0, "for_loop", NULL, + time_bench_for_loop); + time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL, + time_bench_atomic_inc); + time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock); + } + + /* This test cannot activate correct code path, due to no-softirq ctx */ + if (enabled(bit_run_bench_no_softirq01)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL, + time_bench_page_pool01_fast_path); + if (enabled(bit_run_bench_no_softirq02)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL, + time_bench_page_pool02_ptr_ring); + if (enabled(bit_run_bench_no_softirq03)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL, + time_bench_page_pool03_slow); + + return 0; +} + +static int __init bench_page_pool_simple_module_init(void) +{ + if (verbose) + pr_info("Loaded\n"); + + if (loops > U32_MAX) { + pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops, + U32_MAX); + return -ECHRNG; + } + + run_benchmark_tests(); + + return 0; +} +module_init(bench_page_pool_simple_module_init); + +static void __exit bench_page_pool_simple_module_exit(void) +{ + if (verbose) + pr_info("Unloaded\n"); +} +module_exit(bench_page_pool_simple_module_exit); + +MODULE_DESCRIPTION("Benchmark of page_pool simple cases"); +MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c new file mode 100644 index 000000000000..073bb36ec5f2 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c @@ -0,0 +1,394 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/time.h> + +#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */ + +/* For concurrency testing */ +#include <linux/completion.h> +#include <linux/sched.h> +#include <linux/workqueue.h> +#include <linux/kthread.h> + +#include "time_bench.h" + +static int verbose = 1; + +/** TSC (Time-Stamp Counter) based ** + * See: linux/time_bench.h + * tsc_start_clock() and tsc_stop_clock() + */ + +/** Wall-clock based ** + */ + +/** PMU (Performance 
Monitor Unit) based ** + */ +#define PERF_FORMAT \ + (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \ + PERF_FORMAT_TOTAL_TIME_RUNNING) + +struct raw_perf_event { + uint64_t config; /* event */ + uint64_t config1; /* umask */ + struct perf_event *save; + char *desc; +}; + +/* if HT is enable a maximum of 4 events (5 if one is instructions + * retired can be specified, if HT is disabled a maximum of 8 (9 if + * one is instructions retired) can be specified. + * + * From Table 19-1. Architectural Performance Events + * Architectures Software Developer’s Manual Volume 3: System Programming + * Guide + */ +struct raw_perf_event perf_events[] = { + { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" }, + { 0xc0, 0x00, NULL, "Instruction Retired" } +}; + +#define NUM_EVTS (ARRAY_SIZE(perf_events)) + +/* WARNING: PMU config is currently broken! + */ +bool time_bench_PMU_config(bool enable) +{ + int i; + struct perf_event_attr perf_conf; + struct perf_event *perf_event; + int cpu; + + preempt_disable(); + cpu = smp_processor_id(); + pr_info("DEBUG: cpu:%d\n", cpu); + preempt_enable(); + + memset(&perf_conf, 0, sizeof(struct perf_event_attr)); + perf_conf.type = PERF_TYPE_RAW; + perf_conf.size = sizeof(struct perf_event_attr); + perf_conf.read_format = PERF_FORMAT; + perf_conf.pinned = 1; + perf_conf.exclude_user = 1; /* No userspace events */ + perf_conf.exclude_kernel = 0; /* Only kernel events */ + + for (i = 0; i < NUM_EVTS; i++) { + perf_conf.disabled = enable; + //perf_conf.disabled = (i == 0) ? 1 : 0; + perf_conf.config = perf_events[i].config; + perf_conf.config1 = perf_events[i].config1; + if (verbose) + pr_info("%s() enable PMU counter: %s\n", + __func__, perf_events[i].desc); + perf_event = perf_event_create_kernel_counter(&perf_conf, cpu, + NULL /* task */, + NULL /* overflow_handler*/, + NULL /* context */); + if (perf_event) { + perf_events[i].save = perf_event; + pr_info("%s():DEBUG perf_event success\n", __func__); + + perf_event_enable(perf_event); + } else { + pr_info("%s():DEBUG perf_event is NULL\n", __func__); + } + } + + return true; +} + +/** Generic functions ** + */ + +/* Calculate stats, store results in record */ +bool time_bench_calc_stats(struct time_bench_record *rec) +{ +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ + uint64_t ns_per_call_tmp_rem = 0; + uint32_t ns_per_call_remainder = 0; + uint64_t pmc_ipc_tmp_rem = 0; + uint32_t pmc_ipc_remainder = 0; + uint32_t pmc_ipc_div = 0; + uint32_t invoked_cnt_precision = 0; + uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */ + + if (rec->flags & TIME_BENCH_LOOP) { + if (rec->invoked_cnt < 1000) { + pr_err("ERR: need more(>1000) loops(%llu) for timing\n", + rec->invoked_cnt); + return false; + } + if (rec->invoked_cnt > ((1ULL << 32) - 1)) { + /* div_u64_rem() can only support div with 32bit*/ + pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n", + rec->invoked_cnt); + return false; + } + invoked_cnt = (uint32_t)rec->invoked_cnt; + } + + /* TSC (Time-Stamp Counter) records */ + if (rec->flags & TIME_BENCH_TSC) { + rec->tsc_interval = rec->tsc_stop - rec->tsc_start; + if (rec->tsc_interval == 0) { + pr_err("ABORT: timing took ZERO TSC time\n"); + return false; + } + /* Calculate stats */ + if (rec->flags & TIME_BENCH_LOOP) + rec->tsc_cycles = rec->tsc_interval / invoked_cnt; + else + rec->tsc_cycles = rec->tsc_interval; + } + + /* Wall-clock time calc */ + if (rec->flags & TIME_BENCH_WALLCLOCK) { + rec->time_start = rec->ts_start.tv_nsec + + (NANOSEC_PER_SEC * rec->ts_start.tv_sec); + 
rec->time_stop = rec->ts_stop.tv_nsec + + (NANOSEC_PER_SEC * rec->ts_stop.tv_sec); + rec->time_interval = rec->time_stop - rec->time_start; + if (rec->time_interval == 0) { + pr_err("ABORT: timing took ZERO wallclock time\n"); + return false; + } + /* Calculate stats */ + /*** Division in kernel it tricky ***/ + /* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */ + /* remainder only correct because NANOSEC_PER_SEC is 10^9 */ + rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC, + &rec->time_sec_remainder); + //TODO: use existing struct timespec records instead of div? + + if (rec->flags & TIME_BENCH_LOOP) { + /*** Division in kernel it tricky ***/ + /* Orig: ns = ((double)time_interval / invoked_cnt); */ + /* First get quotient */ + rec->ns_per_call_quotient = + div_u64_rem(rec->time_interval, invoked_cnt, + &ns_per_call_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + ns_per_call_tmp_rem = ns_per_call_remainder; + invoked_cnt_precision = invoked_cnt / 1000; + if (invoked_cnt_precision > 0) { + rec->ns_per_call_decimal = + div_u64_rem(ns_per_call_tmp_rem, + invoked_cnt_precision, + &ns_per_call_remainder); + } + } + } + + /* Performance Monitor Unit (PMU) counters */ + if (rec->flags & TIME_BENCH_PMU) { + //FIXME: Overflow handling??? + rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start; + rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start; + + /* Calc Instruction Per Cycle (IPC) */ + /* First get quotient */ + rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk, + &pmc_ipc_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + pmc_ipc_tmp_rem = pmc_ipc_remainder; + pmc_ipc_div = rec->pmc_clk / 1000; + if (pmc_ipc_div > 0) { + rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem, + pmc_ipc_div, + &pmc_ipc_remainder); + } + } + + return true; +} + +/* Generic function for invoking a loop function and calculating + * execution time stats. The function being called/timed is assumed + * to perform a tight loop, and update the timing record struct. 
+ */ +bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, + int (*func)(struct time_bench_record *record, void *data)) +{ + struct time_bench_record rec; + + /* Setup record */ + memset(&rec, 0, sizeof(rec)); /* zero func might not update all */ + rec.version_abi = 1; + rec.loops = loops; + rec.step = step; + rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK); + + /*** Loop function being timed ***/ + if (!func(&rec, data)) { + pr_err("ABORT: function being timed failed\n"); + return false; + } + + if (rec.invoked_cnt < loops) + pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n", + rec.invoked_cnt, loops); + + /* Calculate stats */ + time_bench_calc_stats(&rec); + + pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", + txt, rec.tsc_cycles, rec.ns_per_call_quotient, + rec.ns_per_call_decimal, rec.step, rec.time_sec, + rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt, + rec.tsc_interval); + if (rec.flags & TIME_BENCH_PMU) + pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n", + txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient, + rec.pmc_ipc_decimal); + return true; +} + +/* Function getting invoked by kthread */ +static int invoke_test_on_cpu_func(void *private) +{ + struct time_bench_cpu *cpu = private; + struct time_bench_sync *sync = cpu->sync; + cpumask_t newmask = CPU_MASK_NONE; + void *data = cpu->data; + + /* Restrict CPU */ + cpumask_set_cpu(cpu->rec.cpu, &newmask); + set_cpus_allowed_ptr(current, &newmask); + + /* Synchronize start of concurrency test */ + atomic_inc(&sync->nr_tests_running); + wait_for_completion(&sync->start_event); + + /* Start benchmark function */ + if (!cpu->bench_func(&cpu->rec, data)) { + pr_err("ERROR: function being timed failed on CPU:%d(%d)\n", + cpu->rec.cpu, smp_processor_id()); + } else { + if (verbose) + pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu, + smp_processor_id()); + } + cpu->did_bench_run = true; + + /* End test */ + atomic_dec(&sync->nr_tests_running); + /* Wait for kthread_stop() telling us to stop */ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +void time_bench_print_stats_cpumask(const char *desc, + struct time_bench_cpu *cpu_tasks, + const struct cpumask *mask) +{ + uint64_t average = 0; + int cpu; + int step = 0; + struct sum { + uint64_t tsc_cycles; + int records; + } sum = { 0 }; + + /* Get stats */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + struct time_bench_record *rec = &c->rec; + + /* Calculate stats */ + time_bench_calc_stats(rec); + + pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", + desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient, + rec->ns_per_call_decimal, rec->step, rec->time_sec, + rec->time_sec_remainder, rec->time_interval, + rec->invoked_cnt, rec->tsc_interval); + + /* Collect average */ + sum.records++; + sum.tsc_cycles += rec->tsc_cycles; + step = rec->step; + } + + if (sum.records) /* avoid div-by-zero */ + average = sum.tsc_cycles / sum.records; + pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc, + average, sum.records, step); +} + +void time_bench_run_concurrent(uint32_t loops, int step, void *data, + const struct 
cpumask *mask, /* Support masking outsome CPUs*/ + struct time_bench_sync *sync, + struct time_bench_cpu *cpu_tasks, + int (*func)(struct time_bench_record *record, void *data)) +{ + int cpu, running = 0; + + if (verbose) // DEBUG + pr_warn("%s() Started on CPU:%d\n", __func__, + smp_processor_id()); + + /* Reset sync conditions */ + atomic_set(&sync->nr_tests_running, 0); + init_completion(&sync->start_event); + + /* Spawn off jobs on all CPUs */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + running++; + c->sync = sync; /* Send sync variable along */ + c->data = data; /* Send opaque along */ + + /* Init benchmark record */ + memset(&c->rec, 0, sizeof(struct time_bench_record)); + c->rec.version_abi = 1; + c->rec.loops = loops; + c->rec.step = step; + c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | + TIME_BENCH_WALLCLOCK); + c->rec.cpu = cpu; + c->bench_func = func; + c->task = kthread_run(invoke_test_on_cpu_func, c, + "time_bench%d", cpu); + if (IS_ERR(c->task)) { + pr_err("%s(): Failed to start test func\n", __func__); + return; /* Argh, what about cleanup?! */ + } + } + + /* Wait until all processes are running */ + while (atomic_read(&sync->nr_tests_running) < running) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + /* Kick off all CPU concurrently on completion event */ + complete_all(&sync->start_event); + + /* Wait for CPUs to finish */ + while (atomic_read(&sync->nr_tests_running)) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + + /* Stop the kthreads */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + kthread_stop(c->task); + } + + if (verbose) // DEBUG - happens often, finish on another CPU + pr_warn("%s() Finished on CPU:%d\n", __func__, + smp_processor_id()); +} diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h new file mode 100644 index 000000000000..e113fcf341dc --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + * for licensing details see kernel-base/COPYING + */ +#ifndef _LINUX_TIME_BENCH_H +#define _LINUX_TIME_BENCH_H + +/* Main structure used for recording a benchmark run */ +struct time_bench_record { + uint32_t version_abi; + uint32_t loops; /* Requested loop invocations */ + uint32_t step; /* option for e.g. bulk invocations */ + + uint32_t flags; /* Measurements types enabled */ +#define TIME_BENCH_LOOP BIT(0) +#define TIME_BENCH_TSC BIT(1) +#define TIME_BENCH_WALLCLOCK BIT(2) +#define TIME_BENCH_PMU BIT(3) + + uint32_t cpu; /* Used when embedded in time_bench_cpu */ + + /* Records */ + uint64_t invoked_cnt; /* Returned actual invocations */ + uint64_t tsc_start; + uint64_t tsc_stop; + struct timespec64 ts_start; + struct timespec64 ts_stop; + /* PMU counters for instruction and cycles + * instructions counter including pipelined instructions + */ + uint64_t pmc_inst_start; + uint64_t pmc_inst_stop; + /* CPU unhalted clock counter */ + uint64_t pmc_clk_start; + uint64_t pmc_clk_stop; + + /* Result records */ + uint64_t tsc_interval; + uint64_t time_start, time_stop, time_interval; /* in nanosec */ + uint64_t pmc_inst, pmc_clk; + + /* Derived result records */ + uint64_t tsc_cycles; // +decimal? 
+ uint64_t ns_per_call_quotient, ns_per_call_decimal; + uint64_t time_sec; + uint32_t time_sec_remainder; + uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */ +}; + +/* For synchronizing parallel CPUs to run concurrently */ +struct time_bench_sync { + atomic_t nr_tests_running; + struct completion start_event; +}; + +/* Keep track of CPUs executing our bench function. + * + * Embed a time_bench_record for storing info per cpu + */ +struct time_bench_cpu { + struct time_bench_record rec; + struct time_bench_sync *sync; /* back ptr */ + struct task_struct *task; + /* "data" opaque could have been placed in time_bench_sync, + * but to avoid any false sharing, place it per CPU + */ + void *data; + /* Support masking outsome CPUs, mark if it ran */ + bool did_bench_run; + /* int cpu; // note CPU stored in time_bench_record */ + int (*bench_func)(struct time_bench_record *record, void *data); +}; + +/* + * Below TSC assembler code is not compatible with other archs, and + * can also fail on guests if cpu-flags are not correct. + * + * The way TSC reading is used, many iterations, does not require as + * high accuracy as described below (in Intel Doc #324264). + * + * Considering changing to use get_cycles() (#include <asm/timex.h>). + */ + +/** TSC (Time-Stamp Counter) based ** + * Recommend reading, to understand details of reading TSC accurately: + * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel" + * + * Consider getting exclusive ownership of CPU by using: + * unsigned long flags; + * preempt_disable(); + * raw_local_irq_save(flags); + * _your_code_ + * raw_local_irq_restore(flags); + * preempt_enable(); + * + * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx" + * RDTSC only change "%rax" and "%rdx" but + * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx) + */ +static __always_inline uint64_t tsc_start_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("CPUID\n\t" + "RDTSC\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + //FIXME: on 32bit use clobbered %eax + %edx + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +static __always_inline uint64_t tsc_stop_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("RDTSCP\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + "CPUID\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +/** Wall-clock based ** + * + * use: getnstimeofday() + * getnstimeofday(&rec->ts_start); + * getnstimeofday(&rec->ts_stop); + * + * API changed see: Documentation/core-api/timekeeping.rst + * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday + * + * We should instead use: ktime_get_real_ts64() is a direct + * replacement, but consider using monotonic time (ktime_get_ts64()) + * and/or a ktime_t based interface (ktime_get()/ktime_get_real()). 
+ */ + +/** PMU (Performance Monitor Unit) based ** + * + * Needed for calculating: Instructions Per Cycle (IPC) + * - The IPC number tell how efficient the CPU pipelining were + */ +//lookup: perf_event_create_kernel_counter() + +bool time_bench_PMU_config(bool enable); + +/* Raw reading via rdpmc() using fixed counters + * + * From: https://github.com/andikleen/simple-pmu + */ +enum { + FIXED_SELECT = (1U << 30), /* == 0x40000000 */ + FIXED_INST_RETIRED_ANY = 0, + FIXED_CPU_CLK_UNHALTED_CORE = 1, + FIXED_CPU_CLK_UNHALTED_REF = 2, +}; + +static __always_inline unsigned int long long p_rdpmc(unsigned int in) +{ + unsigned int d, a; + + asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory"); + return ((unsigned long long)d << 32) | a; +} + +/* These PMU counter needs to be enabled, but I don't have the + * configure code implemented. My current hack is running: + * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko + */ +/* Reading all pipelined instruction */ +static __always_inline unsigned long long pmc_inst(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY); +} + +/* Reading CPU clock cycles */ +static __always_inline unsigned long long pmc_clk(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE); +} + +/* Raw reading via MSR rdmsr() is likely wrong + * FIXME: How can I know which raw MSR registers are conf for what? + */ +#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */ +#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */ +#define MSR_IA32_PCM2 0x400000C3 +static inline uint64_t msr_inst(unsigned long long *msr_result) +{ + return rdmsrq_safe(MSR_IA32_PCM0, msr_result); +} + +/** Generic functions ** + */ +bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, + int (*func)(struct time_bench_record *rec, void *data)); +bool time_bench_calc_stats(struct time_bench_record *rec); + +void time_bench_run_concurrent(uint32_t loops, int step, void *data, + const struct cpumask *mask, /* Support masking outsome CPUs*/ + struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks, + int (*func)(struct time_bench_record *record, void *data)); +void time_bench_print_stats_cpumask(const char *desc, + struct time_bench_cpu *cpu_tasks, + const struct cpumask *mask); + +//FIXME: use rec->flags to select measurement, should be MACRO +static __always_inline void time_bench_start(struct time_bench_record *rec) +{ + //getnstimeofday(&rec->ts_start); + ktime_get_real_ts64(&rec->ts_start); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_start = pmc_inst(); + rec->pmc_clk_start = pmc_clk(); + } + rec->tsc_start = tsc_start_clock(); +} + +static __always_inline void time_bench_stop(struct time_bench_record *rec, + uint64_t invoked_cnt) +{ + rec->tsc_stop = tsc_stop_clock(); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_stop = pmc_inst(); + rec->pmc_clk_stop = pmc_clk(); + } + //getnstimeofday(&rec->ts_stop); + ktime_get_real_ts64(&rec->ts_stop); + rec->invoked_cnt = invoked_cnt; +} + +#endif /* _LINUX_TIME_BENCH_H */ diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh new file mode 100755 index 000000000000..7b8b18cfedce --- /dev/null +++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# + +set -e + +DRIVER="./page_pool/bench_page_pool.ko" +result="" + +function run_test() +{ + rmmod "bench_page_pool.ko" || true + insmod $DRIVER > /dev/null 2>&1 + result=$(dmesg | 
tail -10) + echo "$result" + + echo + echo "Fast path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "ptr_ring results:" + echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "slow path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" +} + +run_test + +exit 0 diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh new file mode 100755 index 000000000000..726eb5d25839 --- /dev/null +++ b/tools/testing/selftests/net/broadcast_pmtu.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Ensures broadcast route MTU is respected + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP4="192.168.0.1/24" +CLIENT_BROADCAST_ADDRESS="192.168.0.255" + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP4="192.168.0.2/24" + +setup() { + ip netns add "${CLIENT_NS}" + ip netns add "${SERVER_NS}" + + ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" link set link0 mtu 9000 + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0 + + ip -net "${SERVER_NS}" link set link1 up + ip -net "${SERVER_NS}" link set link1 mtu 1500 + ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1 + + read -r -a CLIENT_BROADCAST_ENTRY <<< "$(ip -net "${CLIENT_NS}" route show table local type broadcast)" + ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}" + ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500 + + ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0 +} + +cleanup() { + ip -net "${SERVER_NS}" link del link1 + ip netns del "${CLIENT_NS}" + ip netns del "${SERVER_NS}" +} + +trap cleanup EXIT + +setup && + echo "Testing for broadcast route MTU" && + ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1 + +exit $? 
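
For reference on the benchmark interface introduced above: struct time_bench_record in time_bench.h only declares the derived result slots (tsc_cycles, ns_per_call_quotient/decimal, pmc_ipc_quotient/decimal); the arithmetic that fills them is done by time_bench_calc_stats() in time_bench.c, which is not part of the hunks shown here. The sketch below is a simplified user-space illustration of how those fields plausibly relate to the raw start/stop counters, and of why test_bench_page_pool.sh greps dmesg for lines of the form "Per elem: N cycles(tsc) N.N ns". The struct here is a cut-down stand-in and the quotient/decimal fixed-point split is an assumption, not the kernel implementation.

/*
 * Hedged sketch only: mirrors a few fields of struct time_bench_record
 * and shows one plausible derivation of the per-call results.  The real
 * computation lives in time_bench_calc_stats() (not shown in this diff).
 */
#include <stdint.h>
#include <stdio.h>

struct sample {
	uint64_t invoked_cnt;                   /* loops actually executed */
	uint64_t tsc_start, tsc_stop;           /* raw TSC values */
	uint64_t time_start, time_stop;         /* wall clock, nanoseconds */
	uint64_t pmc_inst_start, pmc_inst_stop; /* retired instructions */
	uint64_t pmc_clk_start, pmc_clk_stop;   /* unhalted core cycles */
};

int main(void)
{
	struct sample s = {
		.invoked_cnt = 1000000,
		.tsc_start = 0,      .tsc_stop = 45000000,
		.time_start = 0,     .time_stop = 15000000,
		.pmc_inst_start = 0, .pmc_inst_stop = 90000000,
		.pmc_clk_start = 0,  .pmc_clk_stop = 45000000,
	};
	uint64_t tsc_interval  = s.tsc_stop - s.tsc_start;
	uint64_t time_interval = s.time_stop - s.time_start;
	uint64_t pmc_inst = s.pmc_inst_stop - s.pmc_inst_start;
	uint64_t pmc_clk  = s.pmc_clk_stop  - s.pmc_clk_start;

	/* per-invocation cost, as printed in the "Per elem" dmesg lines */
	uint64_t tsc_cycles = tsc_interval / s.invoked_cnt;
	uint64_t ns_quot    = time_interval / s.invoked_cnt;
	uint64_t ns_dec     = ((time_interval * 1000) / s.invoked_cnt) % 1000;

	/* instructions per cycle from the two fixed PMU counters */
	uint64_t ipc_quot = pmc_clk ? pmc_inst / pmc_clk : 0;
	uint64_t ipc_dec  = pmc_clk ? ((pmc_inst * 100) / pmc_clk) % 100 : 0;

	printf("Per elem: %lu cycles(tsc) %lu.%03lu ns, IPC %lu.%02lu\n",
	       (unsigned long)tsc_cycles, (unsigned long)ns_quot,
	       (unsigned long)ns_dec, (unsigned long)ipc_quot,
	       (unsigned long)ipc_dec);
	return 0;
}

The selftest wrapper above does not compute anything itself: it just insmods bench_page_pool.ko and extracts these pre-formatted "Per elem" lines from dmesg for the fast-path, ptr_ring and slow-path cases.
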
+ diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3cfef5153823..c24417d0047b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -30,16 +30,25 @@ CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NF_CONNTRACK=m CONFIG_IPV6_MROUTE=y CONFIG_IPV6_SIT=y CONFIG_NF_NAT=m CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES_LEGACY=m CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES_LEGACY=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_RAW=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_TTL=m CONFIG_IPV6_GRE=m CONFIG_IPV6_SEG6_LWTUNNEL=y @@ -57,6 +66,8 @@ CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_NAT=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_TARGET_HL=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 00bde7b6f39e..d7bb2e80e88c 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -102,6 +102,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ vxlan_bridge_1d_port_8472.sh \ vxlan_bridge_1d.sh \ vxlan_bridge_1q_ipv6.sh \ + vxlan_bridge_1q_mc_ul.sh \ vxlan_bridge_1q_port_8472_ipv6.sh \ vxlan_bridge_1q_port_8472.sh \ vxlan_bridge_1q.sh \ diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 508f3c700d71..890b3374dacd 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -37,6 +37,7 @@ declare -A NETIFS=( : "${TEAMD:=teamd}" : "${MCD:=smcrouted}" : "${MC_CLI:=smcroutectl}" +: "${MCD_TABLE_NAME:=selftests}" # Constants for netdevice bring-up: # Default time in seconds to wait for an interface to come up before giving up @@ -141,6 +142,20 @@ check_tc_version() fi } +check_tc_erspan_support() +{ + local dev=$1; shift + + tc filter add dev $dev ingress pref 1 handle 1 flower \ + erspan_opts 1:0:0:0 &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing erspan support" + return $ksft_skip + fi + tc filter del dev $dev ingress pref 1 handle 1 flower \ + erspan_opts 1:0:0:0 &> /dev/null +} + # Old versions of tc don't understand "mpls_uc" check_tc_mpls_support() { @@ -525,9 +540,9 @@ setup_wait_dev_with_timeout() return 1 } -setup_wait() +setup_wait_n() { - local num_netifs=${1:-$NUM_NETIFS} + local num_netifs=$1; shift local i for ((i = 1; i <= num_netifs; ++i)); do @@ -538,6 +553,11 @@ setup_wait() sleep $WAIT_TIME } +setup_wait() +{ + setup_wait_n "$NUM_NETIFS" +} + wait_for_dev() { local dev=$1; shift @@ -1757,6 +1777,51 @@ mc_send() msend -g $groups -I $if_name -c 1 > /dev/null 2>&1 } +adf_mcd_start() +{ + local ifs=("$@") + + local table_name="$MCD_TABLE_NAME" + local smcroutedir + local pid + local if + local i + + check_command "$MCD" || return 1 + check_command "$MC_CLI" || return 1 + + smcroutedir=$(mktemp -d) + defer rm -rf "$smcroutedir" + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + "$smcroutedir/$table_name.conf" + done + + for if in "${ifs[@]}"; do + if ! 
ip_link_has_flag "$if" MULTICAST; then + ip link set dev "$if" multicast on + defer ip link set dev "$if" multicast off + fi + + echo "phyint $if enable" >> \ + "$smcroutedir/$table_name.conf" + done + + "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \ + -P "$smcroutedir/$table_name.pid" + busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid" + pid=$(cat "$smcroutedir/$table_name.pid") + defer kill_process "$pid" +} + +mc_cli() +{ + local table_name="$MCD_TABLE_NAME" + + "$MC_CLI" -I "$table_name" "$@" +} + start_ip_monitor() { local mtype=$1; shift diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh index 5a58b1ec8aef..83e52abdbc2e 100755 --- a/tools/testing/selftests/net/forwarding/router_multicast.sh +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -33,10 +33,6 @@ NUM_NETIFS=6 source lib.sh source tc_common.sh -require_command $MCD -require_command $MC_CLI -table_name=selftests - h1_create() { simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64 @@ -149,25 +145,6 @@ router_destroy() ip link set dev $rp1 down } -start_mcd() -{ - SMCROUTEDIR="$(mktemp -d)" - - for ((i = 1; i <= $NUM_NETIFS; ++i)); do - echo "phyint ${NETIFS[p$i]} enable" >> \ - $SMCROUTEDIR/$table_name.conf - done - - $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \ - -P $SMCROUTEDIR/$table_name.pid -} - -kill_mcd() -{ - pkill $MCD - rm -rf $SMCROUTEDIR -} - setup_prepare() { h1=${NETIFS[p1]} @@ -179,7 +156,7 @@ setup_prepare() rp3=${NETIFS[p5]} h3=${NETIFS[p6]} - start_mcd + adf_mcd_start || exit "$EXIT_STATUS" vrf_prepare @@ -206,7 +183,7 @@ cleanup() vrf_cleanup - kill_mcd + defer_scopes_cleanup } create_mcast_sg() @@ -214,9 +191,9 @@ create_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs + mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}" } delete_mcast_sg() @@ -224,9 +201,9 @@ delete_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs + mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}" } mcast_v4() diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index b1daad19b01e..b58909a93112 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ match_ip_tos_test match_indev_test match_ip_ttl_test match_mpls_label_test \ match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \ - match_mpls_lse_test" + match_mpls_lse_test match_erspan_opts_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -676,6 +676,56 @@ match_mpls_lse_test() log_test "mpls lse match ($tcflags)" } +match_erspan_opts_test() +{ + RET=0 + + check_tc_erspan_support $h2 || return 0 + + # h1 erspan setup + tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \ + tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II + tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \ + tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \ + # ERSPAN Type III + ip link set dev erspan1 master v$h1 + ip link set dev erspan2 master v$h1 + # h2 erspan setup + ip link add ep-ex type 
erspan ttl 64 external # To collect tunnel info + ip link set ep-ex up + ip link set dev ep-ex master v$h2 + tc qdisc add dev ep-ex clsact + + # ERSPAN Type II [decap direction] + tc filter add dev ep-ex ingress protocol ip handle 101 flower \ + $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \ + enc_key_id 1001 erspan_opts 1:6789:0:0 \ + action drop + # ERSPAN Type III [decap direction] + tc filter add dev ep-ex ingress protocol ip handle 102 flower \ + $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \ + enc_key_id 1002 erspan_opts 2:0:1:63 action drop + + ep1mac=$(mac_get erspan1) + $MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q + tc_check_packets "dev ep-ex ingress" 101 1 + check_err $? "ERSPAN Type II" + + ep2mac=$(mac_get erspan2) + $MZ erspan2 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q + tc_check_packets "dev ep-ex ingress" 102 1 + check_err $? "ERSPAN Type III" + + # h2 erspan cleanup + tc qdisc del dev ep-ex clsact + tunnel_destroy ep-ex + # h1 erspan cleanup + tunnel_destroy erspan2 # ERSPAN Type III + tunnel_destroy erspan1 # ERSPAN Type II + + log_test "erspan_opts match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh new file mode 100755 index 000000000000..462db0b603e7 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -0,0 +1,771 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------------------------+ +# | + $h1.10 + $h1.20 | +# | | 192.0.2.1/28 | 2001:db8:1::1/64 | +# | \________ ________/ | +# | \ / | +# | + $h1 H1 (vrf) | +# +-----------|-----------------------------+ +# | +# +-----------|----------------------------------------------------------------+ +# | +---------|--------------------------------------+ SWITCH (main vrf) | +# | | + $swp1 BR1 (802.1q) | | +# | | vid 10 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) | +# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 | +# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 | +# | | id 1000 id 2000 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +------------------------------------------------+ | +# | | +# | + $swp2 $swp3 + | +# | | 192.0.2.33/28 192.0.2.65/28 | | +# | | 2001:db8:2::1/64 2001:db8:3::1/64 | | +# | | | | +# +---|--------------------------------------------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | H2 (vrf) | | H3 (vrf) | | +# | +-|----------------------------+ | | +-----------------------------|-+ | +# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | | +# | | | | | | | | +# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | | +# | +-|----------------------------+ | | +-----------------------------|-+ | +# | | | | | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + | +# | 192.0.2.34/28 | | 192.0.2.66/28 | +# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 | +# | | | | +# | +--------------------------------+ | | +--------------------------------+ | +# | | BR1 (802.1q) | | | | BR1 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | | +# | | id 1000 
dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | | +# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 20 | | | | | vid 10 20 | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | | | | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | | +# | +--------------------------------+ | | +--------------------------------+ | +# +------------------------------------+ +------------------------------------+ +# +#shellcheck disable=SC2317 # SC doesn't see our uses of functions. + +: "${VXPORT:=4789}" +export VXPORT + +: "${GROUP4:=233.252.0.1}" +export GROUP4 + +: "${GROUP6:=ff0e::1:2:3}" +export GROUP6 + +: "${IPMR:=lo10}" + +ALL_TESTS=" + ipv4_nomcroute + ipv4_mcroute + ipv4_mcroute_changelink + ipv4_mcroute_starg + ipv4_mcroute_noroute + ipv4_mcroute_fdb + ipv4_mcroute_fdb_oif0 + ipv4_mcroute_fdb_oif0_sep + + ipv6_nomcroute + ipv6_mcroute + ipv6_mcroute_changelink + ipv6_mcroute_starg + ipv6_mcroute_noroute + ipv6_mcroute_fdb + ipv6_mcroute_fdb_oif0 + + ipv4_nomcroute_rx + ipv4_mcroute_rx + ipv4_mcroute_starg_rx + ipv4_mcroute_fdb_oif0_sep_rx + ipv4_mcroute_fdb_sep_rx + + ipv6_nomcroute_rx + ipv6_mcroute_rx + ipv6_mcroute_starg_rx + ipv6_mcroute_fdb_sep_rx +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + simple_if_init "$h1" + defer simple_if_fini "$h1" + + ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 + ip_link_set_up "$h1.10" + ip_addr_add "$h1.10" 192.0.2.1/28 + + ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 + ip_link_set_up "$h1.20" + ip_addr_add "$h1.20" 2001:db8:1::1/64 +} + +install_capture() +{ + local dev=$1; shift + + tc qdisc add dev "$dev" clsact + defer tc qdisc del dev "$dev" clsact + + tc filter add dev "$dev" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port "$VXPORT" \ + action pass + defer tc filter del dev "$dev" ingress proto ip pref 104 + + tc filter add dev "$dev" ingress proto ipv6 pref 106 \ + flower skip_hw ip_proto udp dst_port "$VXPORT" \ + action pass + defer tc filter del dev "$dev" ingress proto ipv6 pref 106 +} + +h2_create() +{ + # $h2 + ip_link_set_up "$h2" + + # H2 + vrf_create "v$h2" + defer vrf_destroy "v$h2" + + ip_link_set_up "v$h2" + + # br2 + ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 + ip_link_set_master br2 "v$h2" + ip_link_set_up br2 + + # $h2 + ip_link_set_master "$h2" br2 + install_capture "$h2" + + # v1$h2 + ip_link_set_up "v1$h2" + ip_link_set_master "v1$h2" br2 +} + +h3_create() +{ + # $h3 + ip_link_set_up "$h3" + + # H3 + vrf_create "v$h3" + defer vrf_destroy "v$h3" + + ip_link_set_up "v$h3" + + # br3 + ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 + ip_link_set_master br3 "v$h3" + ip_link_set_up br3 + + # $h3 + ip_link_set_master "$h3" br3 + install_capture "$h3" + + # v1$h3 + ip_link_set_up "v1$h3" + 
ip_link_set_master "v1$h3" br3 +} + +switch_create() +{ + local swp1_mac + + # br1 + swp1_mac=$(mac_get "$swp1") + ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + ip_link_set_addr br1 "$swp1_mac" + ip_link_set_up br1 + + # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on + # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test. + ip_link_add "X$IPMR" up type dummy + + # IPMR + ip_link_add "$IPMR" up type dummy + ip_addr_add "$IPMR" 192.0.2.100/28 + ip_addr_add "$IPMR" 2001:db8:4::1/64 + + # $swp1 + ip_link_set_up "$swp1" + ip_link_set_master "$swp1" br1 + bridge_vlan_add vid 10 dev "$swp1" + bridge_vlan_add vid 20 dev "$swp1" + + # $swp2 + ip_link_set_up "$swp2" + ip_addr_add "$swp2" 192.0.2.33/28 + ip_addr_add "$swp2" 2001:db8:2::1/64 + + # $swp3 + ip_link_set_up "$swp3" + ip_addr_add "$swp3" 192.0.2.65/28 + ip_addr_add "$swp3" 2001:db8:3::1/64 +} + +vx_create() +{ + local name=$1; shift + local vid=$1; shift + + ip_link_add "$name" up type vxlan dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 16 \ + "$@" + ip_link_set_master "$name" br1 + bridge_vlan_add vid "$vid" dev "$name" pvid untagged +} +export -f vx_create + +vx_wait() +{ + # Wait for all the ARP, IGMP etc. noise to settle down so that the + # tunnel is clear for measurements. + sleep 10 +} + +vx10_create() +{ + vx_create vx10 10 id 1000 "$@" +} +export -f vx10_create + +vx20_create() +{ + vx_create vx20 20 id 2000 "$@" +} +export -f vx20_create + +vx10_create_wait() +{ + vx10_create "$@" + vx_wait +} + +vx20_create_wait() +{ + vx20_create "$@" + vx_wait +} + +ns_init_common() +{ + local ns=$1; shift + local if_in=$1; shift + local ipv4_in=$1; shift + local ipv6_in=$1; shift + local ipv4_host=$1; shift + local ipv6_host=$1; shift + + # v2$h2 / v2$h3 + ip_link_set_up "$if_in" + ip_addr_add "$if_in" "$ipv4_in" + ip_addr_add "$if_in" "$ipv6_in" + + # br1 + ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + ip_link_set_up br1 + + # vx10, vx20 + vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in" + vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in" + + # w1 + ip_link_add w1 type veth peer name w2 + ip_link_set_master w1 br1 + ip_link_set_up w1 + bridge_vlan_add vid 10 dev w1 + bridge_vlan_add vid 20 dev w1 + + # w2 + simple_if_init w2 + defer simple_if_fini w2 + + # w2.10 + ip_link_add w2.10 master vw2 link w2 type vlan id 10 + ip_link_set_up w2.10 + ip_addr_add w2.10 "$ipv4_host" + + # w2.20 + ip_link_add w2.20 master vw2 link w2 type vlan id 20 + ip_link_set_up w2.20 + ip_addr_add w2.20 "$ipv6_host" +} +export -f ns_init_common + +ns2_create() +{ + # NS2 + ip netns add ns2 + defer ip netns del ns2 + + # v2$h2 + ip link set dev "v2$h2" netns ns2 + defer ip -n ns2 link set dev "v2$h2" netns 1 + + in_ns ns2 \ + ns_init_common ns2 "v2$h2" \ + 192.0.2.34/28 2001:db8:2::2/64 \ + 192.0.2.3/28 2001:db8:1::3/64 +} + +ns3_create() +{ + # NS3 + ip netns add ns3 + defer ip netns del ns3 + + # v2$h3 + ip link set dev "v2$h3" netns ns3 + defer ip -n ns3 link set dev "v2$h3" netns 1 + + ip -n ns3 link set dev "v2$h3" up + + in_ns ns3 \ + ns_init_common ns3 "v2$h3" \ + 192.0.2.66/28 2001:db8:3::2/64 \ + 192.0.2.4/28 2001:db8:1::4/64 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + vrf_prepare + defer vrf_cleanup + + forwarding_enable + defer forwarding_restore + + ip_link_add "v1$h2" type veth peer 
name "v2$h2" + ip_link_add "v1$h3" type veth peer name "v2$h3" + + h1_create + h2_create + h3_create + switch_create + ns2_create + ns3_create +} + +adf_install_broken_sg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3" + defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3" + + mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3" + defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3" +} + +adf_install_rx() +{ + mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR" + defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" lo10 + + mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR" + defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" lo10 + + mc_cli add "$swp2" :: "$GROUP6" "$IPMR" + defer mc_cli remove "$swp2" :: "$GROUP6" lo10 + + mc_cli add "$swp3" :: "$GROUP6" "$IPMR" + defer mc_cli remove "$swp3" :: "$GROUP6" lo10 +} + +adf_install_sg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 192.0.2.33 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +adf_install_sg_sep() +{ + adf_mcd_start lo || exit "$EXIT_STATUS" + + mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + + mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" +} + +adf_install_sg_sep_rx() +{ + local lo=$1; shift + + adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS" + + mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +adf_install_starg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +do_packets_v4() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q +} + +do_packets_v6() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q +} + +do_test() +{ + local ipv=$1; shift + local expect_h2=$1; shift + local expect_h3=$1; shift + local what=$1; shift + + local pref=$((100 + ipv)) + local t0_h2 + local t0_h3 + local t1_h2 + local t1_h3 + local d_h2 + local d_h3 + + RET=0 + + t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + "do_packets_v$ipv" + sleep 1 + + t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + d_h2=$((t1_h2 - t0_h2)) + d_h3=$((t1_h3 - t0_h3)) + + ((d_h2 == expect_h2)) + check_err $? "Expected $expect_h2 packets on H2, got $d_h2" + + ((d_h3 == expect_h3)) + check_err $? "Expected $expect_h3 packets on H3, got $d_h3" + + log_test "VXLAN MC flood $what" +} + +ipv4_do_test_rx() +{ + local h3_should_fail=$1; shift + local what=$1; shift + + RET=0 + + ping_do "$h1.10" 192.0.2.3 + check_err $? 
"H2 should respond" + + ping_do "$h1.10" 192.0.2.4 + check_err_fail "$h3_should_fail" $? "H3 responds" + + log_test "VXLAN MC flood $what" +} + +ipv6_do_test_rx() +{ + local h3_should_fail=$1; shift + local what=$1; shift + + RET=0 + + ping6_do "$h1.20" 2001:db8:1::3 + check_err $? "H2 should respond" + + ping6_do "$h1.20" 2001:db8:1::4 + check_err_fail "$h3_should_fail" $? "H3 responds" + + log_test "VXLAN MC flood $what" +} + +ipv4_nomcroute() +{ + # Install a misleading (S,G) rule to attempt to trick the system into + # pushing the packets elsewhere. + adf_install_broken_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2" + do_test 4 10 0 "IPv4 nomcroute" +} + +ipv6_nomcroute() +{ + # Like for IPv4, install a misleading (S,G). + adf_install_broken_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + do_test 6 10 0 "IPv6 nomcroute" +} + +ipv4_nomcroute_rx() +{ + vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2" + ipv4_do_test_rx 1 "IPv4 nomcroute ping" +} + +ipv6_nomcroute_rx() +{ + vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2" + ipv6_do_test_rx 1 "IPv6 nomcroute ping" +} + +ipv4_mcroute() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 10 10 "IPv4 mcroute" +} + +ipv6_mcroute() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 10 10 "IPv6 mcroute" +} + +ipv4_mcroute_rx() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + ipv4_do_test_rx 0 "IPv4 mcroute ping" +} + +ipv6_mcroute_rx() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ipv6_do_test_rx 0 "IPv6 mcroute ping" +} + +ipv4_mcroute_changelink() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" + ip link set dev vx10 type vxlan mcroute + sleep 1 + do_test 4 10 10 "IPv4 mcroute changelink" +} + +ipv6_mcroute_changelink() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ip link set dev vx20 type vxlan mcroute + sleep 1 + do_test 6 10 10 "IPv6 mcroute changelink" +} + +ipv4_mcroute_starg() +{ + adf_install_starg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 10 10 "IPv4 mcroute (*,G)" +} + +ipv6_mcroute_starg() +{ + adf_install_starg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 10 10 "IPv6 mcroute (*,G)" +} + +ipv4_mcroute_starg_rx() +{ + adf_install_starg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping" +} + +ipv6_mcroute_starg_rx() +{ + adf_install_starg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping" +} + +ipv4_mcroute_noroute() +{ + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + do_test 4 0 0 "IPv4 mcroute, no route" +} + +ipv6_mcroute_noroute() +{ + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + do_test 6 0 0 "IPv6 mcroute, no route" +} + +ipv4_mcroute_fdb() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute + bridge fdb add dev vx10 \ + 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR" + do_test 4 10 10 "IPv4 mcroute FDB" +} + +ipv6_mcroute_fdb() +{ + adf_install_sg + vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute + bridge -6 fdb add dev vx20 \ + 00:00:00:00:00:00 self static dst 
"$GROUP6" via "$IPMR" + do_test 6 10 10 "IPv6 mcroute FDB" +} + +# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup. +ipv4_mcroute_fdb_oif0() +{ + adf_install_sg + vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + do_test 4 10 10 "IPv4 mcroute oif=0" +} + +ipv6_mcroute_fdb_oif0() +{ + # The IPv6 tunnel lookup does not fall back to selection by source + # address. Instead it just does a FIB match, and that would find one of + # the several ff00::/8 multicast routes -- each device has one. In order + # to reliably force the $IPMR device, add a /128 route for the + # destination group address. + ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR" + defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR" + + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + bridge -6 fdb del dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6" + do_test 6 10 10 "IPv6 mcroute oif=0" +} + +# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR. +# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT. +ipv4_mcroute_fdb_oif0_sep() +{ + adf_install_sg_sep + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0" +} + +ipv4_mcroute_fdb_oif0_sep_rx() +{ + adf_install_sg_sep_rx lo + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping" +} + +ipv4_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx lo + + ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add \ + dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping" +} + +ipv6_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx "X$IPMR" + + ip_addr_add "X$IPMR" 2001:db8:5::1/64 + vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute + bridge -6 fdb del dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 \ + self static dst "$GROUP6" via "X$IPMR" + ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh index 5b34f6e1f831..48eb999a3120 100755 --- a/tools/testing/selftests/net/gre_ipv6_lladdr.sh +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -24,7 +24,10 @@ setup_basenet() ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad } -# Check if network device has an IPv6 link-local address assigned. +# Check the IPv6 configuration of a network device. +# +# We currently check the generation of the link-local IPv6 address and the +# creation of the ff00::/8 multicast route. 
# # Parameters: # @@ -35,7 +38,7 @@ setup_basenet() # a link-local address) # * $4: The user visible name for the scenario being tested # -check_ipv6_ll_addr() +check_ipv6_device_config() { local DEV="$1" local EXTRA_MATCH="$2" @@ -45,7 +48,11 @@ check_ipv6_ll_addr() RET=0 set +e ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" - check_err_fail "${XRET}" $? "" + check_err_fail "${XRET}" $? "IPv6 link-local address generation" + + ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}" + check_err_fail 0 $? "IPv6 multicast route creation" + log_test "${MSG}" set -e } @@ -102,20 +109,20 @@ test_gre_device() ;; esac - # Check that IPv6 link-local address is generated when device goes up + # Check the IPv6 device configuration when it goes up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" ip -netns "${NS0}" link set dev gretest up - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" # Now disable link-local address generation ip -netns "${NS0}" link set dev gretest down ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 ip -netns "${NS0}" link set dev gretest up - # Check that link-local address generation works when re-enabled while - # the device is already up + # Check the IPv6 device configuration when link-local address + # generation is re-enabled while the device is already up ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" - check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" ip -netns "${NS0}" link del dev gretest } @@ -126,7 +133,7 @@ test_gre4() local MODE for GRE_TYPE in "gre" "gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" @@ -142,7 +149,7 @@ test_gre6() local MODE for GRE_TYPE in "ip6gre" "ip6gretap"; do - printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" for MODE in "eui64" "none" "stable-privacy" "random"; do test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh new file mode 100755 index 000000000000..bf0243366caa --- /dev/null +++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test IPv6 force_forwarding interface property +# +# This test verifies that the force_forwarding property works correctly: +# - When global forwarding is disabled, packets are not forwarded normally +# - When force_forwarding is enabled on an interface, packets are forwarded +# regardless of the global forwarding setting + +source lib.sh + +cleanup() { + cleanup_ns $ns1 $ns2 $ns3 +} + +trap cleanup EXIT + +setup_test() { + # Create three namespaces: sender, router, receiver + setup_ns ns1 ns2 ns3 + + # Create veth pairs: ns1 <-> ns2 <-> ns3 + ip link add name veth12 type veth peer name veth21 + ip link add name veth23 type 
veth peer name veth32 + + # Move interfaces to namespaces + ip link set veth12 netns $ns1 + ip link set veth21 netns $ns2 + ip link set veth23 netns $ns2 + ip link set veth32 netns $ns3 + + # Configure interfaces + ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad + ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad + ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad + ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad + + # Bring up interfaces + ip -n $ns1 link set veth12 up + ip -n $ns2 link set veth21 up + ip -n $ns2 link set veth23 up + ip -n $ns3 link set veth32 up + + # Add routes + ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2 + ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1 + + # Disable global forwarding + ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0 +} + +test_force_forwarding() { + local ret=0 + + echo "TEST: force_forwarding functionality" + + # Check if force_forwarding sysctl exists + if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then + echo "SKIP: force_forwarding not available" + return $ksft_skip + fi + + # Test 1: Without force_forwarding, ping should fail + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=0 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=0 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "FAIL: ping succeeded when forwarding disabled" + ret=1 + else + echo "PASS: forwarding disabled correctly" + fi + + # Test 2: With force_forwarding enabled, ping should succeed + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=1 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=1 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "PASS: force_forwarding enabled forwarding" + else + echo "FAIL: ping failed with force_forwarding enabled" + ret=1 + fi + + return $ret +} + +echo "IPv6 force_forwarding test" +echo "==========================" + +setup_test +test_force_forwarding +ret=$? + +if [ $ret -eq 0 ]; then + echo "OK" + exit 0 +elif [ $ret -eq $ksft_skip ]; then + echo "SKIP" + exit $ksft_skip +else + echo "FAIL" + exit 1 +fi diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 006fdadcc4b9..c7add0dc4c60 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -240,6 +240,29 @@ create_netdevsim() { echo nsim$id } +create_netdevsim_port() { + local nsim_id="$1" + local ns="$2" + local port_id="$3" + local perm_addr="$4" + local orig_dev + local new_dev + local nsim_path + + nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id" + + echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1 + + orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1) + orig_dev=$(basename "$orig_dev") + new_dev="nsim${nsim_id}p$port_id" + + ip -netns "$ns" link set dev "$orig_dev" name "$new_dev" + ip -netns "$ns" link set dev "$new_dev" up + + echo "$new_dev" +} + # Remove netdevsim with given id. 
cleanup_netdevsim() { local id="$1" @@ -312,7 +335,7 @@ log_test_result() local test_name=$1; shift local opt_str=$1; shift local result=$1; shift - local retmsg=$1; shift + local retmsg=$1 printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" if [[ $retmsg ]]; then @@ -547,13 +570,19 @@ ip_link_set_addr() defer ip link set dev "$name" address "$old_addr" } -ip_link_is_up() +ip_link_has_flag() { local name=$1; shift + local flag=$1; shift local state=$(ip -j link show "$name" | - jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"') - [[ $state == "UP" ]] + jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)') + [[ $state == true ]] +} + +ip_link_is_up() +{ + ip_link_has_flag "$1" UP } ip_link_set_up() diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 8697bd27dc30..02be28dcc089 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -6,4 +6,4 @@ from .netns import NetNS, NetNSEnter from .nsim import * from .utils import * from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily -from .ynl import NetshaperFamily +from .ynl import NetshaperFamily, DevlinkFamily diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 61287c203b6e..8e35ed12ed9e 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -32,6 +32,7 @@ class KsftTerminate(KeyboardInterrupt): def ksft_pr(*objs, **kwargs): + kwargs["flush"] = True print("#", *objs, **kwargs) @@ -139,7 +140,7 @@ def ktap_result(ok, cnt=1, case="", comment=""): res += "." + str(case.__name__) if comment: res += " # " + comment - print(res) + print(res, flush=True) def ksft_flush_defer(): @@ -227,8 +228,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} - print("TAP version 13") - print("1.." + str(len(cases))) + print("TAP version 13", flush=True) + print("1.." + str(len(cases)), flush=True) global KSFT_RESULT cnt = 0 diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 34470d65d871..f395c90fb0f1 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -175,6 +175,10 @@ def tool(name, args, json=None, ns=None, host=None): return cmd_obj +def bpftool(args, json=None, ns=None, host=None): + return tool('bpftool', args, json=json, ns=ns, host=host) + + def ip(args, json=None, ns=None, host=None): if ns: args = f'-netns {ns} ' + args @@ -185,6 +189,41 @@ def ethtool(args, json=None, ns=None, host=None): return tool('ethtool', args, json=json, ns=ns, host=host) +def bpftrace(expr, json=None, ns=None, host=None, timeout=None): + """ + Run bpftrace and return map data (if json=True). + The output of bpftrace is inconvenient, so the helper converts + to a dict indexed by map name, e.g.: + { + "@": { ... }, + "@map2": { ... 
}, + } + """ + cmd_arr = ['bpftrace'] + # Throw in --quiet if json, otherwise the output has two objects + if json: + cmd_arr += ['-f', 'json', '-q'] + if timeout: + expr += ' interval:s:' + str(timeout) + ' { exit(); }' + cmd_arr += ['-e', expr] + cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False) + if json: + # bpftrace prints objects as lines + ret = {} + for l in cmd_obj.stdout.split('\n'): + if not l.strip(): + continue + one = _json.loads(l) + if one.get('type') != 'map': + continue + for k, v in one["data"].items(): + if k.startswith('@'): + k = k.lstrip('@') + ret[k] = v + return ret + return cmd_obj + + def rand_port(type=socket.SOCK_STREAM): """ Get a random unprivileged port. diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 6329ae805abf..2b3a61ea3bfa 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -56,3 +56,8 @@ class NetshaperFamily(YnlFamily): def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), schema='', recv_size=recv_size) + +class DevlinkFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(), + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c new file mode 100644 index 000000000000..521ba38f2ddd --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -0,0 +1,621 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#define MAX_ADJST_OFFSET 256 +#define MAX_PAYLOAD_LEN 5000 +#define MAX_HDR_LEN 64 + +enum { + XDP_MODE = 0, + XDP_PORT = 1, + XDP_ADJST_OFFSET = 2, + XDP_ADJST_TAG = 3, +} xdp_map_setup_keys; + +enum { + XDP_MODE_PASS = 0, + XDP_MODE_DROP = 1, + XDP_MODE_TX = 2, + XDP_MODE_TAIL_ADJST = 3, + XDP_MODE_HEAD_ADJST = 4, +} xdp_map_modes; + +enum { + STATS_RX = 0, + STATS_PASS = 1, + STATS_DROP = 2, + STATS_TX = 3, + STATS_ABORT = 4, +} xdp_stats; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __s32); +} map_xdp_setup SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __u64); +} map_xdp_stats SEC(".maps"); + +static __u32 min(__u32 a, __u32 b) +{ + return a < b ? 
a : b; +} + +static void record_stats(struct xdp_md *ctx, __u32 stat_type) +{ + __u64 *count; + + count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type); + + if (count) + __sync_fetch_and_add(count, 1); +} + +static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return NULL; + + udph = (void *)eth + sizeof(*iph) + sizeof(*eth); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return NULL; + + udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + } else { + return NULL; + } + + if (udph + 1 > (struct udphdr *)data_end) + return NULL; + + if (udph->dest != bpf_htons(port)) + return NULL; + + record_stats(ctx, STATS_RX); + + return udph; +} + +static int xdp_mode_pass(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_PASS); + + return XDP_PASS; +} + +static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_DROP); + + return XDP_DROP; +} + +static void swap_machdr(void *data) +{ + struct ethhdr *eth = data; + __u8 tmp_mac[ETH_ALEN]; + + __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN); + __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN); + __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN); +} + +static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + + if (data + sizeof(*eth) > data_end) + return XDP_PASS; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + __be32 tmp_ip = iph->saddr; + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return XDP_PASS; + + udph = data + sizeof(*iph) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + swap_machdr((void *)eth); + + iph->saddr = iph->daddr; + iph->daddr = tmp_ip; + + record_stats(ctx, STATS_TX); + + return XDP_TX; + + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + struct in6_addr tmp_ipv6; + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return XDP_PASS; + + udph = data + sizeof(*ipv6h) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + swap_machdr((void *)eth); + + __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr, + sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6)); + + record_stats(ctx, STATS_TX); + + return XDP_TX; + } + + return XDP_PASS; +} + +static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) +{ + void 
*data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + __u32 len, len_new; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + __u16 total_len; + + if (iph + 1 > (struct iphdr *)data_end) + return NULL; + + iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset); + + udph = (void *)eth + sizeof(*iph) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + __u16 payload_len; + + if (ipv6h + 1 > (struct ipv6hdr *)data_end) + return NULL; + + udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + *udp_csum = ~((__u32)udph->check); + + len = ipv6h->payload_len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + ipv6h->payload_len = len_new; + + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + + len = udph->len; + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + } else { + return NULL; + } + + udph->len = len_new; + + return udph; +} + +static __u16 csum_fold_helper(__u32 csum) +{ + return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff; +} + +static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, + __u32 hdr_len) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + struct udphdr *udph = NULL; + __u32 buff_len; + + udph = update_pkt(ctx, 0 - offset, &udp_csum); + if (!udph) + return -1; + + buff_len = bpf_xdp_get_buff_len(ctx); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + /* Make sure we have enough data to avoid eating the header */ + if (buff_len - offset < hdr_len) + return -1; + + buff_pos = buff_len - offset; + if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + __u32 buff_len, hdr_len, key; + struct udphdr *udph; + __s32 *val; + __u8 tag; + + /* Proceed to update the packet headers before attempting to adjuste + * the tail. Once the tail is adjusted we lose access to the offset + * amount of data at the end of the packet which is crucial to update + * the checksum. + * Since any failure beyond this would abort the packet, we should + * not worry about passing a packet up the stack with wrong headers + */ + udph = update_pkt(ctx, offset, &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&tmp_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + buff_len = bpf_xdp_get_buff_len(ctx); + + if (bpf_xdp_adjust_tail(ctx, offset) < 0) { + bpf_printk("Failed to adjust tail\n"); + return -1; + } + + if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) +{ + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + __s32 *adjust_offset, *val; + __u32 key, hdr_len; + void *offset_ptr; + __u8 tag; + int ret; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + hdr_len = (void *)udph - data + sizeof(struct udphdr); + key = XDP_ADJST_OFFSET; + adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!adjust_offset) + return XDP_PASS; + + if (*adjust_offset < 0) + ret = xdp_adjst_tail_shrnk_data(ctx, + (__u16)(0 - *adjust_offset), + hdr_len); + else + ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset)); + if (ret) + goto abort_pkt; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort_pkt: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + +static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + struct udphdr *udph; + void *offset_ptr; + __u32 udp_csum = 0; + + /* Update the length information in the IP and UDP headers before + * adjusting the headroom. This simplifies accessing the relevant + * fields in the IP and UDP headers for fragmented packets. Any + * failure beyond this point will result in the packet being aborted, + * so we don't need to worry about incorrect length information for + * passed packets. + */ + udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum); + if (!udph) + return -1; + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0) + return -1; + + if (bpf_xdp_adjust_head(ctx, offset) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char hdr_buff[MAX_HDR_LEN]; + char data_buff[MAX_ADJST_OFFSET]; + void *offset_ptr; + __s32 *val; + __u32 key; + __u8 tag; + __u32 udp_csum = 0; + struct udphdr *udph; + + udph = update_pkt(ctx, (__s16)(offset), &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&data_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph_ptr = NULL; + __u32 key, size, hdr_len; + __s32 *val; + int res; + + /* Filter packets based on UDP port */ + udph_ptr = filter_udphdr(ctx, port); + if (!udph_ptr) + return XDP_PASS; + + hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr); + + key = XDP_ADJST_OFFSET; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return XDP_PASS; + + switch (*val) { + case -16: + case 16: + size = 16; + break; + case -32: + case 32: + size = 32; + break; + case -64: + case 64: + size = 64; + break; + case -128: + case 128: + size = 128; + break; + case -256: + case 256: + size = 256; + break; + default: + bpf_printk("Invalid adjustment offset: %d\n", *val); + goto abort; + } + + if (*val < 0) + res = xdp_adjst_head_grow_data(ctx, hdr_len, size); + else + res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size); + + if (res) + goto abort; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + +static int xdp_prog_common(struct xdp_md *ctx) +{ + __u32 key, *port; + __s32 *mode; + + key = XDP_MODE; + mode = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!mode) + return XDP_PASS; + + key = XDP_PORT; + port = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!port) + return XDP_PASS; + + switch (*mode) { + case XDP_MODE_PASS: + return xdp_mode_pass(ctx, (__u16)(*port)); + case XDP_MODE_DROP: + return xdp_mode_drop_handler(ctx, (__u16)(*port)); + case XDP_MODE_TX: + return xdp_mode_tx_handler(ctx, (__u16)(*port)); + case XDP_MODE_TAIL_ADJST: + return xdp_adjst_tail(ctx, (__u16)(*port)); + case XDP_MODE_HEAD_ADJST: + return xdp_head_adjst(ctx, (__u16)(*port)); + } + + /* Default action is to simple pass */ + return XDP_PASS; +} + +SEC("xdp") +int xdp_prog(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +SEC("xdp.frags") +int xdp_prog_frags(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index e47788bfa671..4c7e51336ab2 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -4,7 +4,8 @@ top_srcdir = ../../../../.. 
CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ +TEST_PROGS := mptcp_connect.sh mptcp_connect_mmap.sh mptcp_connect_sendfile.sh \ + mptcp_connect_checksum.sh pm_netlink.sh mptcp_join.sh diag.sh \ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq mptcp_diag diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 4f80014cae49..968d440c03fe 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -13,6 +13,7 @@ CONFIG_NETFILTER_NETLINK=m CONFIG_NF_TABLES=m CONFIG_NFT_COMPAT=m CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_MATCH_BPF=m CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NETFILTER_XT_MATCH_STATISTIC=m @@ -25,6 +26,7 @@ CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IP6_NF_FILTER=m CONFIG_NET_ACT_CSUM=m diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh new file mode 100755 index 000000000000..ce93ec2f107f --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -C "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh new file mode 100755 index 000000000000..5dd30f9394af --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh new file mode 100755 index 000000000000..1d16fb1cc9bb --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}" diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 7ea5fb28c93d..1d5d3c4e7e87 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -77,6 +77,7 @@ static int cfg_cork; static bool cfg_cork_mixed; static int cfg_cpu = -1; /* default: pin to last cpu */ +static int cfg_expect_zerocopy = -1; static int cfg_family = PF_UNSPEC; static int cfg_ifindex = 1; static int cfg_payload_len; @@ -92,9 +93,9 @@ static socklen_t cfg_alen; static struct sockaddr_storage cfg_dst_addr; static struct sockaddr_storage cfg_src_addr; +static int exitcode; static char payload[IP_MAXPACKET]; static long packets, bytes, completions, expected_completions; -static int zerocopied = -1; static uint32_t next_completion; static uint32_t sends_since_notify; @@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain) next_completion = hi + 1; zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED); - if (zerocopied == -1) - zerocopied = zerocopy; - else if (zerocopied != zerocopy) { - fprintf(stderr, "serr: 
inconsistent\n"); - zerocopied = zerocopy; + if (cfg_expect_zerocopy != -1 && + cfg_expect_zerocopy != zerocopy) { + fprintf(stderr, "serr: ee_code: %u != expected %u\n", + zerocopy, cfg_expect_zerocopy); + exitcode = 1; + /* suppress repeated messages */ + cfg_expect_zerocopy = zerocopy; } if (cfg_verbose >= 2) @@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol) fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n", packets, bytes >> 20, completions, - zerocopied == 1 ? 'y' : 'n'); + cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n'); } static int do_setup_rx(int domain, int type, int protocol) @@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = max_payload_len; - while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) { + while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv) case 'z': cfg_zerocopy = true; break; + case 'Z': + cfg_expect_zerocopy = !!atoi(optarg); + break; } } @@ -817,5 +823,5 @@ int main(int argc, char **argv) else error(1, 0, "unknown cfg_test %s", cfg_test); - return 0; + return exitcode; } diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index 89c22f5320e0..28178a38a4e7 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -6,6 +6,7 @@ set -e readonly DEV="veth0" +readonly DUMMY_DEV="dummy0" readonly DEV_MTU=65535 readonly BIN="./msg_zerocopy" @@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}" readonly NS1="${NSPREFIX}1" readonly NS2="${NSPREFIX}2" -readonly SADDR4='192.168.1.1' -readonly DADDR4='192.168.1.2' -readonly SADDR6='fd::1' -readonly DADDR6='fd::2' +readonly LPREFIX4='192.168.1' +readonly RPREFIX4='192.168.2' +readonly LPREFIX6='fd' +readonly RPREFIX6='fc' + readonly path_sysctl_mem="net.core.optmem_max" # No arguments: automated test if [[ "$#" -eq "0" ]]; then - $0 4 tcp -t 1 - $0 6 tcp -t 1 - $0 4 udp -t 1 - $0 6 udp -t 1 - echo "OK. All tests passed" - exit 0 + ret=0 + + $0 4 tcp -t 1 || ret=1 + $0 6 tcp -t 1 || ret=1 + $0 4 udp -t 1 || ret=1 + $0 6 udp -t 1 || ret=1 + + [[ "$ret" == "0" ]] && echo "OK. 
All tests passed" + exit $ret fi # Argument parsing @@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@" # Argument parsing: configure addresses if [[ "${IP}" == "4" ]]; then - readonly SADDR="${SADDR4}" - readonly DADDR="${DADDR4}" + readonly SADDR="${LPREFIX4}.1" + readonly DADDR="${LPREFIX4}.2" + readonly DUMMY_ADDR="${RPREFIX4}.1" + readonly DADDR_TXONLY="${RPREFIX4}.2" + readonly MASK="24" elif [[ "${IP}" == "6" ]]; then - readonly SADDR="${SADDR6}" - readonly DADDR="${DADDR6}" + readonly SADDR="${LPREFIX6}::1" + readonly DADDR="${LPREFIX6}::2" + readonly DUMMY_ADDR="${RPREFIX6}::1" + readonly DADDR_TXONLY="${RPREFIX6}::2" + readonly MASK="64" + readonly NODAD="nodad" else echo "Invalid IP version ${IP}" exit 1 @@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" +ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy + # Bring the devices up ip -netns "${NS1}" link set "${DEV}" up ip -netns "${NS2}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DUMMY_DEV}" up # Set fixed MAC addresses on the devices ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 # Add fixed IP addresses to the devices -ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" -ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" -ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad -ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad +ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD} +ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD} +ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD} + +ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}" +ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}" + +ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1 +ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1 # Optionally disable sg or csum offload to test edge cases # ip netns exec "${NS1}" ethtool -K "${DEV}" sg off +ret=0 + do_test() { local readonly ARGS="$1" - echo "ipv${IP} ${TXMODE} ${ARGS}" - ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" & + # tx-rx test + # packets queued to a local socket are copied, + # sender notification has SO_EE_CODE_ZEROCOPY_COPIED. + + echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n" + ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \ + -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" & sleep 0.2 - ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" + ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \ + -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1 wait + + # next test is unconnected tx to dummy0, cannot exercise with tcp + [[ "${TXMODE}" == "tcp" ]] && return + + # tx-only test: send out dummy0 + # packets leaving the host are not copied, + # sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED. 
+ + echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n" + ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \ + -S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1 } do_test "${EXTRA_ARGS}" do_test "-z ${EXTRA_ARGS}" -echo ok + +[[ "$ret" == "0" ]] && echo "OK" diff --git a/tools/testing/selftests/net/nat6to4.sh b/tools/testing/selftests/net/nat6to4.sh new file mode 100755 index 000000000000..0ee859b622a4 --- /dev/null +++ b/tools/testing/selftests/net/nat6to4.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NS="ns-peer-$(mktemp -u XXXXXX)" + +ip netns add "${NS}" +ip -netns "${NS}" link set lo up +ip -netns "${NS}" route add default via 127.0.0.2 dev lo + +tc -n "${NS}" qdisc add dev lo ingress +tc -n "${NS}" filter add dev lo ingress prio 4 protocol ip \ + bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action + +ip netns exec "${NS}" \ + bash -c 'echo 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abc | socat - UDP4-DATAGRAM:224.1.0.1:6666,ip-multicast-loop=1' diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh new file mode 100755 index 000000000000..18509da293e5 --- /dev/null +++ b/tools/testing/selftests/net/netdev-l2addr.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +set -o pipefail + +NSIM_ADDR=2025 +TEST_ADDR="d0:be:d0:be:d0:00" + +RET_CODE=0 + +cleanup() { + cleanup_netdevsim "$NSIM_ADDR" + cleanup_ns "$NS" +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + RET_CODE=1 +} + +get_addr() +{ + local type="$1" + local dev="$2" + local ns="$3" + + ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type" +} + +setup_ns NS + +nsim=$(create_netdevsim $NSIM_ADDR "$NS") + +get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr" +get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr" +get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it" + +ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR" +ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR" + +[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr" +[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr" + +if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then + fail "Created netdevsim with broadcast permaddr" +fi + +nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR") + +get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr" +get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr" +[[ "$(get_addr permaddr "$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr" + +cleanup_netdevsim "$NSIM_ADDR" "$NS" + +exit $RET_CODE diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore index 64c4f8d9aa6c..5d2be9a00627 100644 --- a/tools/testing/selftests/net/netfilter/.gitignore +++ b/tools/testing/selftests/net/netfilter/.gitignore @@ -5,3 +5,4 @@ conntrack_dump_flush conntrack_reverse_clash sctp_collision nf_queue +udpclash diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index e9b2f553588d..a98ed892f55f 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ 
b/tools/testing/selftests/net/netfilter/Makefile @@ -15,6 +15,7 @@ TEST_PROGS += conntrack_tcp_unreplied.sh TEST_PROGS += conntrack_resize.sh TEST_PROGS += conntrack_sctp_collision.sh TEST_PROGS += conntrack_vrf.sh +TEST_PROGS += conntrack_clash.sh TEST_PROGS += conntrack_reverse_clash.sh TEST_PROGS += ipvs.sh TEST_PROGS += nf_conntrack_packetdrill.sh @@ -44,6 +45,7 @@ TEST_GEN_FILES += connect_close nf_queue TEST_GEN_FILES += conntrack_dump_flush TEST_GEN_FILES += conntrack_reverse_clash TEST_GEN_FILES += sctp_collision +TEST_GEN_FILES += udpclash include ../../lib.mk @@ -52,6 +54,7 @@ $(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS) $(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS) $(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS) +$(OUTPUT)/udpclash: LDLIBS += -lpthread TEST_FILES := lib.sh TEST_FILES += packetdrill diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 363646f4fefe..79d5b33966ba 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -1,6 +1,8 @@ CONFIG_AUDIT=y CONFIG_BPF_SYSCALL=y CONFIG_BRIDGE=m +CONFIG_NETFILTER_XTABLES_LEGACY=y +CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m CONFIG_BRIDGE_EBT_BROUTE=m CONFIG_BRIDGE_EBT_IP=m CONFIG_BRIDGE_EBT_REDIRECT=m @@ -14,7 +16,10 @@ CONFIG_INET_ESP=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES_LEGACY=m CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES_LEGACY=m +CONFIG_IP_NF_NAT=m CONFIG_IP_NF_FILTER=m CONFIG_IP6_NF_FILTER=m CONFIG_IP_NF_RAW=m @@ -92,4 +97,4 @@ CONFIG_XFRM_STATISTICS=y CONFIG_NET_PKTGEN=m CONFIG_TUN=m CONFIG_INET_DIAG=m -CONFIG_SCTP_DIAG=m +CONFIG_INET_SCTP_DIAG=m diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh new file mode 100755 index 000000000000..606a43a60f73 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -0,0 +1,174 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +clash_resolution_active=0 +dport=22111 +ret=0 + +cleanup() +{ + # netns cleanup also zaps any remaining socat echo server. + cleanup_all_ns +} + +checktool "nft --version" "run test without nft" +checktool "conntrack --version" "run test without conntrack" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT + +setup_ns nsclient1 nsclient2 nsrouter + +ip netns exec "$nsrouter" nft -f -<<EOF +table ip t { + chain lb { + meta l4proto udp dnat to numgen random mod 3 map { 0 : 10.0.2.1 . 9000, 1 : 10.0.2.1 . 9001, 2 : 10.0.2.1 . 9002 } + } + + chain prerouting { + type nat hook prerouting priority dstnat + + udp dport $dport counter jump lb + } + + chain output { + type nat hook output priority dstnat + + udp dport $dport counter jump lb + } +} +EOF + +load_simple_ruleset() +{ +ip netns exec "$1" nft -f -<<EOF +table ip t { + chain forward { + type filter hook forward priority 0 + + ct state new counter + } +} +EOF +} + +spawn_servers() +{ + local ns="$1" + local ports="9000 9001 9002" + + for port in $ports; do + ip netns exec "$ns" socat UDP-RECVFROM:$port,fork PIPE 2>/dev/null & + done + + for port in $ports; do + wait_local_port_listen "$ns" $port udp + done +} + +add_addr() +{ + local ns="$1" + local dev="$2" + local i="$3" + local j="$4" + + ip -net "$ns" link set "$dev" up + ip -net "$ns" addr add "10.0.$i.$j/24" dev "$dev" +} + +ping_test() +{ + local ns="$1" + local daddr="$2" + + if ! 
ip netns exec "$ns" ping -q -c 1 $daddr > /dev/null;then + echo "FAIL: ping from $ns to $daddr" + exit 1 + fi +} + +run_one_clash_test() +{ + local ns="$1" + local ctns="$2" + local daddr="$3" + local dport="$4" + local entries + local cre + + if ! ip netns exec "$ns" ./udpclash $daddr $dport;then + echo "INFO: did not receive expected number of replies for $daddr:$dport" + ip netns exec "$ctns" conntrack -S + # don't fail: check if clash resolution triggered after all. + fi + + entries=$(ip netns exec "$ctns" conntrack -S | wc -l) + cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l) + + if [ "$cre" -ne "$entries" ];then + clash_resolution_active=1 + return 0 + fi + + # not a failure: clash resolution logic did not trigger. + # With right timing, xmit completed sequentially and + # no parallel insertion occurs. + return $ksft_skip +} + +run_clash_test() +{ + local ns="$1" + local ctns="$2" + local daddr="$3" + local dport="$4" + local softerr=0 + + for i in $(seq 1 10);do + run_one_clash_test "$ns" "$ctns" "$daddr" "$dport" + local rv=$? + if [ $rv -eq 0 ];then + echo "PASS: clash resolution test for $daddr:$dport on attempt $i" + return 0 + elif [ $rv -eq $ksft_skip ]; then + softerr=1 + fi + done + + [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger" +} + +ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter" +ip link add veth0 netns "$nsclient2" type veth peer name veth1 netns "$nsrouter" +add_addr "$nsclient1" veth0 1 1 +add_addr "$nsclient2" veth0 2 1 +add_addr "$nsrouter" veth0 1 99 +add_addr "$nsrouter" veth1 2 99 + +ip -net "$nsclient1" route add default via 10.0.1.99 +ip -net "$nsclient2" route add default via 10.0.2.99 +ip netns exec "$nsrouter" sysctl -q net.ipv4.ip_forward=1 + +ping_test "$nsclient1" 10.0.1.99 +ping_test "$nsclient1" 10.0.2.1 +ping_test "$nsclient2" 10.0.1.1 + +spawn_servers "$nsclient2" + +# exercise clash resolution with nat: +# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2,3}. +run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport" + +# exercise clash resolution without nat. 
+load_simple_ruleset "$nsclient2" +run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001 + +if [ $clash_resolution_active -eq 0 ];then + [ "$ret" -eq 0 ] && ret=$ksft_skip + echo "SKIP: Clash resolution did not trigger" +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh index 9e033e80219e..788cd56ea4a0 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_resize.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -12,6 +12,9 @@ tmpfile="" tmpfile_proc="" tmpfile_uniq="" ret=0 +have_socat=0 + +socat -h > /dev/null && have_socat=1 insert_count=2000 [ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400 @@ -123,7 +126,7 @@ ctflush() { done } -ctflood() +ct_pingflood() { local ns="$1" local duration="$2" @@ -152,6 +155,44 @@ ctflood() wait } +ct_udpflood() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ $have_socat -ne "1" ] && return + + while [ $now -lt $end ]; do +ip netns exec "$ns" bash<<"EOF" + for i in $(seq 1 100);do + dport=$(((RANDOM%65536)+1)) + + echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" & + done > /dev/null 2>&1 + wait +EOF + now=$(date +%s) + done +} + +ct_udpclash() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ -x udpclash ] || return + + while [ $now -lt $end ]; do + ip netns exec "$ns" ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 + + now=$(date +%s) + done +} + # dump to /dev/null. We don't want dumps to cause infinite loops # or use-after-free even when conntrack table is altered while dumps # are in progress. @@ -169,6 +210,48 @@ ct_nulldump() wait } +ct_nulldump_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + ct_nulldump "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done +} + +change_timeouts() +{ + local ns="$1" + local r1=$((RANDOM%2)) + local r2=$((RANDOM%2)) + + [ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5)) + [ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5)) +} + +ct_change_timeouts_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + change_timeouts "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done + + # restore defaults + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30 + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30 +} + check_taint() { local tainted_then="$1" @@ -198,10 +281,14 @@ insert_flood() r=$((RANDOM%$insert_count)) - ctflood "$n" "$timeout" "floodresize" & + ct_pingflood "$n" "$timeout" "floodresize" & + ct_udpflood "$n" "$timeout" & + ct_udpclash "$n" "$timeout" & + insert_ctnetlink "$n" "$r" & ctflush "$n" "$timeout" & - ct_nulldump "$n" & + ct_nulldump_loop "$n" "$timeout" & + ct_change_timeouts_loop "$n" "$timeout" & wait } @@ -306,7 +393,7 @@ test_dump_all() ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600 - ctflood "$nsclient1" $timeout "dumpall" & + ct_pingflood "$nsclient1" $timeout "dumpall" & insert_ctnetlink "$nsclient2" $insert_count wait @@ -368,7 +455,7 @@ test_conntrack_disable() ct_flush_once "$nsclient1" ct_flush_once "$nsclient2" - ctflood "$nsclient1" "$timeout" "conntrack disable" + ct_pingflood "$nsclient1" "$timeout" 
"conntrack disable" ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1 # Disabled, should not have picked up any connection. diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh index 6af2ea3ad6b8..9c9d5b38ab71 100755 --- a/tools/testing/selftests/net/netfilter/ipvs.sh +++ b/tools/testing/selftests/net/netfilter/ipvs.sh @@ -151,7 +151,7 @@ test_nat() { test_tun() { ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0 - ip netns exec "${ns1}" modprobe -q ipip + modprobe -q ipip ip netns exec "${ns1}" ip link set tunl0 up ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0 ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0 @@ -160,10 +160,10 @@ test_tun() { ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port} ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1 - ip netns exec "${ns2}" modprobe -q ipip ip netns exec "${ns2}" ip link set tunl0 up ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 + ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 test_service diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index cd12b8b5ac0e..20e76b395c85 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -1311,6 +1311,9 @@ maybe_send_match() { # - remove some elements, check that packets don't match anymore test_correctness_main() { range_size=1 + + send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 + for i in $(seq "${start}" $((start + count))); do local elem="" diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh index 5ff7be9daeee..c0fffaa6dbd9 100755 --- a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh +++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh @@ -10,6 +10,8 @@ source lib.sh checktool "nft --version" "run test without nft tool" checktool "iperf3 --version" "run test without iperf3 tool" +read kernel_tainted < /proc/sys/kernel/tainted + # how many seconds to torture the kernel? # default to 80% of max run time but don't exceed 48s TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10)) @@ -135,7 +137,8 @@ else wait fi -[[ $(</proc/sys/kernel/tainted) -eq 0 ]] || { + +[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && { echo "FAIL: Kernel is tainted!" exit $ksft_fail } diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c new file mode 100644 index 000000000000..85c7b906ad08 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/udpclash.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Usage: ./udpclash <IP> <PORT> + * + * Emit THREAD_COUNT UDP packets sharing the same saddr:daddr pair. + * + * This mimics DNS resolver libraries that emit A and AAAA requests + * in parallel. 
+ * + * This exercises conntrack clash resolution logic added and later + * refined in + * + * 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race") + * ed07d9a021df ("netfilter: nf_conntrack: resolve clash for matching conntracks") + * 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries") + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <pthread.h> + +#define THREAD_COUNT 128 + +struct thread_args { + const struct sockaddr_in *si_remote; + int sockfd; +}; + +static int wait = 1; + +static void *thread_main(void *varg) +{ + const struct sockaddr_in *si_remote; + const struct thread_args *args = varg; + static const char msg[] = "foo"; + + si_remote = args->si_remote; + + while (wait == 1) + ; + + if (sendto(args->sockfd, msg, strlen(msg), MSG_NOSIGNAL, + (struct sockaddr *)si_remote, sizeof(*si_remote)) < 0) + exit(111); + + return varg; +} + +static int run_test(int fd, const struct sockaddr_in *si_remote) +{ + struct thread_args thread_args = { + .si_remote = si_remote, + .sockfd = fd, + }; + pthread_t *tid = calloc(THREAD_COUNT, sizeof(pthread_t)); + unsigned int repl_count = 0, timeout = 0; + int i; + + if (!tid) { + perror("calloc"); + return 1; + } + + for (i = 0; i < THREAD_COUNT; i++) { + int err = pthread_create(&tid[i], NULL, &thread_main, &thread_args); + + if (err != 0) { + perror("pthread_create"); + exit(1); + } + } + + wait = 0; + + for (i = 0; i < THREAD_COUNT; i++) + pthread_join(tid[i], NULL); + + while (repl_count < THREAD_COUNT) { + struct sockaddr_in si_repl; + socklen_t si_repl_len = sizeof(si_repl); + char repl[512]; + ssize_t ret; + + ret = recvfrom(fd, repl, sizeof(repl), MSG_NOSIGNAL, + (struct sockaddr *) &si_repl, &si_repl_len); + if (ret < 0) { + if (timeout++ > 5000) { + fputs("timed out while waiting for reply from thread\n", stderr); + break; + } + + /* give reply time to pass though the stack */ + usleep(1000); + continue; + } + + if (si_repl_len != sizeof(*si_remote)) { + fprintf(stderr, "warning: reply has unexpected repl_len %d vs %d\n", + (int)si_repl_len, (int)sizeof(si_repl)); + } else if (si_remote->sin_addr.s_addr != si_repl.sin_addr.s_addr || + si_remote->sin_port != si_repl.sin_port) { + char a[64], b[64]; + + inet_ntop(AF_INET, &si_remote->sin_addr, a, sizeof(a)); + inet_ntop(AF_INET, &si_repl.sin_addr, b, sizeof(b)); + + fprintf(stderr, "reply from wrong source: want %s:%d got %s:%d\n", + a, ntohs(si_remote->sin_port), b, ntohs(si_repl.sin_port)); + } + + repl_count++; + } + + printf("got %d of %d replies\n", repl_count, THREAD_COUNT); + + free(tid); + + return repl_count == THREAD_COUNT ? 
0 : 1; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_in si_local = { + .sin_family = AF_INET, + }; + struct sockaddr_in si_remote = { + .sin_family = AF_INET, + }; + int fd, ret; + + if (argc < 3) { + fputs("Usage: send_udp <daddr> <dport>\n", stderr); + return 1; + } + + si_remote.sin_port = htons(atoi(argv[2])); + si_remote.sin_addr.s_addr = inet_addr(argv[1]); + + fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_UDP); + if (fd < 0) { + perror("socket"); + return 1; + } + + if (bind(fd, (struct sockaddr *)&si_local, sizeof(si_local)) < 0) { + perror("bind"); + return 1; + } + + ret = run_test(fd, &si_remote); + + close(fd); + + return ret; +} diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index cd8a58097448..1f5227f3d64d 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len) name[0] = '\0'; rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); if (rc < 0) - log_err_errno("setsockopt(SO_BINDTODEVICE)"); + log_err_errno("getsockopt(SO_BINDTODEVICE)"); return rc; } @@ -535,7 +535,7 @@ static int set_freebind(int sd, int version) break; case AF_INET6: if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) { - log_err_errno("setsockopt(IPV6_FREEBIND"); + log_err_errno("setsockopt(IPV6_FREEBIND)"); rc = -1; } break; @@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str, sep++; if (str_to_uint(sep, 1, pfx_len_max, &args->prefix_len) != 0) { - fprintf(stderr, "Invalid port\n"); + fprintf(stderr, "Invalid prefix length\n"); return 1; } } else { @@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen, } } - nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1; + nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1; while (1) { FD_ZERO(&rfds); FD_SET(sd, &rfds); @@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args) sd = socket(args->version, args->type, args->protocol); if (sd < 0) { log_err_errno("Error opening socket"); - return -1; + return -1; } if (set_reuseaddr(sd) != 0) @@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) * waiting to be told when to continue */ if (read(fd, &buf, sizeof(buf)) <= 0) { - log_err_errno("Failed to read IPC status from status"); + log_err_errno("Failed to read IPC status from pipe"); return 1; } if (!buf) { diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index beaee5e4e2aa..5c66421ab8aa 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -2,8 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 import time +from os import system from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_eq, ksft_ge, ksft_busy_wait +from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait from lib.py import NetdevFamily, NetdevSimDev, ip @@ -34,6 +35,128 @@ def napi_list_check(nf) -> None: ksft_eq(len(napis), 100, comment=f"queue count after reset queue {q} mode {i}") +def napi_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at napi and device level. 
+ """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': "enabled"}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # check it is not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + ip(f"link set dev {nsim.ifname} down") + ip(f"link set dev {nsim.ifname} up") + + # verify if napi threaded is still set + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # check it is still not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + # unset napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': "disabled"}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + + # set threaded at device level + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "enabled") + ksft_ne(napi1.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + # set napi threaded for napi0 + nf.napi_set({'id': napi0_id, 'threaded': 1}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + +def dev_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at device level using sysfs. 
+ """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set threaded + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "enabled") + ksft_ne(napi1.get('pid'), None) + + # unset threaded + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) def nsim_rxq_reset_down(nf) -> None: """ @@ -122,7 +245,7 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, - nsim_rxq_reset_down], + dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down], args=(nf, )) ksft_exit() diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index ef8b25a606d8..c5b01e1bd4c7 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -39,11 +39,15 @@ if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then # xfail tests that are known flaky with dbg config, not fixable. # still run them for coverage (and expect 100% pass without dbg). declare -ar xfail_list=( + "tcp_blocking_blocking-connect.pkt" + "tcp_blocking_blocking-read.pkt" "tcp_eor_no-coalesce-retrans.pkt" "tcp_fast_recovery_prr-ss.*.pkt" + "tcp_sack_sack-route-refresh-ip-tos.pkt" "tcp_slow_start_slow-start-after-win-update.pkt" "tcp_timestamping.*.pkt" "tcp_user_timeout_user-timeout-probe.pkt" + "tcp_zerocopy_cl.*.pkt" "tcp_zerocopy_epoll_.*.pkt" "tcp_tcp_info_tcp-info-.*-limited.pkt" ) diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt index 914eabab367a..657e42ca65b5 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test for blocking read. + --tolerance_usecs=10000 +--mss=1000 `./defaults.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt new file mode 100644 index 000000000000..c790d0af635e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test various DSACK (RFC 2883) behaviors. + +--mss=1000 + +`./defaults.sh` + + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + +// First SACK range. + +0 < P. 1001:2001(1000) ack 1 win 1024 + +0 > . 
1:1(0) ack 1 <nop, nop, sack 1001:2001> + +// Check SACK coalescing (contiguous sequence). + +0 < P. 2001:3001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:3001> + +// Check we have two SACK ranges for non contiguous sequences. + +0 < P. 4001:5001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 4001:5001 1001:3001> + +// Three ranges. + +0 < P. 7001:8001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 7001:8001 4001:5001 1001:3001> + +// DSACK (1001:3001) + SACK (6001:7001) + +0 < P. 1:6001(6000) ack 1 win 1024 + +0 > . 1:1(0) ack 6001 <nop,nop,sack 1001:3001 7001:8001> + +// DSACK (7001:8001) + +0 < P. 6001:8001(2000) ack 1 win 1024 + +0 > . 1:1(0) ack 8001 <nop,nop,sack 7001:8001> + +// DSACK for an older segment. + +0 < P. 1:1001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 8001 <nop,nop,sack 1:1001> diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt index df49c67645ac..e13f0eee9795 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test TCP_INQ and TCP_CM_INQ on the client side. + +--mss=1000 + `./defaults.sh ` diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt index 04a5e2590c62..14dd5f813d50 100644 --- a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 // Test TCP_INQ and TCP_CM_INQ on the server side. + +--mss=1000 + `./defaults.sh ` diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt new file mode 100644 index 000000000000..09aabc775e80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + +// Test that a not-yet-accepted socket does not change +// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets. + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + +0 < . 1:1001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001> + +0 < . 1001:2001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 101001 + + +0 accept(3, ..., ...) = 4 + + +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }% + + +0 close(4) = 0 + +0 close(3) = 0 + +// Test that ooo packets for accepted sockets do increase sk_rcvbuf + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 
2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + + +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }% + diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt new file mode 100644 index 000000000000..7e6bc5fb0c8d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:11001(9000) ack 1 win 257 + +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001> + +// check that ooo packet properly updates tcpi_rcv_mss + +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }% + + +0 < . 11001:21001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001> + diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt new file mode 100644 index 000000000000..3848b419e68c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:4001(4000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 4001:54001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +1 < P. 5001:55001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 70001:80001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + + +0 read(4, ..., 100000) = 4000 + +// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd + +0 < P. 4001:54001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 54001 win 0 + +// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times. ++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "` diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt new file mode 100644 index 000000000000..f575c0ff89da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 + +0 bind(3, ..., ...) 
= 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:20001(20000) ack 1 win 257 + +.04 > . 1:1(0) ack 20001 win 18000 + + +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0 + +0 < P. 20001:80001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 20001 win 18000 + + +0 read(4, ..., 20000) = 20000 +// A too big packet is accepted if the receive queue is empty + +0 < P. 20001:80001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 80001 win 0 + diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 2e8243a65b50..d6c00efeb664 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -21,6 +21,7 @@ ALL_TESTS=" kci_test_vrf kci_test_encap kci_test_macsec + kci_test_macsec_vlan kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get @@ -30,6 +31,7 @@ ALL_TESTS=" kci_test_address_proto kci_test_enslave_bonding kci_test_mngtmpaddr + kci_test_operstate " devdummy="test-dummy0" @@ -291,6 +293,17 @@ kci_test_route_get() end_test "PASS: route get" } +check_addr_not_exist() +{ + dev=$1 + addr=$2 + if ip addr show dev $dev | grep -q $addr; then + return 1 + else + return 0 + fi +} + kci_test_addrlft() { for i in $(seq 10 100) ;do @@ -298,9 +311,8 @@ kci_test_addrlft() run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1)) done - sleep 5 - run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy" - if [ $? -eq 0 ]; then + slowwait 5 check_addr_not_exist "$devdummy" "10.23.11." + if [ $? -eq 1 ]; then check_err 1 end_test "FAIL: preferred_lft addresses remaining" return @@ -561,6 +573,41 @@ kci_test_macsec() end_test "PASS: macsec" } +# Test __dev_set_rx_mode call from dev_uc_add under addr_list_lock spinlock. +# Make sure __dev_set_promiscuity is not grabbing (sleeping) netdev instance +# lock. +# https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/ +kci_test_macsec_vlan() +{ + msname="test_macsec1" + vlanname="test_vlan1" + local ret=0 + run_cmd_grep "^Usage: ip macsec" ip macsec help + if [ $? -ne 0 ]; then + end_test "SKIP: macsec: iproute2 too old" + return $ksft_skip + fi + run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on + if [ $ret -ne 0 ];then + end_test "FAIL: can't add macsec interface, skipping test" + return 1 + fi + + run_cmd ip link set dev "$msname" up + ip link add link "$msname" name "$vlanname" type vlan id 1 + ip link set dev "$vlanname" address 00:11:22:33:44:88 + ip link set dev "$vlanname" up + run_cmd ip link del dev "$vlanname" + run_cmd ip link del dev "$msname" + + if [ $ret -ne 0 ];then + end_test "FAIL: macsec_vlan" + return 1 + fi + + end_test "PASS: macsec_vlan" +} + #------------------------------------------------------------------- # Example commands # ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \ @@ -673,6 +720,11 @@ kci_test_ipsec_offload() sysfsf=$sysfsd/ipsec sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ probed=false + esp4_offload_probed_default=false + + if lsmod | grep -q esp4_offload; then + esp4_offload_probed_default=true + fi if ! mount | grep -q debugfs; then mount -t debugfs none /sys/kernel/debug/ &> /dev/null @@ -766,6 +818,7 @@ EOF fi # clean up any leftovers + ! 
"$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload echo 0 > /sys/bus/netdevsim/del_device $probed && rmmod netdevsim @@ -1334,6 +1387,39 @@ kci_test_mngtmpaddr() return $ret } +kci_test_operstate() +{ + local ret=0 + + # Check that it is possible to set operational state during device + # creation and that it is preserved when the administrative state of + # the device is toggled. + run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789 + run_cmd_grep "state UP" ip link show dev vx0 + run_cmd ip link set dev vx0 down + run_cmd_grep "state DOWN" ip link show dev vx0 + run_cmd ip link set dev vx0 up + run_cmd_grep "state UP" ip link show dev vx0 + + run_cmd ip link del dev vx0 + + # Check that it is possible to set the operational state of the device + # after creation. + run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789 + run_cmd_grep "state UNKNOWN" ip link show dev vx0 + run_cmd ip link set dev vx0 state up + run_cmd_grep "state UP" ip link show dev vx0 + + run_cmd ip link del dev vx0 + + if [ "$ret" -ne 0 ]; then + end_test "FAIL: operstate" + return 1 + fi + + end_test "PASS: operstate" +} + kci_test_rtnl() { local current_test diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh new file mode 100755 index 000000000000..3f9780232bd6 --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink_notification.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking rtnetlink notification callpaths, and get as much +# coverage as possible. +# +# set -e + +ALL_TESTS=" + kci_test_mcast_addr_notification + kci_test_anycast_addr_notification +" + +source lib.sh +test_dev="test-dummy1" + +kci_test_mcast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor maddr > $tmpfile & + monitor_pid=$! + defer kill_process "$monitor_pid" + + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "mcast addr notification: iproute2 too old" + return $RET + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? "failed to set dummy interface up" + ip link del dev "$test_dev" + check_err $? "Failed to delete dummy interface" + sleep 1 + + # There should be 4 line matches as follows. + # 13: test-dummy1  inet6 mcast ff02::1 scope global + # 13: test-dummy1  inet mcast 224.0.0.1 scope global + # Deleted 13: test-dummy1  inet mcast 224.0.0.1 scope global + # Deleted 13: test-dummy1  inet6 mcast ff02::1 scope global + match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile") + if [ "$match_result" -ne 4 ]; then + RET=$ksft_fail + fi + log_test "mcast addr notification: Expected 4 matches, got $match_result" + return $RET +} + +kci_test_anycast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor acaddress > "$tmpfile" & + monitor_pid=$! + defer kill_process "$monitor_pid" + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "anycast addr notification: iproute2 too old" + return "$RET" + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? 
"failed to set dummy interface up" + sysctl -qw net.ipv6.conf."$test_dev".forwarding=1 + ip link del dev "$test_dev" + check_err $? "Failed to delete dummy interface" + sleep 1 + + # There should be 2 line matches as follows. + # 9: dummy2 inet6 any fe80:: scope global + # Deleted 9: dummy2 inet6 any fe80:: scope global + match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile") + if [ "$match_result" -ne 2 ]; then + RET=$ksft_fail + fi + log_test "anycast addr notification: Expected 2 matches, got $match_result" + return "$RET" +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + RET=$ksft_skip + log_test "need root privileges" + exit $RET +fi + +require_command ip + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh index ba730655a7bf..4bc135e5c22c 100755 --- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh @@ -594,7 +594,7 @@ setup_rt_local_sids() dev "${DUMMY_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh index 4b86040c58c6..34b781a2ae74 100755 --- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -72,6 +72,9 @@ # Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in # the selftest network. # +# In addition, every router interface connecting rt-x to rt-y is assigned an +# IPv6 link-local address fe80::x:y/64. +# # Local SID/C-SID table # ===================== # @@ -521,6 +524,9 @@ setup_rt_networking() ip -netns "${nsname}" addr \ add "${net_prefix}::${rt}/64" dev "${devname}" nodad + ip -netns "${nsname}" addr \ + add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad + ip -netns "${nsname}" link set "${devname}" up done @@ -609,6 +615,27 @@ set_end_x_nextcsid() nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" } +set_end_x_ll_nextcsid() +{ + local rt="$1" + local adj="$2" + + eval nsname=\${$(get_rtname "${rt}")} + lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" + nh6_ll_addr="fe80::${adj}:${rt}" + oifname="veth-rt-${rt}-${adj}" + + # enabled NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop + # address (note that "dev" is the dummy dum0 device chosen for the sake + # of simplicity). + ip -netns "${nsname}" -6 route \ + replace "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.X nh6 "${nh6_ll_addr}" \ + oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \ + nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" +} + set_underlay_sids_reachability() { local rt="$1" @@ -654,7 +681,7 @@ setup_rt_local_sids() set_underlay_sids_reachability "${rt}" "${rt_neighs}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. 
ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ @@ -1016,6 +1043,27 @@ host_vpn_tests() check_and_log_hs_ipv4_connectivity 1 2 check_and_log_hs_ipv4_connectivity 2 1 + + # Setup the adjacencies in the SRv6 aware routers using IPv6 link-local + # addresses. + # - rt-3 SRv6 End.X adjacency with rt-4 + # - rt-4 SRv6 End.X adjacency with rt-1 + set_end_x_ll_nextcsid 3 4 + set_end_x_ll_nextcsid 4 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local" + + check_and_log_hs_ipv6_connectivity 1 2 + check_and_log_hs_ipv6_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local" + + check_and_log_hs_ipv4_connectivity 1 2 + check_and_log_hs_ipv4_connectivity 2 1 + + # Restore the previous adjacencies. + set_end_x_nextcsid 3 4 + set_end_x_nextcsid 4 1 } __nextcsid_end_x_behavior_test() diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh index 3efce1718c5f..6a68c7eff1dc 100755 --- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -395,7 +395,7 @@ setup_rt_local_sids() dev "${VRF_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh index cabc70538ffe..0979b5316fdf 100755 --- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -343,7 +343,7 @@ setup_rt_local_sids() encap seg6local action End dev "${DUMMY_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behaviors instaces are grouped together in the 'localsid' + # Endpoint behaviors instances are grouped together in the 'localsid' # table. ip -netns "${nsname}" -6 rule add \ to "${VPN_LOCATOR_SERVICE}::/16" \ diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c index f00245263b20..6478da6a71c3 100644 --- a/tools/testing/selftests/net/tcp_ao/seq-ext.c +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Check that after SEQ number wrap-around: * 1. SEQ-extension has upper bytes set - * 2. TCP conneciton is alive and no TCPAOBad segments + * 2. 
TCP connection is alive and no TCPAOBad segments * In order to test (2), the test doesn't just adjust seq number for a queue * on a connected socket, but migrates it to another sk+port number, so * that there won't be any delayed packets that will fail to verify diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh new file mode 100755 index 000000000000..388056472b5b --- /dev/null +++ b/tools/testing/selftests/net/test_neigh.sh @@ -0,0 +1,366 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +TESTS=" + extern_valid_ipv4 + extern_valid_ipv6 +" +VERBOSE=0 + +################################################################################ +# Utilities + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + stderr= + fi + + out=$(eval "$cmd" "$stderr") + rc=$? + if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +################################################################################ +# Setup + +setup() +{ + set -e + + setup_ns ns1 ns2 + + ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2" + ip -n "$ns1" link set dev veth0 up + ip -n "$ns2" link set dev veth1 up + + ip -n "$ns1" address add 192.0.2.1/24 dev veth0 + ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad + ip -n "$ns2" address add 192.0.2.2/24 dev veth1 + ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad + + ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + + sleep 5 + + set +e +} + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +extern_valid_common() +{ + local af_str=$1; shift + local ip_addr=$1; shift + local tbl_name=$1; shift + local subnet=$1; shift + local mac + + mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]') + + RET=0 + + # Check that simple addition works. + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "No \"extern_valid\" flag after addition" + + log_test "$af_str \"extern_valid\" flag: Add entry" + + RET=0 + + # Check that an entry cannot be added with "extern_valid" flag and an + # invalid state. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid" + check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state" + + log_test "$af_str \"extern_valid\" flag: Add with an invalid state" + + RET=0 + + # Check that entry cannot be added with both "extern_valid" flag and + # "use" / "managed" flag. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag" + + log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag" + + RET=0 + + # Check that "extern_valid" flag can be toggled using replace. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? 
"Did not manage to set \"extern_valid\" flag with replace" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_fail $? "Did not manage to clear \"extern_valid\" flag with replace" + + log_test "$af_str \"extern_valid\" flag: Replace entry" + + RET=0 + + # Check that an existing "extern_valid" entry can be marked as + # "managed". + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed" + check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry" + + log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag" + + RET=0 + + # Check that entry cannot be replaced with "extern_valid" flag and an + # invalid state. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid" + check_fail $? "Managed to replace an entry with \"extern_valid\" flag and an invalid state" + + log_test "$af_str \"extern_valid\" flag: Replace with an invalid state" + + RET=0 + + # Check that an "extern_valid" entry is flushed when the interface is + # put administratively down. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 link set dev veth0 down" + run_cmd "ip -n $ns1 link set dev veth0 up" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0" + check_fail $? "\"extern_valid\" entry not flushed upon interface down" + + log_test "$af_str \"extern_valid\" flag: Interface down" + + RET=0 + + # Check that an "extern_valid" entry is not flushed when the interface + # loses its carrier. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns2 link set dev veth1 down" + run_cmd "ip -n $ns2 link set dev veth1 up" + run_cmd "sleep 2" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0" + check_err $? "\"extern_valid\" entry flushed upon carrier down" + + log_test "$af_str \"extern_valid\" flag: Carrier down" + + RET=0 + + # Check that when entry transitions to "reachable" state it maintains + # the "extern_valid" flag. Wait "delay_probe" seconds for ARP request / + # NS to be sent. + local delay_probe + + delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]') + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + run_cmd "sleep $((delay_probe / 1000 + 2))" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\"" + check_err $? "Entry did not transition to \"reachable\" state" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state" + + log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state" + + RET=0 + + # Drop all packets, trigger resolution and check that entry goes back + # to "stale" state instead of "failed". 
+ local mcast_reprobes + local retrans_time + local ucast_probes + local app_probes + local probes + local delay + + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "tc -n $ns2 qdisc add dev veth1 clsact" + run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]') + ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]') + app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]') + mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]') + delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time)) + run_cmd "sleep $((delay / 1000 + 2))" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\"" + check_err $? "Entry did not return to \"stale\" state" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state" + probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]') + if [[ $probes -eq 0 ]]; then + check_err 1 "No probes were sent" + fi + + log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state" + + run_cmd "tc -n $ns2 qdisc del dev veth1 clsact" + + RET=0 + + # Forced garbage collection runs whenever the number of entries is + # larger than "thresh3" and deletes stale entries that have not been + # updated in the last 5 seconds. + # + # Check that an "extern_valid" entry survives a forced garbage + # collection. Add an entry, wait 5 seconds and add more entries than + # "thresh3" so that forced garbage collection will run. + # + # Note that the garbage collection thresholds are global resources and + # that changes in the initial namespace affect all the namespaces. + local forced_gc_runs_t0 + local forced_gc_runs_t1 + local orig_thresh1 + local orig_thresh2 + local orig_thresh3 + + run_cmd "ip -n $ns1 neigh flush dev veth0" + orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]') + orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]') + orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]') + run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + run_cmd "sleep 5" + forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]') + for i in {1..20}; do + run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" + done + forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]') + if [[ $forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then + check_err 1 "Forced garbage collection did not run" + fi + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? 
"Entry with \"extern_valid\" flag did not survive forced garbage collection" + run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0" + check_fail $? "Entry without \"extern_valid\" flag survived forced garbage collection" + + log_test "$af_str \"extern_valid\" flag: Forced garbage collection" + + run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1" + + RET=0 + + # Periodic garbage collection runs every "base_reachable"/2 seconds and + # if the number of entries is larger than "thresh1", then it deletes + # stale entries that have not been used in the last "gc_stale" seconds. + # + # Check that an "extern_valid" entry survives a periodic garbage + # collection. Add an "extern_valid" entry, add more than "thresh1" + # regular entries, wait "base_reachable" (longer than "gc_stale") + # seconds and check that the "extern_valid" entry was not deleted. + # + # Note that the garbage collection thresholds and "base_reachable" are + # global resources and that changes in the initial namespace affect all + # the namespaces. + local periodic_gc_runs_t0 + local periodic_gc_runs_t1 + local orig_base_reachable + local orig_gc_stale + + run_cmd "ip -n $ns1 neigh flush dev veth0" + orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]') + orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]') + run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000" + orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]') + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 5000" + # Wait orig_base_reachable/2 for the new interval to take effect. + run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + for i in {1..20}; do + run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" + done + periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]') + run_cmd "sleep 10" + periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]') + [[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]] + check_err $? "Periodic garbage collection did not run" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection" + run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0" + check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection" + + log_test "$af_str \"extern_valid\" flag: Periodic garbage collection" + + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale" + run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable" +} + +extern_valid_ipv4() +{ + extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2. 
+} + +extern_valid_ipv6() +{ + extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1:: +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command jq + +if ! ip neigh help 2>&1 | grep -q "extern_valid"; then + echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup; $t; cleanup_all_ns; +done diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 6127a78ee988..8deacc565afa 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -146,18 +146,17 @@ run_cmd() } check_hv_connectivity() { - ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null - sleep 1 - ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null return $? } check_vm_connectivity() { - run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" + slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)" - run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" + slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" log_test $? 
0 "VM connectivity over $1 (ipv6 default rdst)" } diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c new file mode 100644 index 000000000000..eb3cac5e583c --- /dev/null +++ b/tools/testing/selftests/net/tfo.c @@ -0,0 +1,171 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <error.h> +#include <fcntl.h> +#include <limits.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <netinet/tcp.h> +#include <errno.h> + +static int cfg_server; +static int cfg_client; +static int cfg_port = 8000; +static struct sockaddr_in6 cfg_addr; +static char *cfg_outfile; + +static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) +{ + int ret; + + sin6->sin6_family = AF_INET6; + sin6->sin6_port = htons(port); + + ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr); + if (ret != 1) { + /* fallback to plain IPv4 */ + ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]); + if (ret != 1) + return -1; + + /* add ::ffff prefix */ + sin6->sin6_addr.s6_addr32[0] = 0; + sin6->sin6_addr.s6_addr32[1] = 0; + sin6->sin6_addr.s6_addr16[4] = 0; + sin6->sin6_addr.s6_addr16[5] = 0xffff; + } + + return 0; +} + +static void run_server(void) +{ + unsigned long qlen = 32; + int fd, opt, connfd; + socklen_t len; + char buf[64]; + FILE *outfile; + + outfile = fopen(cfg_outfile, "w"); + if (!outfile) + error(1, errno, "fopen() outfile"); + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + opt = 1; + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) + error(1, errno, "setsockopt(SO_REUSEADDR)"); + + if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0) + error(1, errno, "setsockopt(TCP_FASTOPEN)"); + + if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0) + error(1, errno, "bind()"); + + if (listen(fd, 5) < 0) + error(1, errno, "listen()"); + + len = sizeof(cfg_addr); + connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len); + if (connfd < 0) + error(1, errno, "accept()"); + + len = sizeof(opt); + if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0) + error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)"); + + read(connfd, buf, 64); + fprintf(outfile, "%d\n", opt); + + fclose(outfile); + close(connfd); + close(fd); +} + +static void run_client(void) +{ + int fd; + char *msg = "Hello, world!"; + + fd = socket(AF_INET6, SOCK_STREAM, 0); + if (fd == -1) + error(1, errno, "socket()"); + + sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)); + + close(fd); +} + +static void usage(const char *filepath) +{ + error(1, 0, "Usage: %s (-s|-c) -h<server_ip> -p<port> -o<outfile> ", filepath); +} + +static void parse_opts(int argc, char **argv) +{ + struct sockaddr_in6 *addr6 = (void *) &cfg_addr; + char *addr = NULL; + int ret; + int c; + + if (argc <= 1) + usage(argv[0]); + + while ((c = getopt(argc, argv, "sch:p:o:")) != -1) { + switch (c) { + case 's': + if (cfg_client) + error(1, 0, "Pass one of -s or -c"); + cfg_server = 1; + break; + case 'c': + if (cfg_server) + error(1, 0, "Pass one of -s or -c"); + cfg_client = 1; + break; + case 'h': + addr = optarg; + break; + case 'p': + cfg_port = strtoul(optarg, NULL, 0); + break; + case 'o': + cfg_outfile = strdup(optarg); + if (!cfg_outfile) + error(1, 0, "outfile invalid"); + break; + } + } + + if (cfg_server && addr) + error(1, 0, "Server cannot have -h 
specified"); + + memset(addr6, 0, sizeof(*addr6)); + addr6->sin6_family = AF_INET6; + addr6->sin6_port = htons(cfg_port); + addr6->sin6_addr = in6addr_any; + if (addr) { + ret = parse_address(addr, cfg_port, addr6); + if (ret) + error(1, 0, "Client address parse error: %s", addr); + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + + if (cfg_server) + run_server(); + else if (cfg_client) + run_client(); + + return 0; +} diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh new file mode 100755 index 000000000000..80bf11fdc046 --- /dev/null +++ b/tools/testing/selftests/net/tfo_passive.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source lib.sh + +NSIM_SV_ID=$((256 + RANDOM % 256)) +NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID +NSIM_CL_ID=$((512 + RANDOM % 256)) +NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +SERVER_IP=192.168.1.1 +CLIENT_IP=192.168.1.2 +SERVER_PORT=48675 + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_SV_SYS/net -exec basename {} \;) + NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_CL_SYS/net -exec basename {} \;) + + ip link set $NSIM_SV_NAME netns nssv + ip link set $NSIM_CL_NAME netns nscl + + ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME + ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME + + ip netns exec nssv ip link set dev $NSIM_SV_NAME up + ip netns exec nscl ip link set dev $NSIM_CL_NAME up + + # Enable passive TFO + ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null + + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +### +### Code start +### + +modprobe netdevsim + +# linking + +echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_SV_FD=$((256 + RANDOM % 256)) +exec {NSIM_SV_FD}</var/run/netns/nssv +NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex) + +NSIM_CL_FD=$((256 + RANDOM % 256)) +exec {NSIM_CL_FD}</var/run/netns/nscl +NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex) + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \ + $NSIM_DEV_SYS_LINK + +if [ $? 
-ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +out_file=$(mktemp) + +timeout -k 1s 30s ip netns exec nssv ./tfo \ + -s \ + -p ${SERVER_PORT} \ + -o ${out_file}& + +wait_local_port_listen nssv ${SERVER_PORT} tcp + +ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT} + +wait + +res=$(cat $out_file) +rm $out_file + +if [ $res -eq 0 ]; then + echo "got invalid NAPI ID from passive TFO socket" + cleanup_ns + exit 1 +fi + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit 0 diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 1dc337c709f8..b17e032a6d75 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -48,7 +48,7 @@ run_one() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} & local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp @@ -95,7 +95,7 @@ run_one_nat() { # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} & local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp @@ -117,9 +117,9 @@ run_one_2sock() { cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 & local PID1=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} & local PID2=$! 
wait_local_port_listen "${PEER_NS}" 12345 udp diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh index 7bc804ffaf7c..0fb56baf28e4 100755 --- a/tools/testing/selftests/net/vlan_hw_filter.sh +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -3,27 +3,101 @@ readonly NETNS="ns-$(mktemp -u XXXXXX)" +ALL_TESTS=" + test_vlan_filter_check + test_vlan0_del_crash_01 + test_vlan0_del_crash_02 + test_vlan0_del_crash_03 + test_vid0_memleak +" + ret=0 +setup() { + ip netns add ${NETNS} +} + cleanup() { - ip netns del $NETNS + ip netns del $NETNS 2>/dev/null } trap cleanup EXIT fail() { - echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 - ret=1 + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + ret=1 +} + +tests_run() +{ + local current_test + for current_test in ${TESTS:-$ALL_TESTS}; do + $current_test + done +} + +test_vlan_filter_check() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 + ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 + ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 + ip netns exec ${NETNS} ip link set bond_slave_1 nomaster + ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" + cleanup } -ip netns add ${NETNS} -ip netns exec ${NETNS} ip link add bond0 type bond mode 0 -ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 -ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 -ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off -ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 -ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 -ip netns exec ${NETNS} ip link set bond_slave_1 nomaster -ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" +#enable vlan_filter feature of real_dev with vlan0 during running time +test_vlan0_del_crash_01() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature and add vlan0 for real_dev during running time +test_vlan0_del_crash_02() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature of real_dev during running time +#test kernel_bug of vlan unregister +test_vlan0_del_crash_03() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip 
link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ifconfig bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ifconfig bond0 down + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +test_vid0_memleak() { + setup + ip netns exec ${NETNS} ip link add bond0 up type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link del dev bond0 || fail "Please check vlan HW filter function" + cleanup +} +tests_run exit $ret diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index e9c2f71da207..ce34cb2e6e0b 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -275,7 +275,7 @@ setup_sym() # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } setup_asym() @@ -370,7 +370,7 @@ setup_asym() ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } check_connectivity() diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile index 94176ffe4646..40f5c2908dda 100644 --- a/tools/testing/selftests/nolibc/Makefile +++ b/tools/testing/selftests/nolibc/Makefile @@ -1,341 +1,26 @@ # SPDX-License-Identifier: GPL-2.0 -# Makefile for nolibc tests -# we're in ".../tools/testing/selftests/nolibc" -ifeq ($(srctree),) -srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR))) -endif - -include $(srctree)/tools/scripts/utilities.mak -# We need this for the "__cc-option" macro. -include $(srctree)/scripts/Makefile.compiler - -ifneq ($(O),) -ifneq ($(call is-absolute,$(O)),y) -$(error Only absolute O= parameters are supported) -endif -objtree := $(O) -else -objtree ?= $(srctree) -endif - -ifeq ($(ARCH),) -include $(srctree)/scripts/subarch.include -ARCH = $(SUBARCH) -endif - -cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2)) - -# XARCH extends the kernel's ARCH with a few variants of the same -# architecture that only differ by the configuration, the toolchain -# and the Qemu program used. It is copied as-is into ARCH except for -# a few specific values which are mapped like this: -# -# XARCH | ARCH | config -# -------------|-----------|------------------------- -# ppc | powerpc | 32 bits -# ppc64 | powerpc | 64 bits big endian -# ppc64le | powerpc | 64 bits little endian -# -# It is recommended to only use XARCH, though it does not harm if -# ARCH is already set. For simplicity, ARCH is sufficient for all -# architectures where both are equal. 
- -# configure default variants for target kernel supported architectures -XARCH_powerpc = ppc -XARCH_mips = mips32le -XARCH_riscv = riscv64 -XARCH = $(or $(XARCH_$(ARCH)),$(ARCH)) - -# map from user input variants to their kernel supported architectures -ARCH_armthumb = arm -ARCH_ppc = powerpc -ARCH_ppc64 = powerpc -ARCH_ppc64le = powerpc -ARCH_mips32le = mips -ARCH_mips32be = mips -ARCH_riscv32 = riscv -ARCH_riscv64 = riscv -ARCH_s390x = s390 -ARCH_sparc32 = sparc -ARCH_sparc64 = sparc -ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) -# kernel image names by architecture -IMAGE_i386 = arch/x86/boot/bzImage -IMAGE_x86_64 = arch/x86/boot/bzImage -IMAGE_x86 = arch/x86/boot/bzImage -IMAGE_arm64 = arch/arm64/boot/Image -IMAGE_arm = arch/arm/boot/zImage -IMAGE_armthumb = arch/arm/boot/zImage -IMAGE_mips32le = vmlinuz -IMAGE_mips32be = vmlinuz -IMAGE_ppc = vmlinux -IMAGE_ppc64 = vmlinux -IMAGE_ppc64le = arch/powerpc/boot/zImage -IMAGE_riscv = arch/riscv/boot/Image -IMAGE_riscv32 = arch/riscv/boot/Image -IMAGE_riscv64 = arch/riscv/boot/Image -IMAGE_s390x = arch/s390/boot/bzImage -IMAGE_s390 = arch/s390/boot/bzImage -IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi -IMAGE_sparc32 = arch/sparc/boot/image -IMAGE_sparc64 = arch/sparc/boot/image -IMAGE_m68k = vmlinux -IMAGE = $(objtree)/$(IMAGE_$(XARCH)) -IMAGE_NAME = $(notdir $(IMAGE)) +TEST_GEN_PROGS := nolibc-test -# default kernel configurations that appear to be usable -DEFCONFIG_i386 = defconfig -DEFCONFIG_x86_64 = defconfig -DEFCONFIG_x86 = defconfig -DEFCONFIG_arm64 = defconfig -DEFCONFIG_arm = multi_v7_defconfig -DEFCONFIG_armthumb = multi_v7_defconfig -DEFCONFIG_mips32le = malta_defconfig -DEFCONFIG_mips32be = malta_defconfig generic/eb.config -DEFCONFIG_ppc = pmac32_defconfig -DEFCONFIG_ppc64 = powernv_be_defconfig -DEFCONFIG_ppc64le = powernv_defconfig -DEFCONFIG_riscv = defconfig -DEFCONFIG_riscv32 = rv32_defconfig -DEFCONFIG_riscv64 = defconfig -DEFCONFIG_s390x = defconfig -DEFCONFIG_s390 = defconfig compat.config -DEFCONFIG_loongarch = defconfig -DEFCONFIG_sparc32 = sparc32_defconfig -DEFCONFIG_sparc64 = sparc64_defconfig -DEFCONFIG_m68k = virt_defconfig -DEFCONFIG = $(DEFCONFIG_$(XARCH)) +include ../lib.mk +include $(top_srcdir)/scripts/Makefile.compiler -EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD -EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) -EXTRACONFIG_arm = -e CONFIG_NAMESPACES -EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES - -# optional tests to run (default = all) -TEST = - -# QEMU_ARCH: arch names used by qemu -QEMU_ARCH_i386 = i386 -QEMU_ARCH_x86_64 = x86_64 -QEMU_ARCH_x86 = x86_64 -QEMU_ARCH_arm64 = aarch64 -QEMU_ARCH_arm = arm -QEMU_ARCH_armthumb = arm -QEMU_ARCH_mips32le = mipsel # works with malta_defconfig -QEMU_ARCH_mips32be = mips -QEMU_ARCH_ppc = ppc -QEMU_ARCH_ppc64 = ppc64 -QEMU_ARCH_ppc64le = ppc64 -QEMU_ARCH_riscv = riscv64 -QEMU_ARCH_riscv32 = riscv32 -QEMU_ARCH_riscv64 = riscv64 -QEMU_ARCH_s390x = s390x -QEMU_ARCH_s390 = s390x -QEMU_ARCH_loongarch = loongarch64 -QEMU_ARCH_sparc32 = sparc -QEMU_ARCH_sparc64 = sparc64 -QEMU_ARCH_m68k = m68k -QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) - -QEMU_ARCH_USER_ppc64le = ppc64le -QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH))) - -QEMU_BIOS_DIR = /usr/share/edk2/ -QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd - -ifneq ($(QEMU_BIOS_$(XARCH)),) -QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH)) -endif - -# QEMU_ARGS : some arch-specific args to pass to qemu -QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" 
-QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" -QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) - -# OUTPUT is only set when run from the main makefile, otherwise -# it defaults to this nolibc directory. 
-OUTPUT ?= $(CURDIR)/ - -ifeq ($(V),1) -Q= -else -Q=@ -endif +cc-option = $(call __cc-option, $(CC),,$(1),$(2)) -CFLAGS_i386 = $(call cc-option,-m32) -CFLAGS_arm = -marm -CFLAGS_armthumb = -mthumb -march=armv6t2 -CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) -CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) -CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) -CFLAGS_s390x = -m64 -CFLAGS_s390 = -m31 -CFLAGS_mips32le = -EL -mabi=32 -fPIC -CFLAGS_mips32be = -EB -mabi=32 -CFLAGS_sparc32 = $(call cc-option,-m32) -ifeq ($(origin XARCH),command line) -CFLAGS_XARCH = $(CFLAGS_$(XARCH)) -endif -CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all)) -CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all) -CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ - $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \ - $(CFLAGS_XARCH) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_SANITIZER) $(CFLAGS_EXTRA) -LDFLAGS := +include Makefile.include -LIBGCC := -lgcc +CFLAGS = -nostdlib -nostdinc -static \ + -isystem $(top_srcdir)/tools/include/nolibc -isystem $(top_srcdir)/usr/include \ + $(CFLAGS_NOLIBC_TEST) -ifneq ($(LLVM),) -# Not needed for clang -LIBGCC := +ifeq ($(LLVM),) +LDLIBS := -lgcc endif -# Modify CFLAGS based on LLVM= -include $(srctree)/tools/scripts/Makefile.include - -# GCC uses "s390", clang "systemz" -CLANG_CROSS_FLAGS := $(subst --target=s390-linux,--target=systemz-linux,$(CLANG_CROSS_FLAGS)) - -REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \ - END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \ - if (f || !p) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ - printf("\nSee all results in %s\n", ARGV[1]); }' +$(OUTPUT)/nolibc-test: nolibc-test.c nolibc-test-linkage.c | headers help: - @echo "Supported targets under selftests/nolibc:" - @echo " all call the \"run\" target below" - @echo " help this help" - @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" - @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" - @echo " libc-test build an executable using the compiler's default libc instead" - @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)" - @echo " initramfs.cpio prepare the initramfs archive with nolibc-test" - @echo " initramfs prepare the initramfs tree with nolibc-test" - @echo " defconfig create a fresh new default config (uses \$$XARCH)" - @echo " kernel (re)build the kernel (uses \$$XARCH)" - @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)" - @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)" - @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)" - @echo " clean clean the sysroot, initramfs, build and output files" - @echo "" - @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST." 
- @echo "" - @echo "Currently using the following variables:" - @echo " ARCH = $(ARCH)" - @echo " XARCH = $(XARCH)" - @echo " CROSS_COMPILE = $(CROSS_COMPILE)" - @echo " CC = $(CC)" - @echo " OUTPUT = $(OUTPUT)" - @echo " TEST = $(TEST)" - @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$XARCH]" - @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$XARCH]" - @echo "" - -all: run - -sysroot: sysroot/$(ARCH)/include - -sysroot/$(ARCH)/include: - $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot - $(QUIET_MKDIR)mkdir -p sysroot - $(Q)$(MAKE) -C $(srctree) outputmakefile - $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check - $(Q)mv sysroot/sysroot sysroot/$(ARCH) - -ifneq ($(NOLIBC_SYSROOT),0) -nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include - $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ - -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c $(LIBGCC) -else -nolibc-test: nolibc-test.c nolibc-test-linkage.c - $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ - -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c $(LIBGCC) -endif - -libc-test: nolibc-test.c nolibc-test-linkage.c - $(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c - -# local libc-test -run-libc-test: libc-test - $(Q)./libc-test > "$(CURDIR)/run.out" || : - $(Q)$(REPORT) $(CURDIR)/run.out - -# local nolibc-test -run-nolibc-test: nolibc-test - $(Q)./nolibc-test > "$(CURDIR)/run.out" || : - $(Q)$(REPORT) $(CURDIR)/run.out - -# qemu user-land test -run-user: nolibc-test - $(Q)qemu-$(QEMU_ARCH_USER) ./nolibc-test > "$(CURDIR)/run.out" || : - $(Q)$(REPORT) $(CURDIR)/run.out - -initramfs.cpio: kernel nolibc-test - $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(objtree)/usr/gen_init_cpio - > initramfs.cpio - -initramfs: nolibc-test - $(QUIET_MKDIR)mkdir -p initramfs - $(call QUIET_INSTALL, initramfs/init) - $(Q)cp nolibc-test initramfs/init - -defconfig: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG) - $(Q)if [ -n "$(EXTRACONFIG)" ]; then \ - $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \ - $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \ - fi - -kernel: | defconfig - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null - -kernel-standalone: initramfs | defconfig - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null - -# run the tests after building the kernel -run: kernel initramfs.cpio - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" - $(Q)$(REPORT) $(CURDIR)/run.out - -# re-run the tests from an existing kernel -rerun: - $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out" - $(Q)$(REPORT) $(CURDIR)/run.out - -# report with existing test log -report: - $(Q)$(REPORT) $(CURDIR)/run.out - -clean: - $(call QUIET_CLEAN, sysroot) - $(Q)rm -rf sysroot - $(call QUIET_CLEAN, nolibc-test) - $(Q)rm -f nolibc-test - $(call QUIET_CLEAN, libc-test) - $(Q)rm -f libc-test - $(call QUIET_CLEAN, initramfs.cpio) - $(Q)rm -rf initramfs.cpio - 
$(call QUIET_CLEAN, initramfs) - $(Q)rm -rf initramfs - $(call QUIET_CLEAN, run.out) - $(Q)rm -rf run.out + @echo "For the custom nolibc testsuite use '$(MAKE) -f Makefile.nolibc'; available targets:" + @$(MAKE) -f Makefile.nolibc help -.PHONY: sysroot/$(ARCH)/include +.PHONY: help diff --git a/tools/testing/selftests/nolibc/Makefile.include b/tools/testing/selftests/nolibc/Makefile.include new file mode 100644 index 000000000000..66287fafbbe0 --- /dev/null +++ b/tools/testing/selftests/nolibc/Makefile.include @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +__CFLAGS_STACKPROTECTOR = $(call cc-option,-fstack-protector-all) $(call cc-option,-mstack-protector-guard=global) +_CFLAGS_STACKPROTECTOR ?= $(call try-run, \ + echo 'void foo(void) {}' | $(CC) -x c - -o - -S $(CLANG_CROSS_FLAGS) $(__CFLAGS_STACKPROTECTOR) | grep -q __stack_chk_guard, \ + $(__CFLAGS_STACKPROTECTOR)) +_CFLAGS_SANITIZER ?= $(call cc-option,-fsanitize=undefined -fsanitize-trap=all) +CFLAGS_NOLIBC_TEST ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \ + $(call cc-option,-fno-stack-protector) $(call cc-option,-Wmissing-prototypes) \ + $(_CFLAGS_STACKPROTECTOR) $(_CFLAGS_SANITIZER) diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc new file mode 100644 index 000000000000..0fb759ba992e --- /dev/null +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -0,0 +1,383 @@ +# SPDX-License-Identifier: GPL-2.0 +# Makefile for nolibc tests +# we're in ".../tools/testing/selftests/nolibc" +ifeq ($(srctree),) +srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR))) +endif + +include $(srctree)/tools/scripts/utilities.mak +# We need this for the "__cc-option" macro. +include $(srctree)/scripts/Makefile.compiler + +ifneq ($(O),) +ifneq ($(call is-absolute,$(O)),y) +$(error Only absolute O= parameters are supported) +endif +objtree := $(O) +else +objtree ?= $(srctree) +endif + +ifeq ($(ARCH),) +include $(srctree)/scripts/subarch.include +ARCH = $(SUBARCH) +endif + +cc-option = $(call __cc-option, $(CC),$(CLANG_CROSS_FLAGS),$(1),$(2)) + +# XARCH extends the kernel's ARCH with a few variants of the same +# architecture that only differ by the configuration, the toolchain +# and the Qemu program used. It is copied as-is into ARCH except for +# a few specific values which are mapped like this: +# +# XARCH | ARCH | config +# -------------|-----------|------------------------- +# ppc | powerpc | 32 bits +# ppc64 | powerpc | 64 bits big endian +# ppc64le | powerpc | 64 bits little endian +# +# It is recommended to only use XARCH, though it does not harm if +# ARCH is already set. For simplicity, ARCH is sufficient for all +# architectures where both are equal. 
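+#
+# For example, a cross build followed by a QEMU run of the test kernel could
+# look like this (the cross-compiler prefix is only illustrative and depends
+# on the locally installed toolchain):
+#
+#   make -f Makefile.nolibc XARCH=ppc64le CROSS_COMPILE=powerpc64le-linux-gnu- run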
+ +# configure default variants for target kernel supported architectures +XARCH_powerpc = ppc +XARCH_mips = mips32le +XARCH_riscv = riscv64 +XARCH = $(or $(XARCH_$(ARCH)),$(ARCH)) + +# map from user input variants to their kernel supported architectures +ARCH_x32 = x86 +ARCH_armthumb = arm +ARCH_ppc = powerpc +ARCH_ppc64 = powerpc +ARCH_ppc64le = powerpc +ARCH_mips32le = mips +ARCH_mips32be = mips +ARCH_mipsn32le = mips +ARCH_mipsn32be = mips +ARCH_mips64le = mips +ARCH_mips64be = mips +ARCH_riscv32 = riscv +ARCH_riscv64 = riscv +ARCH_s390x = s390 +ARCH_sparc32 = sparc +ARCH_sparc64 = sparc +ARCH_sh4 = sh +ARCH := $(or $(ARCH_$(XARCH)),$(XARCH)) + +# kernel image names by architecture +IMAGE_i386 = arch/x86/boot/bzImage +IMAGE_x86_64 = arch/x86/boot/bzImage +IMAGE_x32 = arch/x86/boot/bzImage +IMAGE_x86 = arch/x86/boot/bzImage +IMAGE_arm64 = arch/arm64/boot/Image +IMAGE_arm = arch/arm/boot/zImage +IMAGE_armthumb = arch/arm/boot/zImage +IMAGE_mips32le = vmlinuz +IMAGE_mips32be = vmlinuz +IMAGE_mipsn32le = vmlinuz +IMAGE_mipsn32be = vmlinuz +IMAGE_mips64le = vmlinuz +IMAGE_mips64be = vmlinuz +IMAGE_ppc = vmlinux +IMAGE_ppc64 = vmlinux +IMAGE_ppc64le = arch/powerpc/boot/zImage +IMAGE_riscv = arch/riscv/boot/Image +IMAGE_riscv32 = arch/riscv/boot/Image +IMAGE_riscv64 = arch/riscv/boot/Image +IMAGE_s390x = arch/s390/boot/bzImage +IMAGE_s390 = arch/s390/boot/bzImage +IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi +IMAGE_sparc32 = arch/sparc/boot/image +IMAGE_sparc64 = arch/sparc/boot/image +IMAGE_m68k = vmlinux +IMAGE_sh4 = arch/sh/boot/zImage +IMAGE = $(objtree)/$(IMAGE_$(XARCH)) +IMAGE_NAME = $(notdir $(IMAGE)) + +# default kernel configurations that appear to be usable +DEFCONFIG_i386 = defconfig +DEFCONFIG_x86_64 = defconfig +DEFCONFIG_x32 = defconfig +DEFCONFIG_x86 = defconfig +DEFCONFIG_arm64 = defconfig +DEFCONFIG_arm = multi_v7_defconfig +DEFCONFIG_armthumb = multi_v7_defconfig +DEFCONFIG_mips32le = malta_defconfig +DEFCONFIG_mips32be = malta_defconfig generic/eb.config +DEFCONFIG_mipsn32le = malta_defconfig generic/64r2.config +DEFCONFIG_mipsn32be = malta_defconfig generic/64r6.config generic/eb.config +DEFCONFIG_mips64le = malta_defconfig generic/64r6.config +DEFCONFIG_mips64be = malta_defconfig generic/64r2.config generic/eb.config +DEFCONFIG_ppc = pmac32_defconfig +DEFCONFIG_ppc64 = powernv_be_defconfig +DEFCONFIG_ppc64le = powernv_defconfig +DEFCONFIG_riscv = defconfig +DEFCONFIG_riscv32 = rv32_defconfig +DEFCONFIG_riscv64 = defconfig +DEFCONFIG_s390x = defconfig +DEFCONFIG_s390 = defconfig compat.config +DEFCONFIG_loongarch = defconfig +DEFCONFIG_sparc32 = sparc32_defconfig +DEFCONFIG_sparc64 = sparc64_defconfig +DEFCONFIG_m68k = virt_defconfig +DEFCONFIG_sh4 = rts7751r2dplus_defconfig +DEFCONFIG = $(DEFCONFIG_$(XARCH)) + +EXTRACONFIG_x32 = -e CONFIG_X86_X32_ABI +EXTRACONFIG_arm = -e CONFIG_NAMESPACES +EXTRACONFIG_armthumb = -e CONFIG_NAMESPACES +EXTRACONFIG_m68k = -e CONFIG_BLK_DEV_INITRD +EXTRACONFIG_sh4 = -e CONFIG_BLK_DEV_INITRD -e CONFIG_CMDLINE_FROM_BOOTLOADER +EXTRACONFIG = $(EXTRACONFIG_$(XARCH)) + +# optional tests to run (default = all) +TEST = + +# QEMU_ARCH: arch names used by qemu +QEMU_ARCH_i386 = i386 +QEMU_ARCH_x86_64 = x86_64 +QEMU_ARCH_x32 = x86_64 +QEMU_ARCH_x86 = x86_64 +QEMU_ARCH_arm64 = aarch64 +QEMU_ARCH_arm = arm +QEMU_ARCH_armthumb = arm +QEMU_ARCH_mips32le = mipsel # works with malta_defconfig +QEMU_ARCH_mips32be = mips +QEMU_ARCH_mipsn32le = mips64el +QEMU_ARCH_mipsn32be = mips64 +QEMU_ARCH_mips64le = mips64el +QEMU_ARCH_mips64be = mips64 
+QEMU_ARCH_ppc = ppc +QEMU_ARCH_ppc64 = ppc64 +QEMU_ARCH_ppc64le = ppc64 +QEMU_ARCH_riscv = riscv64 +QEMU_ARCH_riscv32 = riscv32 +QEMU_ARCH_riscv64 = riscv64 +QEMU_ARCH_s390x = s390x +QEMU_ARCH_s390 = s390x +QEMU_ARCH_loongarch = loongarch64 +QEMU_ARCH_sparc32 = sparc +QEMU_ARCH_sparc64 = sparc64 +QEMU_ARCH_m68k = m68k +QEMU_ARCH_sh4 = sh4 +QEMU_ARCH = $(QEMU_ARCH_$(XARCH)) + +QEMU_ARCH_USER_ppc64le = ppc64le +QEMU_ARCH_USER_mipsn32le = mipsn32el +QEMU_ARCH_USER_mipsn32be = mipsn32 +QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH))) + +QEMU_BIOS_DIR = /usr/share/edk2/ +QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd + +ifneq ($(QEMU_BIOS_$(XARCH)),) +QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH)) +endif + +# QEMU_ARGS : some arch-specific args to pass to qemu +QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_x32 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_armthumb = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mipsn32le = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mipsn32be = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips64le = -M malta -cpu I6400 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_mips64be = -M malta -cpu 5KEc -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_riscv32 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_riscv64 = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_s390x = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_s390 = -M s390-ccw-virtio -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc32 = -M SS-5 -m 256M -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sparc64 = -M sun4u -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_m68k = -M virt -append "console=ttyGF0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS_sh4 = -M r2d -serial file:/dev/stdout -append "console=ttySC1,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)" +QEMU_ARGS = -m 1G $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA) + +# OUTPUT is only set when run from the main makefile, otherwise +# it defaults to this nolibc directory. 
+OUTPUT ?= $(CURDIR)/ + +ifeq ($(V),1) +Q= +else +Q=@ +endif + +CFLAGS_i386 = $(call cc-option,-m32) +CFLAGS_x32 = -mx32 +CFLAGS_arm = -marm +CFLAGS_armthumb = -mthumb -march=armv6t2 +CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) +CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple) +CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2) +CFLAGS_s390x = -m64 +CFLAGS_s390 = -m31 +CFLAGS_mips32le = -EL -mabi=32 -fPIC +CFLAGS_mips32be = -EB -mabi=32 +CFLAGS_mipsn32le = -EL -mabi=n32 -fPIC -march=mips64r2 +CFLAGS_mipsn32be = -EB -mabi=n32 -march=mips64r6 +CFLAGS_mips64le = -EL -mabi=64 -march=mips64r6 +CFLAGS_mips64be = -EB -mabi=64 -march=mips64r2 +CFLAGS_sparc32 = $(call cc-option,-m32) +CFLAGS_sh4 = -ml -m4 +ifeq ($(origin XARCH),command line) +CFLAGS_XARCH = $(CFLAGS_$(XARCH)) +endif + +include Makefile.include + +CFLAGS ?= $(CFLAGS_NOLIBC_TEST) $(CFLAGS_XARCH) $(CFLAGS_EXTRA) +LDFLAGS := + +LIBGCC := -lgcc + +ifeq ($(ARCH),x86) +# Not needed on x86, probably not present for x32 +LIBGCC := +endif + +ifneq ($(LLVM),) +# Not needed for clang +LIBGCC := +endif + +# Modify CFLAGS based on LLVM= +include $(srctree)/tools/scripts/Makefile.include + +REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \ + /^Total number of errors:/{done++} \ + END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \ + if (f || !p || !done) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ + printf("\nSee all results in %s\n", ARGV[1]); }' + +help: + @echo "Supported targets under selftests/nolibc:" + @echo " all call the \"run\" target below" + @echo " help this help" + @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" + @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" + @echo " libc-test build an executable using the compiler's default libc instead" + @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)" + @echo " initramfs.cpio prepare the initramfs archive with nolibc-test" + @echo " initramfs prepare the initramfs tree with nolibc-test" + @echo " defconfig create a fresh new default config (uses \$$XARCH)" + @echo " kernel (re)build the kernel (uses \$$XARCH)" + @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)" + @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)" + @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)" + @echo " clean clean the sysroot, initramfs, build and output files" + @echo "" + @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST." 
+ @echo "" + @echo "Currently using the following variables:" + @echo " ARCH = $(ARCH)" + @echo " XARCH = $(XARCH)" + @echo " CROSS_COMPILE = $(CROSS_COMPILE)" + @echo " CC = $(CC)" + @echo " OUTPUT = $(OUTPUT)" + @echo " TEST = $(TEST)" + @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$XARCH]" + @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$XARCH]" + @echo "" + +all: run + +sysroot: sysroot/$(ARCH)/include + +sysroot/$(ARCH)/include: + $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot + $(QUIET_MKDIR)mkdir -p sysroot + $(Q)$(MAKE) -C $(srctree) outputmakefile + $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone headers_check + $(Q)mv sysroot/sysroot sysroot/$(ARCH) + +ifneq ($(NOLIBC_SYSROOT),0) +nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include + $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ + -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c $(LIBGCC) +else +nolibc-test: nolibc-test.c nolibc-test-linkage.c + $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \ + -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c $(LIBGCC) +endif + +libc-test: nolibc-test.c nolibc-test-linkage.c + $(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c + +# local libc-test +run-libc-test: libc-test + $(Q)./libc-test > "$(CURDIR)/run.out" || : + $(Q)$(REPORT) $(CURDIR)/run.out + +# local nolibc-test +run-nolibc-test: nolibc-test + $(Q)./nolibc-test > "$(CURDIR)/run.out" || : + $(Q)$(REPORT) $(CURDIR)/run.out + +# qemu user-land test +run-user: nolibc-test + $(Q)qemu-$(QEMU_ARCH_USER) ./nolibc-test > "$(CURDIR)/run.out" || : + $(Q)$(REPORT) $(CURDIR)/run.out + +initramfs.cpio: kernel nolibc-test + $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(objtree)/usr/gen_init_cpio - > initramfs.cpio + +initramfs: nolibc-test + $(QUIET_MKDIR)mkdir -p initramfs + $(call QUIET_INSTALL, initramfs/init) + $(Q)cp nolibc-test initramfs/init + +defconfig: + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG) + $(Q)if [ -n "$(EXTRACONFIG)" ]; then \ + $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \ + $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \ + fi + +kernel: + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null + +kernel-standalone: initramfs + $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null + +# run the tests after building the kernel +run: kernel initramfs.cpio + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial file:/dev/stdout $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)$(REPORT) $(CURDIR)/run.out + +# re-run the tests from an existing kernel +rerun: + $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial file:/dev/stdout $(QEMU_ARGS) > "$(CURDIR)/run.out" + $(Q)$(REPORT) $(CURDIR)/run.out + +# report with existing test log +report: + $(Q)$(REPORT) $(CURDIR)/run.out + +clean: + $(call QUIET_CLEAN, sysroot) + $(Q)rm -rf sysroot + $(call QUIET_CLEAN, nolibc-test) + $(Q)rm -f nolibc-test + $(call QUIET_CLEAN, libc-test) + $(Q)rm -f libc-test + $(call QUIET_CLEAN, initramfs.cpio) + $(Q)rm -rf initramfs.cpio + 
$(call QUIET_CLEAN, initramfs) + $(Q)rm -rf initramfs + $(call QUIET_CLEAN, run.out) + $(Q)rm -rf run.out + +.PHONY: sysroot/$(ARCH)/include diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index dbe13000fb1a..a297ee0d6d07 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -877,7 +877,12 @@ int test_file_stream(void) return 0; } -int test_fork(void) +enum fork_type { + FORK_STANDARD, + FORK_VFORK, +}; + +int test_fork(enum fork_type type) { int status; pid_t pid; @@ -886,14 +891,23 @@ int test_fork(void) fflush(stdout); fflush(stderr); - pid = fork(); + switch (type) { + case FORK_STANDARD: + pid = fork(); + break; + case FORK_VFORK: + pid = vfork(); + break; + default: + return 1; + } switch (pid) { case -1: return 1; case 0: - exit(123); + _exit(123); default: pid = waitpid(pid, &status, 0); @@ -1330,7 +1344,7 @@ int run_syscall(int min, int max) CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break; CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break; CASE_TEST(file_stream); EXPECT_SYSZR(1, test_file_stream()); break; - CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break; + CASE_TEST(fork); EXPECT_SYSZR(1, test_fork(FORK_STANDARD)); break; CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break; CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break; CASE_TEST(directories); EXPECT_SYSZR(proc, test_dirent()); break; @@ -1349,6 +1363,7 @@ int run_syscall(int min, int max) CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break; CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break; CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break; + CASE_TEST(nanosleep); ts.tv_nsec = -1; EXPECT_SYSER(1, nanosleep(&ts, NULL), -1, EINVAL); break; CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", O_RDONLY), -1); if (tmp != -1) close(tmp); break; CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", O_RDONLY), -1, ENOENT); if (tmp != -1) close(tmp); break; CASE_TEST(openat_dir); EXPECT_SYSZR(1, test_openat()); break; @@ -1374,6 +1389,7 @@ int run_syscall(int min, int max) CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break; CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break; CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break; + CASE_TEST(vfork); EXPECT_SYSZR(1, test_fork(FORK_VFORK)); break; CASE_TEST(wait_child); EXPECT_SYSER(1, wait(&tmp), -1, ECHILD); break; CASE_TEST(waitpid_min); EXPECT_SYSER(1, waitpid(INT_MIN, &tmp, WNOHANG), -1, ESRCH); break; CASE_TEST(waitpid_child); EXPECT_SYSER(1, waitpid(getpid(), &tmp, WNOHANG), -1, ECHILD); break; @@ -1413,7 +1429,7 @@ int run_stdlib(int min, int max) * Add some more chars after the \0, to test functions that overwrite the buffer set * the \0 at the exact right position. 
*/ - char buf[10] = "test123456"; + char buf[11] = "test123456"; buf[4] = '\0'; @@ -1646,6 +1662,28 @@ int test_strerror(void) return 0; } +static int test_printf_error(void) +{ + int fd, ret, saved_errno; + + fd = open("/dev/full", O_RDWR); + if (fd == -1) + return 1; + + errno = 0; + ret = dprintf(fd, "foo"); + saved_errno = errno; + close(fd); + + if (ret != -1) + return 2; + + if (saved_errno != ENOSPC) + return 3; + + return 0; +} + static int run_printf(int min, int max) { int test; @@ -1675,6 +1713,7 @@ static int run_printf(int min, int max) CASE_TEST(width_trunc); EXPECT_VFPRINTF(25, " ", "%25d", 1); break; CASE_TEST(scanf); EXPECT_ZR(1, test_scanf()); break; CASE_TEST(strerror); EXPECT_ZR(1, test_strerror()); break; + CASE_TEST(printf_error); EXPECT_ZR(1, test_printf_error()); break; case __LINE__: return ret; /* must be last */ /* note: do not set any defaults so as to permit holes above */ @@ -1762,12 +1801,14 @@ int prepare(void) if (stat("/dev/.", &stat_buf) == 0 || mkdir("/dev", 0755) == 0) { if (stat("/dev/console", &stat_buf) != 0 || stat("/dev/null", &stat_buf) != 0 || - stat("/dev/zero", &stat_buf) != 0) { + stat("/dev/zero", &stat_buf) != 0 || + stat("/dev/full", &stat_buf) != 0) { /* try devtmpfs first, otherwise fall back to manual creation */ if (mount("/dev", "/dev", "devtmpfs", 0, 0) != 0) { mknod("/dev/console", 0600 | S_IFCHR, makedev(5, 1)); mknod("/dev/null", 0666 | S_IFCHR, makedev(1, 3)); mknod("/dev/zero", 0666 | S_IFCHR, makedev(1, 5)); + mknod("/dev/full", 0666 | S_IFCHR, makedev(1, 7)); } } } diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh index 8277599e6441..e8af1fb505cf 100755 --- a/tools/testing/selftests/nolibc/run-tests.sh +++ b/tools/testing/selftests/nolibc/run-tests.sh @@ -18,15 +18,16 @@ test_mode=system werror=1 llvm= all_archs=( - i386 x86_64 + i386 x86_64 x32 arm64 arm armthumb - mips32le mips32be + mips32le mips32be mipsn32le mipsn32be mips64le mips64be ppc ppc64 ppc64le riscv32 riscv64 s390x s390 loongarch sparc32 sparc64 m68k + sh4 ) archs="${all_archs[@]}" @@ -114,6 +115,7 @@ crosstool_arch() { mips*) echo mips;; s390*) echo s390;; sparc*) echo sparc64;; + x32*) echo x86_64;; *) echo "$1";; esac } @@ -169,7 +171,7 @@ test_arch() { if [ "$werror" -ne 0 ]; then CFLAGS_EXTRA="$CFLAGS_EXTRA -Werror" fi - MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}") + MAKE=(make -f Makefile.nolibc -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" LLVM="${llvm}" O="${build_dir}") case "$test_mode" in 'system') @@ -187,7 +189,11 @@ test_arch() { echo "Unsupported configuration" return fi - if [ "$arch" = "m68k" ] && [ "$llvm" = "1" ]; then + if [ "$arch" = "m68k" -o "$arch" = "sh4" ] && [ "$llvm" = "1" ]; then + echo "Unsupported configuration" + return + fi + if [ "$arch" = "x32" ] && [ "$test_mode" = "user" ]; then echo "Unsupported configuration" return fi diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore index 0406a065deb4..144e7ff65d6a 100644 --- a/tools/testing/selftests/pidfd/.gitignore +++ b/tools/testing/selftests/pidfd/.gitignore @@ -10,3 +10,5 @@ pidfd_file_handle_test pidfd_bind_mount pidfd_info_test pidfd_exec_helper +pidfd_xattr_test +pidfd_setattr_test diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile index fcbefc0d77f6..764a8f9ecefa 100644 --- a/tools/testing/selftests/pidfd/Makefile +++ b/tools/testing/selftests/pidfd/Makefile @@ 
-1,9 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-only -CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall +CFLAGS += -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) -pthread -Wall TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \ - pidfd_file_handle_test pidfd_bind_mount pidfd_info_test + pidfd_file_handle_test pidfd_bind_mount pidfd_info_test \ + pidfd_xattr_test pidfd_setattr_test TEST_GEN_PROGS_EXTENDED := pidfd_exec_helper diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index efd74063126e..f87993def738 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -16,9 +16,22 @@ #include <sys/types.h> #include <sys/wait.h> +/* + * Remove the userspace definitions of the following preprocessor symbols + * to avoid duplicate-definition warnings from the subsequent in-kernel + * definitions. + */ +#undef SCHED_NORMAL +#undef SCHED_FLAG_KEEP_ALL +#undef SCHED_FLAG_UTIL_CLAMP + #include "../kselftest.h" #include "../clone3/clone3_selftests.h" +#ifndef FD_PIDFS_ROOT +#define FD_PIDFS_ROOT -10002 +#endif + #ifndef P_PIDFD #define P_PIDFD 3 #endif @@ -56,7 +69,7 @@ #endif #ifndef PIDFD_SELF_THREAD_GROUP -#define PIDFD_SELF_THREAD_GROUP -20000 /* Current thread group leader. */ +#define PIDFD_SELF_THREAD_GROUP -10001 /* Current thread group leader. */ #endif #ifndef PIDFD_SELF diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c index 439b9c6c0457..6bd2e9c9565b 100644 --- a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c +++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c @@ -500,4 +500,64 @@ TEST_F(file_handle, valid_name_to_handle_at_flags) ASSERT_EQ(close(pidfd), 0); } +/* + * That we decode a file handle without having to pass a pidfd. 
+ */ +TEST_F(file_handle, decode_purely_based_on_file_handle) +{ + int mnt_id; + struct file_handle *fh; + int pidfd = -EBADF; + struct stat st1, st2; + + fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ); + ASSERT_NE(fh, NULL); + memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ); + fh->handle_bytes = MAX_HANDLE_SZ; + + ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0); + + ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_CLOEXEC); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(FD_PIDFS_ROOT, fh, O_NONBLOCK); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(self->pidfd, fh, 0); + ASSERT_GE(pidfd, 0); + + ASSERT_EQ(fstat(pidfd, &st2), 0); + ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino); + + ASSERT_EQ(close(pidfd), 0); + + pidfd = open_by_handle_at(-EBADF, fh, 0); + ASSERT_LT(pidfd, 0); + + pidfd = open_by_handle_at(AT_FDCWD, fh, 0); + ASSERT_LT(pidfd, 0); + + free(fh); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_setattr_test.c b/tools/testing/selftests/pidfd/pidfd_setattr_test.c new file mode 100644 index 000000000000..d7de05edc4b3 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_setattr_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/types.h> +#include <poll.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/socket.h> +#include <linux/kcmp.h> +#include <sys/stat.h> +#include <sys/xattr.h> + +#include "pidfd.h" +#include "../kselftest_harness.h" + +FIXTURE(pidfs_setattr) +{ + pid_t child_pid; + int child_pidfd; +}; + +FIXTURE_SETUP(pidfs_setattr) +{ + self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid, 0); + + if (self->child_pid == 0) + _exit(EXIT_SUCCESS); +} + +FIXTURE_TEARDOWN(pidfs_setattr) +{ + sys_waitid(P_PID, self->child_pid, NULL, WEXITED); + EXPECT_EQ(close(self->child_pidfd), 0); +} + +TEST_F(pidfs_setattr, no_chown) +{ + ASSERT_LT(fchown(self->child_pidfd, 1234, 5678), 0); + ASSERT_EQ(errno, EOPNOTSUPP); +} + +TEST_F(pidfs_setattr, no_chmod) +{ + ASSERT_LT(fchmod(self->child_pidfd, 0777), 0); + ASSERT_EQ(errno, EOPNOTSUPP); +} + +TEST_F(pidfs_setattr, no_exec) +{ + char *const argv[] = { NULL }; + char *const envp[] = { NULL }; + + ASSERT_LT(execveat(self->child_pidfd, "", argv, envp, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, EACCES); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/pidfd/pidfd_xattr_test.c b/tools/testing/selftests/pidfd/pidfd_xattr_test.c new file mode 100644 index 000000000000..5cf7bb0e4bf2 --- /dev/null +++ b/tools/testing/selftests/pidfd/pidfd_xattr_test.c @@ -0,0 +1,132 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <errno.h> 
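For reference, a minimal sketch of the flow the decode_purely_based_on_file_handle test above exercises, not part of the patch itself: encode a pidfd as a file handle, then re-open it purely from the handle via the pidfs root. It assumes the FD_PIDFS_ROOT fallback value added to pidfd.h earlier in this series; the helper name and error handling are illustrative only.

/* Illustrative only: re-open a pidfd purely from its file handle. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>

#ifndef FD_PIDFS_ROOT
#define FD_PIDFS_ROOT -10002	/* assumption: mirrors the pidfd.h fallback above */
#endif

static int reopen_pidfd_from_handle(int pidfd_in)
{
	struct file_handle *fh;
	int mnt_id, pidfd_out;

	fh = malloc(sizeof(*fh) + MAX_HANDLE_SZ);
	if (!fh)
		return -1;
	memset(fh, 0, sizeof(*fh) + MAX_HANDLE_SZ);
	fh->handle_bytes = MAX_HANDLE_SZ;

	/* Encode the pidfd itself (empty path, AT_EMPTY_PATH). */
	if (name_to_handle_at(pidfd_in, "", fh, &mnt_id, AT_EMPTY_PATH) < 0) {
		free(fh);
		return -1;
	}

	/* Decode without any per-process pidfd: only the pidfs root fd. */
	pidfd_out = open_by_handle_at(FD_PIDFS_ROOT, fh, O_CLOEXEC);
	free(fh);
	return pidfd_out;
}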
+#include <fcntl.h> +#include <limits.h> +#include <linux/types.h> +#include <poll.h> +#include <pthread.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> +#include <sys/socket.h> +#include <linux/kcmp.h> +#include <sys/stat.h> +#include <sys/xattr.h> + +#include "pidfd.h" +#include "../kselftest_harness.h" + +FIXTURE(pidfs_xattr) +{ + pid_t child_pid; + int child_pidfd; +}; + +FIXTURE_SETUP(pidfs_xattr) +{ + self->child_pid = create_child(&self->child_pidfd, CLONE_NEWUSER | CLONE_NEWPID); + EXPECT_GE(self->child_pid, 0); + + if (self->child_pid == 0) + _exit(EXIT_SUCCESS); +} + +FIXTURE_TEARDOWN(pidfs_xattr) +{ + sys_waitid(P_PID, self->child_pid, NULL, WEXITED); +} + +TEST_F(pidfs_xattr, set_get_list_xattr_multiple) +{ + int ret, i; + char xattr_name[32]; + char xattr_value[32]; + char buf[32]; + const int num_xattrs = 10; + char list[PATH_MAX] = {}; + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i); + ret = fsetxattr(self->child_pidfd, xattr_name, xattr_value, strlen(xattr_value), 0); + ASSERT_EQ(ret, 0); + } + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + snprintf(xattr_value, sizeof(xattr_value), "testvalue%d", i); + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf)); + ASSERT_EQ(ret, strlen(xattr_value)); + ASSERT_EQ(strcmp(buf, xattr_value), 0); + } + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + bool found = false; + for (char *it = list; it < list + ret; it += strlen(it) + 1) { + if (strcmp(it, xattr_name)) + continue; + found = true; + break; + } + ASSERT_TRUE(found); + } + + for (i = 0; i < num_xattrs; i++) { + snprintf(xattr_name, sizeof(xattr_name), "trusted.testattr%d", i); + ret = fremovexattr(self->child_pidfd, xattr_name); + ASSERT_EQ(ret, 0); + + ret = fgetxattr(self->child_pidfd, xattr_name, buf, sizeof(buf)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, ENODATA); + } +} + +TEST_F(pidfs_xattr, set_get_list_xattr_persistent) +{ + int ret; + char buf[32]; + char list[PATH_MAX] = {}; + + ret = fsetxattr(self->child_pidfd, "trusted.persistent", "persistent value", strlen("persistent value"), 0); + ASSERT_EQ(ret, 0); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf)); + ASSERT_EQ(ret, strlen("persistent value")); + ASSERT_EQ(strcmp(buf, "persistent value"), 0); + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + ASSERT_EQ(strcmp(list, "trusted.persistent"), 0) + + ASSERT_EQ(close(self->child_pidfd), 0); + self->child_pidfd = -EBADF; + sleep(2); + + self->child_pidfd = sys_pidfd_open(self->child_pid, 0); + ASSERT_GE(self->child_pidfd, 0); + + memset(buf, 0, sizeof(buf)); + ret = fgetxattr(self->child_pidfd, "trusted.persistent", buf, sizeof(buf)); + ASSERT_EQ(ret, strlen("persistent value")); + ASSERT_EQ(strcmp(buf, "persistent value"), 0); + + ret = flistxattr(self->child_pidfd, list, sizeof(list)); + ASSERT_GT(ret, 0); + ASSERT_EQ(strcmp(list, "trusted.persistent"), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c 
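As a quick aside on the pidfd xattr test just added: the core of what it checks is a plain trusted.* xattr round-trip on a pidfd. A minimal sketch is below; it is not part of the patch, the attribute name is made up, and writing to the trusted namespace needs CAP_SYS_ADMIN.

/* Illustrative only: set, read back and remove a trusted xattr on a pidfd. */
#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

static int pidfd_xattr_roundtrip(int pidfd)
{
	char buf[32] = {};

	if (fsetxattr(pidfd, "trusted.testattr", "testvalue", strlen("testvalue"), 0) < 0)
		return -1;
	if (fgetxattr(pidfd, "trusted.testattr", buf, sizeof(buf)) < 0)
		return -1;
	printf("trusted.testattr = %s\n", buf);
	return fremovexattr(pidfd, "trusted.testattr");
}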
index edc08a4433fd..ed1e2886ba3c 100644 --- a/tools/testing/selftests/ptp/testptp.c +++ b/tools/testing/selftests/ptp/testptp.c @@ -120,6 +120,7 @@ static void usage(char *progname) " -c query the ptp clock's capabilities\n" " -d name device to open\n" " -e val read 'val' external time stamp events\n" + " -E val enable rising (1), falling (2), or both (3) edges\n" " -f val adjust the ptp clock frequency by 'val' ppb\n" " -F chan Enable single channel mask and keep device open for debugfs verification.\n" " -g get the ptp clock time\n" @@ -178,6 +179,7 @@ int main(int argc, char *argv[]) int adjphase = 0; int capabilities = 0; int extts = 0; + int edge = 0; int flagtest = 0; int gettime = 0; int index = 0; @@ -202,7 +204,7 @@ int main(int argc, char *argv[]) progname = strrchr(argv[0], '/'); progname = progname ? 1+progname : argv[0]; - while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) { + while (EOF != (c = getopt(argc, argv, "cd:e:E:f:F:ghH:i:k:lL:n:o:p:P:rsSt:T:w:x:Xy:z"))) { switch (c) { case 'c': capabilities = 1; @@ -213,6 +215,11 @@ int main(int argc, char *argv[]) case 'e': extts = atoi(optarg); break; + case 'E': + edge = atoi(optarg); + edge = (edge & 1 ? PTP_RISING_EDGE : 0) | + (edge & 2 ? PTP_FALLING_EDGE : 0); + break; case 'f': adjfreq = atoi(optarg); break; @@ -444,7 +451,7 @@ int main(int argc, char *argv[]) if (!readonly) { memset(&extts_request, 0, sizeof(extts_request)); extts_request.index = index; - extts_request.flags = PTP_ENABLE_FEATURE; + extts_request.flags = PTP_ENABLE_FEATURE | edge; if (ioctl(fd, PTP_EXTTS_REQUEST, &extts_request)) { perror("PTP_EXTTS_REQUEST"); extts = 0; diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c index a6884f66dc01..2f345d11e4b8 100644 --- a/tools/testing/selftests/ptrace/peeksiginfo.c +++ b/tools/testing/selftests/ptrace/peeksiginfo.c @@ -199,7 +199,7 @@ int main(int argc, char *argv[]) /* * Dump signal from the process-wide queue. - * The number of signals is not multible to the buffer size + * The number of signals is not multiple to the buffer size */ if (check_direct_path(child, 1, 3)) goto out; diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh index 11f8d232b0ee..3edfd064ef81 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh @@ -44,7 +44,7 @@ fi ncpus="`getconf _NPROCESSORS_ONLN`" make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1 retval=$? 
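Looking back at the testptp.c hunk a little earlier: the new -E option folds the requested edges into the external timestamp request flags. The sketch below shows that mapping in isolation; it is not part of the patch, and the fd and channel index are assumed to come from the usual testptp setup (edge is the raw -E argument: 1 rising, 2 falling, 3 both).

/* Illustrative only: build an extts request with the -E edge selection. */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/ptp_clock.h>

static int request_extts(int fd, unsigned int index, int edge)
{
	struct ptp_extts_request req;

	memset(&req, 0, sizeof(req));
	req.index = index;
	req.flags = PTP_ENABLE_FEATURE |
		    (edge & 1 ? PTP_RISING_EDGE : 0) |
		    (edge & 2 ? PTP_FALLING_EDGE : 0);

	return ioctl(fd, PTP_EXTTS_REQUEST, &req);
}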
-if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|Error|error:|warning:" || grep -E -q "Stop|Error|error:" < $resdir/Make.out +if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|ERROR|Error|error:|warning:" || grep -E -q "Stop|ERROR|Error|error:" < $resdir/Make.out then echo Kernel build error grep -E "Stop|Error|error:|warning:" < $resdir/Make.out diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 42e5e8597a1a..617cba339d28 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -199,7 +199,7 @@ do fi ;; --kconfig|--kconfigs) - checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$' + checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \+\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)* *$' '^error$' TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`" shift ;; @@ -442,18 +442,7 @@ echo $scriptname $args touch $resdir/$ds/log echo $scriptname $args >> $resdir/$ds/log echo ${TORTURE_SUITE} > $resdir/$ds/torture_suite -echo Build directory: `pwd` > $resdir/$ds/testid.txt -if test -d .git -then - echo Current commit: `git rev-parse HEAD` >> $resdir/$ds/testid.txt - echo >> $resdir/$ds/testid.txt - echo ' ---' Output of "'"git status"'": >> $resdir/$ds/testid.txt - git status >> $resdir/$ds/testid.txt - echo >> $resdir/$ds/testid.txt - echo >> $resdir/$ds/testid.txt - echo ' ---' Output of "'"git diff HEAD"'": >> $resdir/$ds/testid.txt - git diff HEAD >> $resdir/$ds/testid.txt -fi +mktestid.sh $resdir/$ds ___EOF___ kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk kvm-get-cpus-script.sh $T/cpuarray.awk $T/dumpbatches.awk diff --git a/tools/testing/selftests/rcutorture/bin/mktestid.sh b/tools/testing/selftests/rcutorture/bin/mktestid.sh new file mode 100755 index 000000000000..16f9907a4dae --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/mktestid.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Create a testid.txt file in the specified directory. +# +# Usage: mktestid.sh dirpath +# +# Copyright (C) Meta Platforms, Inc. 2025 +# +# Author: Paul E. McKenney <paulmck@kernel.org> + +resdir="$1" +if test -z "${resdir}" || ! test -d "${resdir}" || ! test -w "${resdir}" +then + echo Path '"'${resdir}'"' not writeable directory, no ${resdir}/testid.txt. + exit 1 +fi +echo Build directory: `pwd` > ${resdir}/testid.txt +if test -d .git +then + echo Current commit: `git rev-parse HEAD` >> ${resdir}/testid.txt + echo >> ${resdir}/testid.txt + echo ' ---' Output of "'"git status"'": >> ${resdir}/testid.txt + git status >> ${resdir}/testid.txt + echo >> ${resdir}/testid.txt + echo >> ${resdir}/testid.txt + echo ' ---' Output of "'"git diff HEAD"'": >> ${resdir}/testid.txt + git diff HEAD >> ${resdir}/testid.txt +fi diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index e03fdaca89b3..611bc03a8dc7 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -30,6 +30,15 @@ then VERBOSE_BATCH_CPUS=0 fi +# Machine architecture? 
("uname -p" is said to be less portable.)1 +thisarch="`uname -m`" +if test "${thisarch}" = aarch64 +then + ifnotaarch64=no +else + ifnotaarch64=yes +fi + # Configurations/scenarios. configs_rcutorture= configs_locktorture= @@ -55,9 +64,9 @@ do_normal=yes explicit_normal=no do_kasan=yes do_kcsan=no -do_clocksourcewd=yes +do_clocksourcewd="${ifnotaarch64}" do_rt=yes -do_rcutasksflavors=yes +do_rcutasksflavors="${ifnotaarch64}" # FIXME: Back to "yes" when SMP=n auto-avoided do_srcu_lockdep=yes do_rcu_rust=no @@ -124,7 +133,7 @@ do ;; --do-all|--doall) do_allmodconfig=yes - do_rcutasksflavor=yes + do_rcutasksflavors="${ifnotaarch64}" # FIXME: Back to "yes" when SMP=n auto-avoided do_rcutorture=yes do_locktorture=yes do_scftorture=yes @@ -136,7 +145,7 @@ do explicit_normal=no do_kasan=yes do_kcsan=yes - do_clocksourcewd=yes + do_clocksourcewd="${ifnotaarch64}" do_srcu_lockdep=yes ;; --do-allmodconfig|--do-no-allmodconfig|--no-allmodconfig) @@ -274,7 +283,7 @@ then configs_rcutorture=CFLIST fi duration_rcutorture=$((duration_base*duration_rcutorture_frac/10)) -if test "$duration_rcutorture" -eq 0 +if test "$duration_rcutorture" -eq 0 && test "$do_locktorture" = "yes" then echo " --- Zero time for rcutorture, disabling" | tee -a $T/log do_rcutorture=no @@ -286,7 +295,7 @@ then configs_locktorture=CFLIST fi duration_locktorture=$((duration_base*duration_locktorture_frac/10)) -if test "$duration_locktorture" -eq 0 +if test "$duration_locktorture" -eq 0 && test "$do_locktorture" = "yes" then echo " --- Zero time for locktorture, disabling" | tee -a $T/log do_locktorture=no @@ -298,12 +307,19 @@ then configs_scftorture=CFLIST fi duration_scftorture=$((duration_base*duration_scftorture_frac/10)) -if test "$duration_scftorture" -eq 0 +if test "$duration_scftorture" -eq 0 && test "$do_scftorture" = "yes" then echo " --- Zero time for scftorture, disabling" | tee -a $T/log do_scftorture=no fi +# CONFIG_EXPERT=y is currently required for arm64 KCSAN runs. 
+kcsan_expert= +if test "${thisarch}" = aarch64 +then + kcsan_expert="CONFIG_EXPERT=y" +fi + touch $T/failures touch $T/successes @@ -362,13 +378,19 @@ function torture_set { then curflavor=$flavor torture_one "$@" - mv $T/last-resdir $T/last-resdir-nodebug || : + if test -e $T/last-resdir + then + mv $T/last-resdir $T/last-resdir-nodebug || : + fi fi if test "$do_kasan" = "yes" then curflavor=${flavor}-kasan torture_one "$@" --kasan - mv $T/last-resdir $T/last-resdir-kasan || : + if test -e $T/last-resdir + then + mv $T/last-resdir $T/last-resdir-kasan || : + fi fi if test "$do_kcsan" = "yes" then @@ -378,8 +400,16 @@ function torture_set { kcsan_kmake_tag="--kmake-args" cur_kcsan_kmake_args="$kcsan_kmake_args" fi - torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan - mv $T/last-resdir $T/last-resdir-kcsan || : + chk_rdr_state= + if test "${flavor}" = rcutorture + then + chk_rdr_state="CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y" + fi + torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y ${kcsan_expert} ${chk_rdr_state}" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan + if test -e $T/last-resdir + then + mv $T/last-resdir $T/last-resdir-kcsan || : + fi fi } @@ -389,6 +419,7 @@ then echo " --- allmodconfig:" Start `date` | tee -a $T/log amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig" mkdir -p "$amcdir" + mktestid.sh "$amcdir" echo " --- make clean" | tee $amcdir/log > "$amcdir/Make.out" 2>&1 make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1 retcode=$? @@ -407,6 +438,10 @@ then make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1 retcode="$?" echo $retcode > "$amcdir/Make.exitcode" + if grep -E -q "Stop|ERROR|Error|error:|warning:" < "$amcdir/Make.out" + then + retcode=99 + fi buildphase='"make"' fi if test "$retcode" -eq 0 @@ -495,6 +530,7 @@ then echo " --- do-rcu-rust:" Start `date` | tee -a $T/log rrdir="tools/testing/selftests/rcutorture/res/$ds/results-rcu-rust" mkdir -p "$rrdir" + mktestid.sh "$rrdir" echo " --- make LLVM=1 rustavailable " | tee -a $rrdir/log > $rrdir/rustavailable.out make LLVM=1 rustavailable > $T/rustavailable.out 2>&1 retcode=$? @@ -681,7 +717,14 @@ nfailures=0 echo FAILURES: | tee -a $T/log if test -s "$T/failures" then - awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum" + awk < "$T/failures" -v sq="'" ' + { + print "echo " sq $0 sq; + if ($2 != "") + print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; + else + print "echo " sq " " sq "Run failed to produce results directory."; + }' | sh | tee -a $T/log | tee "$T/failuresum" nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`" grep "^ Summary: " "$T/failuresum" | grep -v '^ Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan" @@ -691,15 +734,18 @@ then fi ret=2 fi -if test "$do_kcsan" = "yes" +if test "$do_kcsan" = "yes" && test -e tools/testing/selftests/rcutorture/res/$ds then TORTURE_KCONFIG_KCSAN_ARG=1 tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh tools/testing/selftests/rcutorture/res/$ds > tools/testing/selftests/rcutorture/res/$ds/kcsan.sum fi echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log echo Summary: Successes: $nsuccesses Failures: $nfailures. 
| tee -a $T/log -tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`" -find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors -find "$tdir" -name 'Make.out.diags' -print > $T/builderrors +tdir="`cat $T/successes $T/failures | awk 'NF > 1 { print $NF }' | head -1 | sed -e 's,/[^/]\+/*$,,'`" +if test -n "$tdir" +then + find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors + find "$tdir" -name 'Make.out.diags' -print > $T/builderrors +fi if test -s "$T/configerrors" then echo " Scenarios with .config errors: `wc -l "$T/configerrors" | awk '{ print $1 }'`" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED index 48d8a245c7fa..7d75f4b94943 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED +++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED @@ -5,3 +5,6 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y +CONFIG_RCU_TORTURE_TEST_CHK_RDR_STATE=y +CONFIG_RCU_TORTURE_TEST_LOG_CPU=y +CONFIG_RCU_TORTURE_TEST_LOG_GP=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST index 45f572570a8c..98b6175e5aa0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST @@ -5,7 +5,6 @@ TREE04 TREE05 TREE07 TREE09 -SRCU-L SRCU-N SRCU-P SRCU-T diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L deleted file mode 100644 index 3b4fa8dbef8a..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L +++ /dev/null @@ -1,10 +0,0 @@ -CONFIG_RCU_TRACE=n -CONFIG_SMP=y -CONFIG_NR_CPUS=6 -CONFIG_HOTPLUG_CPU=y -CONFIG_PREEMPT_NONE=y -CONFIG_PREEMPT_VOLUNTARY=n -CONFIG_PREEMPT=n -#CHECK#CONFIG_RCU_EXPERT=n -CONFIG_KPROBES=n -CONFIG_FTRACE=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot deleted file mode 100644 index 0207b3138c5b..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-L.boot +++ /dev/null @@ -1,3 +0,0 @@ -rcutorture.torture_type=srcu -rcutorture.reader_flavor=0x4 -rcutorture.fwd_progress=3 diff --git a/tools/testing/selftests/sched_ext/exit.c b/tools/testing/selftests/sched_ext/exit.c index 9451782689de..ee25824b1cbe 100644 --- a/tools/testing/selftests/sched_ext/exit.c +++ b/tools/testing/selftests/sched_ext/exit.c @@ -22,6 +22,14 @@ static enum scx_test_status run(void *ctx) struct bpf_link *link; char buf[16]; + /* + * On single-CPU systems, ops.select_cpu() is never + * invoked, so skip this test to avoid getting stuck + * indefinitely. 
+ */ + if (tc == EXIT_SELECT_CPU && libbpf_num_possible_cpus() == 1) + continue; + skel = exit__open(); SCX_ENUM_INIT(skel); skel->rodata->exit_point = tc; diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c index d975a6767329..2eb2c06303f2 100644 --- a/tools/testing/selftests/syscall_user_dispatch/sud_test.c +++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c @@ -10,6 +10,8 @@ #include <sys/sysinfo.h> #include <sys/syscall.h> #include <signal.h> +#include <stdbool.h> +#include <stdlib.h> #include <asm/unistd.h> #include "../kselftest_harness.h" @@ -17,11 +19,15 @@ #ifndef PR_SET_SYSCALL_USER_DISPATCH # define PR_SET_SYSCALL_USER_DISPATCH 59 # define PR_SYS_DISPATCH_OFF 0 -# define PR_SYS_DISPATCH_ON 1 # define SYSCALL_DISPATCH_FILTER_ALLOW 0 # define SYSCALL_DISPATCH_FILTER_BLOCK 1 #endif +#ifndef PR_SYS_DISPATCH_EXCLUSIVE_ON +# define PR_SYS_DISPATCH_EXCLUSIVE_ON 1 +# define PR_SYS_DISPATCH_INCLUSIVE_ON 2 +#endif + #ifndef SYS_USER_DISPATCH # define SYS_USER_DISPATCH 2 #endif @@ -65,7 +71,7 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) ret = sysinfo(&info); ASSERT_EQ(0, ret); - ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel); + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &sel); ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); } @@ -79,6 +85,21 @@ TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS) } } +static void prctl_valid(struct __test_metadata *_metadata, + unsigned long op, unsigned long off, + unsigned long size, void *sel) +{ + EXPECT_EQ(0, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel)); +} + +static void prctl_invalid(struct __test_metadata *_metadata, + unsigned long op, unsigned long off, + unsigned long size, void *sel, int err) +{ + EXPECT_EQ(-1, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, sel)); + EXPECT_EQ(err, errno); +} + TEST(bad_prctl_param) { char sel = SYSCALL_DISPATCH_FILTER_ALLOW; @@ -86,57 +107,54 @@ TEST(bad_prctl_param) /* Invalid op */ op = -1; - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0, 0, &sel); - ASSERT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0, 0, &sel, EINVAL); /* PR_SYS_DISPATCH_OFF */ op = PR_SYS_DISPATCH_OFF; /* offset != 0 */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, 0); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x1, 0x0, 0, EINVAL); /* len != 0 */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0xff, 0); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x0, 0xff, 0, EINVAL); /* sel != NULL */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, &sel); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x0, 0x0, &sel, EINVAL); /* Valid parameter */ - errno = 0; - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, 0x0); - EXPECT_EQ(0, errno); + prctl_valid(_metadata, op, 0x0, 0x0, 0x0); - /* PR_SYS_DISPATCH_ON */ - op = PR_SYS_DISPATCH_ON; + /* PR_SYS_DISPATCH_EXCLUSIVE_ON */ + op = PR_SYS_DISPATCH_EXCLUSIVE_ON; /* Dispatcher region is bad (offset > 0 && len == 0) */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, &sel); - EXPECT_EQ(EINVAL, errno); - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, -1L, 0x0, &sel); - EXPECT_EQ(EINVAL, errno); + prctl_invalid(_metadata, op, 0x1, 0x0, &sel, EINVAL); + prctl_invalid(_metadata, op, -1L, 0x0, &sel, EINVAL); /* Invalid selector */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x1, (void *) -1); - ASSERT_EQ(EFAULT, errno); + prctl_invalid(_metadata, op, 0x0, 0x1, (void 
*) -1, EFAULT); /* * Dispatcher range overflows unsigned long */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 1, -1L, &sel); - ASSERT_EQ(EINVAL, errno) { - TH_LOG("Should reject bad syscall range"); - } + prctl_invalid(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, 1, -1L, &sel, EINVAL); /* * Allowed range overflows usigned long */ - prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel); - ASSERT_EQ(EINVAL, errno) { - TH_LOG("Should reject bad syscall range"); - } + prctl_invalid(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, -1L, 0x1, &sel, EINVAL); + + /* 0 len should fail for PR_SYS_DISPATCH_INCLUSIVE_ON */ + prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 1, 0, 0, EINVAL); + + /* Range wrap-around should fail */ + prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, -1L, 2, 0, EINVAL); + + /* Normal range shouldn't fail */ + prctl_valid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 2, 3, 0); + + /* Invalid selector */ + prctl_invalid(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, 2, 3, (void *) -1, EFAULT); } /* @@ -147,11 +165,13 @@ char glob_sel; int nr_syscalls_emulated; int si_code; int si_errno; +unsigned long syscall_addr; static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) { si_code = info->si_code; si_errno = info->si_errno; + syscall_addr = (unsigned long)info->si_call_addr; if (info->si_syscall == MAGIC_SYSCALL_1) nr_syscalls_emulated++; @@ -174,31 +194,34 @@ static void handle_sigsys(int sig, siginfo_t *info, void *ucontext) #endif } -TEST(dispatch_and_return) +int setup_sigsys_handler(void) { - long ret; struct sigaction act; sigset_t mask; - glob_sel = 0; - nr_syscalls_emulated = 0; - si_code = 0; - si_errno = 0; - memset(&act, 0, sizeof(act)); sigemptyset(&mask); - act.sa_sigaction = handle_sigsys; act.sa_flags = SA_SIGINFO; act.sa_mask = mask; + return sigaction(SIGSYS, &act, NULL); +} - ret = sigaction(SIGSYS, &act, NULL); - ASSERT_EQ(0, ret); +TEST(dispatch_and_return) +{ + long ret; + + glob_sel = 0; + nr_syscalls_emulated = 0; + si_code = 0; + si_errno = 0; + + ASSERT_EQ(0, setup_sigsys_handler()); /* Make sure selector is good prior to prctl. */ SYSCALL_DISPATCH_OFF(glob_sel); - ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel); + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &glob_sel); ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); } @@ -254,7 +277,7 @@ TEST_SIGNAL(bad_selector, SIGSYS) /* Make sure selector is good prior to prctl. */ SYSCALL_DISPATCH_OFF(glob_sel); - ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel); + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &glob_sel); ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); } @@ -278,7 +301,7 @@ TEST(disable_dispatch) struct sysinfo info; char sel = 0; - ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel); + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, &sel); ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); } @@ -310,7 +333,7 @@ TEST(direct_dispatch_range) * Instead of calculating libc addresses; allow the entire * memory map and lock the selector. 
*/ - ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, -1L, &sel); + ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, -1L, &sel); ASSERT_EQ(0, ret) { TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH"); } @@ -323,4 +346,35 @@ TEST(direct_dispatch_range) } } +static void test_range(struct __test_metadata *_metadata, + unsigned long op, unsigned long off, + unsigned long size, bool dispatch) +{ + nr_syscalls_emulated = 0; + SYSCALL_DISPATCH_OFF(glob_sel); + EXPECT_EQ(0, prctl(PR_SET_SYSCALL_USER_DISPATCH, op, off, size, &glob_sel)); + SYSCALL_DISPATCH_ON(glob_sel); + if (dispatch) { + EXPECT_EQ(syscall(MAGIC_SYSCALL_1), MAGIC_SYSCALL_1); + EXPECT_EQ(nr_syscalls_emulated, 1); + } else { + EXPECT_EQ(syscall(MAGIC_SYSCALL_1), -1); + EXPECT_EQ(nr_syscalls_emulated, 0); + } +} + +TEST(dispatch_range) +{ + ASSERT_EQ(0, setup_sigsys_handler()); + test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, 0, 0, true); + test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr, 1, false); + test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr-100, 200, false); + test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr+1, 100, true); + test_range(_metadata, PR_SYS_DISPATCH_EXCLUSIVE_ON, syscall_addr-100, 100, true); + test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr, 1, true); + test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr-1, 1, false); + test_range(_metadata, PR_SYS_DISPATCH_INCLUSIVE_ON, syscall_addr+1, 1, false); + SYSCALL_DISPATCH_OFF(glob_sel); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index db176fe7d0c3..c20aa16b1d63 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -21,6 +21,7 @@ CONFIG_NF_NAT=m CONFIG_NETFILTER_XT_TARGET_LOG=m CONFIG_NET_SCHED=y +CONFIG_IP_SET=m # # Queueing/Scheduling @@ -30,6 +31,7 @@ CONFIG_NET_SCH_CBS=m CONFIG_NET_SCH_CHOKE=m CONFIG_NET_SCH_CODEL=m CONFIG_NET_SCH_DRR=m +CONFIG_NET_SCH_DUALPI2=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_FQ_CODEL=m diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 9aa44d8176d9..23a61e5b99d0 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -128,6 +128,32 @@ ] }, { + "id": "5456", + "name": "Test htb_dequeue_tree with deactivation and row emptying", + "category": [ + "qdisc", + "htb" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: htb default 1", + "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 64bit ", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3: blackhole" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -j qdisc show dev $DUMMY", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, + { "id": "c024", "name": "Test TBF with SKBPRIO - catch qlen corner cases", "category": [ @@ -478,7 +504,6 @@ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1", "$TC class add dev $DUMMY parent 1:0 
classid 1:2 hfsc ls m2 10Mbit", - "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2", "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true", "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1", @@ -491,8 +516,8 @@ { "kind": "hfsc", "handle": "1:", - "bytes": 392, - "packets": 4 + "bytes": 294, + "packets": 3 } ], "matchCount": "1", @@ -635,5 +660,108 @@ "$TC qdisc del dev $DUMMY handle 1:0 root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "d74b", + "name": "Test use-after-free with DRR/NETEM/BLACKHOLE chain", + "category": [ + "qdisc", + "hfsc", + "drr", + "netem", + "blackhole" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DUMMY parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3: netem", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4: blackhole", + "ping -c1 -W0.01 -I $DUMMY 10.10.11.11 || true", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.11.11", + "expExitCode": "1", + "verifyCmd": "$TC -j class ls dev $DUMMY classid 1:1", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr" + ] + }, + { + "id": "be28", + "name": "Try to add fq_codel qdisc as a child of an hhf qdisc", + "category": [ + "qdisc", + "fq_codel", + "hhf" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: hhf" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: fq_codel", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, + { + "id": "fcb5", + "name": "Try to add pie qdisc as a child of a drr qdisc", + "category": [ + "qdisc", + "pie", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: drr" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a: handle b: pie", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] + }, + { + "id": "7801", + "name": "Try to add fq qdisc as a child of an inexistent hfsc class", + "category": [ + "qdisc", + "sfq", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle a: hfsc" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent a:fff2 sfq limit 4", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle b:", + "matchJSON": [], + "teardown": [ + "$TC qdisc del dev $DUMMY root" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json new file mode 100644 index 000000000000..cd1f2ee8f354 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dualpi2.json @@ -0,0 +1,254 @@ +[ + { + "id": "a4c7", + "name": "Create DualPI2 with default setting", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + 
"setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* step_thresh 1ms min_qlen_step 0p coupling_factor 2 drop_on_overload drop_dequeue classic_protection 10% l4s_ect split_gso", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "1ea4", + "name": "Create DualPI2 with memlimit", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 memlimit 20000000", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* memlimit 20000000B", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "2130", + "name": "Create DualPI2 with typical_rtt and max_rtt", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 typical_rtt 20ms max_rtt 200ms", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 20ms tupdate 20ms alpha 0.042969 beta 1.496094", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "90c1", + "name": "Create DualPI2 with max_rtt", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 max_rtt 300ms", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* target 50ms tupdate 50ms alpha 0.050781 beta 0.996094", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "7b3c", + "name": "Create DualPI2 with any_ect option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 any_ect", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* any_ect", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "49a3", + "name": "Create DualPI2 with overflow option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 overflow", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p.* overflow", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "d0a1", + "name": "Create DualPI2 with drop_enqueue option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 drop_enqueue", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* drop_enqueue", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "f051", + "name": 
"Create DualPI2 with no_split_gso option", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 no_split_gso", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* no_split_gso", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "456b", + "name": "Create DualPI2 with packet step_thresh", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 step_thresh 3p", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* step_thresh 3p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "610c", + "name": "Create DualPI2 with packet min_qlen_step", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 min_qlen_step 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* min_qlen_step 1p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "b4fa", + "name": "Create DualPI2 with packet coupling_factor", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 coupling_factor 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* coupling_factor 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "37f1", + "name": "Create DualPI2 with packet classic_protection", + "category": [ + "qdisc", + "dualpi2" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dualpi2 classic_protection 0", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "qdisc dualpi2 1: root refcnt [0-9]+ limit 10000p .* classic_protection 0%", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json index 3c4444961488..718d2df2aafa 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json @@ -336,5 +336,86 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "d34d", + "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change root", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", + "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" + ], + "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: netem duplicate 50%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "2", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] + }, + { + "id": 
"b33f", + "name": "NETEM test qdisc duplication restriction in qdisc tree in netem_change non-root", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1", + "$TC qdisc add dev $DUMMY parent 1: handle 2: netem limit 1" + ], + "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 2: netem duplicate 50%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "2", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] + }, + { + "id": "cafe", + "name": "NETEM test qdisc duplication restriction in qdisc tree", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY root handle 1: netem limit 1 duplicate 100%" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1: handle 2: netem duplicate 100%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] + }, + { + "id": "1337", + "name": "NETEM test qdisc duplication restriction in qdisc tree across branches", + "category": ["qdisc", "netem"], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY parent root handle 1:0 hfsc", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc rt m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc rt m2 10Mbit" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", + "expExitCode": "2", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc netem", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json index 28c6ce6da7db..531a2f6e4900 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json @@ -264,5 +264,41 @@ "matchPattern": "sfq", "matchCount": "0", "teardown": [] + }, + { + "id": "cdc1", + "name": "Check that a negative perturb timer is rejected", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb -10", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] + }, + { + "id": "a9f0", + "name": "Check that a too big perturb timer is rejected", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb 1000000000", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] } ] diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index 589b18ed758a..dae19687912d 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -4,8 +4,7 @@ # If a module is required and was not compiled # the test that requires it will fail anyways try_modprobe() { - modprobe -q -R "$1" - if [ $? -ne 0 ]; then + if ! modprobe -q -R "$1"; then echo "Module $1 not found... skipping." 
else modprobe "$1" @@ -67,4 +66,5 @@ try_modprobe sch_hfsc try_modprobe sch_hhf try_modprobe sch_htb try_modprobe sch_teql -./tdc.py -J`nproc` +try_modprobe sch_dualpi2 +./tdc.py -J"$(nproc)" diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c index 6e60f7d97125..b227bd78b252 100644 --- a/tools/testing/selftests/ublk/fault_inject.c +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -38,7 +38,8 @@ static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, return 0; } -static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) +static int ublk_fault_inject_queue_io(struct ublk_thread *t, + struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); struct io_uring_sqe *sqe; @@ -46,25 +47,27 @@ static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) .tv_nsec = (long long)q->dev->private_data, }; - ublk_io_alloc_sqes(ublk_get_io(q, tag), &sqe, 1); + ublk_io_alloc_sqes(t, &sqe, 1); io_uring_prep_timeout(sqe, &ts, 1, 0); sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, q->q_id, 1); - ublk_queued_tgt_io(q, tag, 1); + ublk_queued_tgt_io(t, q, tag, 1); return 0; } -static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag, +static void ublk_fault_inject_tgt_io_done(struct ublk_thread *t, + struct ublk_queue *q, const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); if (cqe->res != -ETIME) ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res); - if (ublk_completed_tgt_io(q, tag)) - ublk_complete_io(q, tag, iod->nr_sectors << 9); + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, iod->nr_sectors << 9); else ublk_err("%s: io not complete after 1 cqe\n", __func__); } diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index cfa59b631693..2d93ac860bd5 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -13,12 +13,13 @@ static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int assert(0); } -static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int loop_queue_flush_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { unsigned ublk_op = ublksrv_get_op(iod); struct io_uring_sqe *sqe[1]; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC); io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); /* bit63 marks us as tgt io */ @@ -26,7 +27,8 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des return 1; } -static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int loop_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { unsigned ublk_op = ublksrv_get_op(iod); unsigned zc = ublk_queue_use_zc(q); @@ -36,7 +38,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de void *addr = (zc | auto_zc) ? 
NULL : (void *)iod->addr; if (!zc || auto_zc) { - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); if (!sqe[0]) return -ENOMEM; @@ -52,7 +54,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de return 1; } - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3); + ublk_io_alloc_sqes(t, sqe, 3); io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; @@ -72,7 +74,7 @@ static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_de return 2; } -static int loop_queue_tgt_io(struct ublk_queue *q, int tag) +static int loop_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned ublk_op = ublksrv_get_op(iod); @@ -80,7 +82,7 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag) switch (ublk_op) { case UBLK_IO_OP_FLUSH: - ret = loop_queue_flush_io(q, iod, tag); + ret = loop_queue_flush_io(t, q, iod, tag); break; case UBLK_IO_OP_WRITE_ZEROES: case UBLK_IO_OP_DISCARD: @@ -88,7 +90,7 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag) break; case UBLK_IO_OP_READ: case UBLK_IO_OP_WRITE: - ret = loop_queue_tgt_rw_io(q, iod, tag); + ret = loop_queue_tgt_rw_io(t, q, iod, tag); break; default: ret = -EINVAL; @@ -100,17 +102,19 @@ static int loop_queue_tgt_io(struct ublk_queue *q, int tag) return ret; } -static int ublk_loop_queue_io(struct ublk_queue *q, int tag) +static int ublk_loop_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { - int queued = loop_queue_tgt_io(q, tag); + int queued = loop_queue_tgt_io(t, q, tag); - ublk_queued_tgt_io(q, tag, queued); + ublk_queued_tgt_io(t, q, tag, queued); return 0; } -static void ublk_loop_io_done(struct ublk_queue *q, int tag, +static void ublk_loop_io_done(struct ublk_thread *t, struct ublk_queue *q, const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); unsigned op = user_data_to_op(cqe->user_data); struct ublk_io *io = ublk_get_io(q, tag); @@ -126,8 +130,8 @@ static void ublk_loop_io_done(struct ublk_queue *q, int tag, if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) io->tgt_ios += 1; - if (ublk_completed_tgt_io(q, tag)) - ublk_complete_io(q, tag, io->result); + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, io->result); } static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index e2d2042810d4..95188065b2e9 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -441,17 +441,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) unsigned long off; q->tgt_ops = dev->tgt.ops; - q->state = 0; + q->flags = 0; q->q_depth = depth; - - if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) { - q->state |= UBLKSRV_NO_BUF; - if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) - q->state |= UBLKSRV_ZC; - if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG) - q->state |= UBLKSRV_AUTO_BUF_REG; - } - q->state |= extra_flags; + q->flags = dev->dev_info.flags; + q->flags |= extra_flags; cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); @@ -466,10 +459,10 @@ static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) io_buf_size = dev->dev_info.max_io_buf_bytes; for (i = 0; i < 
q->q_depth; i++) { q->ios[i].buf_addr = NULL; - q->ios[i].flags = UBLKSRV_NEED_FETCH_RQ | UBLKSRV_IO_FREE; + q->ios[i].flags = UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_FREE; q->ios[i].tag = i; - if (q->state & UBLKSRV_NO_BUF) + if (ublk_queue_no_buf(q)) continue; if (posix_memalign((void **)&q->ios[i].buf_addr, @@ -583,15 +576,14 @@ static void ublk_set_auto_buf_reg(const struct ublk_queue *q, else buf.index = q->ios[tag].buf_index; - if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + if (ublk_queue_auto_zc_fallback(q)) buf.flags = UBLK_AUTO_BUF_REG_FALLBACK; sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf); } -int ublk_queue_io_cmd(struct ublk_io *io) +int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io) { - struct ublk_thread *t = io->t; struct ublk_queue *q = ublk_io_to_queue(io); struct ublksrv_io_cmd *cmd; struct io_uring_sqe *sqe[1]; @@ -599,7 +591,7 @@ int ublk_queue_io_cmd(struct ublk_io *io) __u64 user_data; /* only freed io can be issued */ - if (!(io->flags & UBLKSRV_IO_FREE)) + if (!(io->flags & UBLKS_IO_FREE)) return 0; /* @@ -607,20 +599,20 @@ int ublk_queue_io_cmd(struct ublk_io *io) * getting data */ if (!(io->flags & - (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA))) + (UBLKS_IO_NEED_FETCH_RQ | UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_NEED_GET_DATA))) return 0; - if (io->flags & UBLKSRV_NEED_GET_DATA) + if (io->flags & UBLKS_IO_NEED_GET_DATA) cmd_op = UBLK_U_IO_NEED_GET_DATA; - else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + else if (io->flags & UBLKS_IO_NEED_COMMIT_RQ_COMP) cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; - else if (io->flags & UBLKSRV_NEED_FETCH_RQ) + else if (io->flags & UBLKS_IO_NEED_FETCH_RQ) cmd_op = UBLK_U_IO_FETCH_REQ; if (io_uring_sq_space_left(&t->ring) < 1) io_uring_submit(&t->ring); - ublk_io_alloc_sqes(io, sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); if (!sqe[0]) { ublk_err("%s: run out of sqe. 
thread %u, tag %d\n", __func__, t->idx, io->tag); @@ -640,12 +632,12 @@ int ublk_queue_io_cmd(struct ublk_io *io) sqe[0]->rw_flags = 0; cmd->tag = io->tag; cmd->q_id = q->q_id; - if (!(q->state & UBLKSRV_NO_BUF)) + if (!ublk_queue_no_buf(q)) cmd->addr = (__u64) (uintptr_t) io->buf_addr; else cmd->addr = 0; - if (q->state & UBLKSRV_AUTO_BUF_REG) + if (ublk_queue_use_auto_zc(q)) ublk_set_auto_buf_reg(q, sqe[0], io->tag); user_data = build_user_data(io->tag, _IOC_NR(cmd_op), 0, q->q_id, 0); @@ -657,7 +649,7 @@ int ublk_queue_io_cmd(struct ublk_io *io) ublk_dbg(UBLK_DBG_IO_CMD, "%s: (thread %u qid %d tag %u cmd_op %u) iof %x stopping %d\n", __func__, t->idx, q->q_id, io->tag, cmd_op, - io->flags, !!(t->state & UBLKSRV_THREAD_STOPPING)); + io->flags, !!(t->state & UBLKS_T_STOPPING)); return 1; } @@ -685,9 +677,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t) int tag = i % dinfo->queue_depth; q = &t->dev->q[q_id]; io = &q->ios[tag]; - io->t = t; io->buf_index = j++; - ublk_queue_io_cmd(io); + ublk_queue_io_cmd(t, io); } } else { /* @@ -697,9 +688,8 @@ static void ublk_submit_fetch_commands(struct ublk_thread *t) struct ublk_queue *q = &t->dev->q[t->idx]; for (i = 0; i < q->q_depth; i++) { io = &q->ios[i]; - io->t = t; io->buf_index = i; - ublk_queue_io_cmd(io); + ublk_queue_io_cmd(t, io); } } } @@ -711,14 +701,13 @@ static int ublk_thread_is_idle(struct ublk_thread *t) static int ublk_thread_is_done(struct ublk_thread *t) { - return (t->state & UBLKSRV_THREAD_STOPPING) && ublk_thread_is_idle(t); + return (t->state & UBLKS_T_STOPPING) && ublk_thread_is_idle(t); } -static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, - struct io_uring_cqe *cqe) +static inline void ublksrv_handle_tgt_cqe(struct ublk_thread *t, + struct ublk_queue *q, + struct io_uring_cqe *cqe) { - unsigned tag = user_data_to_tag(cqe->user_data); - if (cqe->res < 0 && cqe->res != -EAGAIN) ublk_err("%s: failed tgt io: res %d qid %u tag %u, cmd_op %u\n", __func__, cqe->res, q->q_id, @@ -726,7 +715,41 @@ static inline void ublksrv_handle_tgt_cqe(struct ublk_queue *q, user_data_to_op(cqe->user_data)); if (q->tgt_ops->tgt_io_done) - q->tgt_ops->tgt_io_done(q, tag, cqe); + q->tgt_ops->tgt_io_done(t, q, cqe); +} + +static void ublk_handle_uring_cmd(struct ublk_thread *t, + struct ublk_queue *q, + const struct io_uring_cqe *cqe) +{ + int fetch = (cqe->res != UBLK_IO_RES_ABORT) && + !(t->state & UBLKS_T_STOPPING); + unsigned tag = user_data_to_tag(cqe->user_data); + struct ublk_io *io = &q->ios[tag]; + + if (!fetch) { + t->state |= UBLKS_T_STOPPING; + io->flags &= ~UBLKS_IO_NEED_FETCH_RQ; + } + + if (cqe->res == UBLK_IO_RES_OK) { + assert(tag < q->q_depth); + if (q->tgt_ops->queue_io) + q->tgt_ops->queue_io(t, q, tag); + } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { + io->flags |= UBLKS_IO_NEED_GET_DATA | UBLKS_IO_FREE; + ublk_queue_io_cmd(t, io); + } else { + /* + * COMMIT_REQ will be completed immediately since no fetching + * piggyback is required. 
+ * + * Marking IO_FREE only, then this io won't be issued since + * we only issue io with (UBLKS_IO_FREE | UBLKSRV_NEED_*) + * + * */ + io->flags = UBLKS_IO_FREE; + } } static void ublk_handle_cqe(struct ublk_thread *t, @@ -735,54 +758,27 @@ static void ublk_handle_cqe(struct ublk_thread *t, struct ublk_dev *dev = t->dev; unsigned q_id = user_data_to_q_id(cqe->user_data); struct ublk_queue *q = &dev->q[q_id]; - unsigned tag = user_data_to_tag(cqe->user_data); unsigned cmd_op = user_data_to_op(cqe->user_data); - int fetch = (cqe->res != UBLK_IO_RES_ABORT) && - !(t->state & UBLKSRV_THREAD_STOPPING); - struct ublk_io *io; if (cqe->res < 0 && cqe->res != -ENODEV) ublk_err("%s: res %d userdata %llx queue state %x\n", __func__, - cqe->res, cqe->user_data, q->state); + cqe->res, cqe->user_data, q->flags); ublk_dbg(UBLK_DBG_IO_CMD, "%s: res %d (qid %d tag %u cmd_op %u target %d/%d) stopping %d\n", - __func__, cqe->res, q->q_id, tag, cmd_op, - is_target_io(cqe->user_data), + __func__, cqe->res, q->q_id, user_data_to_tag(cqe->user_data), + cmd_op, is_target_io(cqe->user_data), user_data_to_tgt_data(cqe->user_data), - (t->state & UBLKSRV_THREAD_STOPPING)); + (t->state & UBLKS_T_STOPPING)); /* Don't retrieve io in case of target io */ if (is_target_io(cqe->user_data)) { - ublksrv_handle_tgt_cqe(q, cqe); + ublksrv_handle_tgt_cqe(t, q, cqe); return; } - io = &q->ios[tag]; t->cmd_inflight--; - if (!fetch) { - t->state |= UBLKSRV_THREAD_STOPPING; - io->flags &= ~UBLKSRV_NEED_FETCH_RQ; - } - - if (cqe->res == UBLK_IO_RES_OK) { - assert(tag < q->q_depth); - if (q->tgt_ops->queue_io) - q->tgt_ops->queue_io(q, tag); - } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { - io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; - ublk_queue_io_cmd(io); - } else { - /* - * COMMIT_REQ will be completed immediately since no fetching - * piggyback is required. 
- * - * Marking IO_FREE only, then this io won't be issued since - * we only issue io with (UBLKSRV_IO_FREE | UBLKSRV_NEED_*) - * - * */ - io->flags = UBLKSRV_IO_FREE; - } + ublk_handle_uring_cmd(t, q, cqe); } static int ublk_reap_events_uring(struct ublk_thread *t) @@ -808,7 +804,7 @@ static int ublk_process_io(struct ublk_thread *t) t->dev->dev_info.dev_id, t->idx, io_uring_sq_ready(&t->ring), t->cmd_inflight, - (t->state & UBLKSRV_THREAD_STOPPING)); + (t->state & UBLKS_T_STOPPING)); if (ublk_thread_is_done(t)) return -ENODEV; @@ -817,8 +813,8 @@ static int ublk_process_io(struct ublk_thread *t) reapped = ublk_reap_events_uring(t); ublk_dbg(UBLK_DBG_THREAD, "submit result %d, reapped %d stop %d idle %d\n", - ret, reapped, (t->state & UBLKSRV_THREAD_STOPPING), - (t->state & UBLKSRV_THREAD_IDLE)); + ret, reapped, (t->state & UBLKS_T_STOPPING), + (t->state & UBLKS_T_IDLE)); return reapped; } @@ -915,7 +911,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) { const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; struct ublk_thread_info *tinfo; - unsigned extra_flags = 0; + unsigned long long extra_flags = 0; cpu_set_t *affinity_buf; void *thread_ret; sem_t ready; @@ -937,7 +933,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) return ret; if (ctx->auto_zc_fallback) - extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK; + extra_flags = UBLKS_Q_AUTO_BUF_REG_FALLBACK; for (i = 0; i < dinfo->nr_hw_queues; i++) { dev->q[i].dev = dev; diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 6be601536b3d..219233f8a053 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -29,13 +29,9 @@ #include "ublk_dep.h" #include <linux/ublk_cmd.h> -#define __maybe_unused __attribute__((unused)) -#define MAX_BACK_FILES 4 -#ifndef min -#define min(a, b) ((a) < (b) ? 
(a) : (b)) -#endif +#include "utils.h" -#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#define MAX_BACK_FILES 4 /****************** part 1: libublk ********************/ @@ -45,9 +41,6 @@ #define UBLK_CTRL_RING_DEPTH 32 #define ERROR_EVTFD_DEVID -2 -/* queue idle timeout */ -#define UBLKSRV_IO_IDLE_SECS 20 - #define UBLK_IO_MAX_BYTES (1 << 20) #define UBLK_MAX_QUEUES_SHIFT 5 #define UBLK_MAX_QUEUES (1 << UBLK_MAX_QUEUES_SHIFT) @@ -55,13 +48,6 @@ #define UBLK_MAX_THREADS (1 << UBLK_MAX_THREADS_SHIFT) #define UBLK_QUEUE_DEPTH 1024 -#define UBLK_DBG_DEV (1U << 0) -#define UBLK_DBG_THREAD (1U << 1) -#define UBLK_DBG_IO_CMD (1U << 2) -#define UBLK_DBG_IO (1U << 3) -#define UBLK_DBG_CTRL_CMD (1U << 4) -#define UBLK_LOG (1U << 5) - struct ublk_dev; struct ublk_queue; struct ublk_thread; @@ -121,11 +107,11 @@ struct ublk_ctrl_cmd_data { struct ublk_io { char *buf_addr; -#define UBLKSRV_NEED_FETCH_RQ (1UL << 0) -#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) -#define UBLKSRV_IO_FREE (1UL << 2) -#define UBLKSRV_NEED_GET_DATA (1UL << 3) -#define UBLKSRV_NEED_REG_BUF (1UL << 4) +#define UBLKS_IO_NEED_FETCH_RQ (1UL << 0) +#define UBLKS_IO_NEED_COMMIT_RQ_COMP (1UL << 1) +#define UBLKS_IO_FREE (1UL << 2) +#define UBLKS_IO_NEED_GET_DATA (1UL << 3) +#define UBLKS_IO_NEED_REG_BUF (1UL << 4) unsigned short flags; unsigned short refs; /* used by target code only */ @@ -136,7 +122,6 @@ struct ublk_io { unsigned short buf_index; unsigned short tgt_ios; void *private_data; - struct ublk_thread *t; }; struct ublk_tgt_ops { @@ -144,9 +129,9 @@ struct ublk_tgt_ops { int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *); void (*deinit_tgt)(struct ublk_dev *); - int (*queue_io)(struct ublk_queue *, int tag); - void (*tgt_io_done)(struct ublk_queue *, - int tag, const struct io_uring_cqe *); + int (*queue_io)(struct ublk_thread *, struct ublk_queue *, int tag); + void (*tgt_io_done)(struct ublk_thread *, struct ublk_queue *, + const struct io_uring_cqe *); /* * Target specific command line handling @@ -179,12 +164,10 @@ struct ublk_queue { const struct ublk_tgt_ops *tgt_ops; struct ublksrv_io_desc *io_cmd_buf; +/* borrow one bit of ublk uapi flags, which may never be used */ +#define UBLKS_Q_AUTO_BUF_REG_FALLBACK (1ULL << 63) + __u64 flags; struct ublk_io ios[UBLK_QUEUE_DEPTH]; -#define UBLKSRV_NO_BUF (1U << 2) -#define UBLKSRV_ZC (1U << 3) -#define UBLKSRV_AUTO_BUF_REG (1U << 4) -#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5) - unsigned state; }; struct ublk_thread { @@ -196,8 +179,8 @@ struct ublk_thread { pthread_t thread; unsigned idx; -#define UBLKSRV_THREAD_STOPPING (1U << 0) -#define UBLKSRV_THREAD_IDLE (1U << 1) +#define UBLKS_T_STOPPING (1U << 0) +#define UBLKS_T_IDLE (1U << 1) unsigned state; }; @@ -217,22 +200,7 @@ struct ublk_dev { void *private_data; }; -#ifndef offsetof -#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) -#endif - -#ifndef container_of -#define container_of(ptr, type, member) ({ \ - unsigned long __mptr = (unsigned long)(ptr); \ - ((type *)(__mptr - offsetof(type, member))); }) -#endif - -#define round_up(val, rnd) \ - (((val) + ((rnd) - 1)) & ~((rnd) - 1)) - - -extern unsigned int ublk_dbg_mask; -extern int ublk_queue_io_cmd(struct ublk_io *io); +extern int ublk_queue_io_cmd(struct ublk_thread *t, struct ublk_io *io); static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod) @@ -281,43 +249,15 @@ static inline unsigned short ublk_cmd_op_nr(unsigned int op) return _IOC_NR(op); } -static inline void ublk_err(const char *fmt, ...) 
-{ - va_list ap; - - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); -} - -static inline void ublk_log(const char *fmt, ...) -{ - if (ublk_dbg_mask & UBLK_LOG) { - va_list ap; - - va_start(ap, fmt); - vfprintf(stdout, fmt, ap); - } -} - -static inline void ublk_dbg(int level, const char *fmt, ...) -{ - if (level & ublk_dbg_mask) { - va_list ap; - - va_start(ap, fmt); - vfprintf(stdout, fmt, ap); - } -} - static inline struct ublk_queue *ublk_io_to_queue(const struct ublk_io *io) { return container_of(io, struct ublk_queue, ios[io->tag]); } -static inline int ublk_io_alloc_sqes(struct ublk_io *io, +static inline int ublk_io_alloc_sqes(struct ublk_thread *t, struct io_uring_sqe *sqes[], int nr_sqes) { - struct io_uring *ring = &io->t->ring; + struct io_uring *ring = &t->ring; unsigned left = io_uring_sq_space_left(ring); int i; @@ -380,7 +320,7 @@ static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag) static inline void ublk_mark_io_done(struct ublk_io *io, int res) { - io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE); + io->flags |= (UBLKS_IO_NEED_COMMIT_RQ_COMP | UBLKS_IO_FREE); io->result = res; } @@ -402,45 +342,58 @@ static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag) return &q->ios[tag]; } -static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res) +static inline int ublk_complete_io(struct ublk_thread *t, struct ublk_queue *q, + unsigned tag, int res) { struct ublk_io *io = &q->ios[tag]; ublk_mark_io_done(io, res); - return ublk_queue_io_cmd(io); + return ublk_queue_io_cmd(t, io); } -static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued) +static inline void ublk_queued_tgt_io(struct ublk_thread *t, struct ublk_queue *q, + unsigned tag, int queued) { if (queued < 0) - ublk_complete_io(q, tag, queued); + ublk_complete_io(t, q, tag, queued); else { struct ublk_io *io = ublk_get_io(q, tag); - io->t->io_inflight += queued; + t->io_inflight += queued; io->tgt_ios = queued; io->result = 0; } } -static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag) +static inline int ublk_completed_tgt_io(struct ublk_thread *t, + struct ublk_queue *q, unsigned tag) { struct ublk_io *io = ublk_get_io(q, tag); - io->t->io_inflight--; + t->io_inflight--; return --io->tgt_ios == 0; } static inline int ublk_queue_use_zc(const struct ublk_queue *q) { - return q->state & UBLKSRV_ZC; + return q->flags & UBLK_F_SUPPORT_ZERO_COPY; } static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q) { - return q->state & UBLKSRV_AUTO_BUF_REG; + return q->flags & UBLK_F_AUTO_BUF_REG; +} + +static inline int ublk_queue_auto_zc_fallback(const struct ublk_queue *q) +{ + return q->flags & UBLKS_Q_AUTO_BUF_REG_FALLBACK; +} + +static inline int ublk_queue_no_buf(const struct ublk_queue *q) +{ + return ublk_queue_use_zc(q) || ublk_queue_use_auto_zc(q); } extern const struct ublk_tgt_ops null_tgt_ops; @@ -451,10 +404,4 @@ extern const struct ublk_tgt_ops fault_inject_tgt_ops; void backing_file_tgt_deinit(struct ublk_dev *dev); int backing_file_tgt_init(struct ublk_dev *dev); -static inline unsigned int ilog2(unsigned int x) -{ - if (x == 0) - return 0; - return (sizeof(x) * 8 - 1) - __builtin_clz(x); -} #endif diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index afe0b99d77ee..f0e0003a4860 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -55,12 +55,13 @@ static void __setup_nop_io(int tag, const struct 
ublksrv_io_desc *iod, sqe->user_data = build_user_data(tag, ublk_op, 0, q_id, 1); } -static int null_queue_zc_io(struct ublk_queue *q, int tag) +static int null_queue_zc_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); struct io_uring_sqe *sqe[3]; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 3); + ublk_io_alloc_sqes(t, sqe, 3); io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, ublk_get_io(q, tag)->buf_index); sqe[0]->user_data = build_user_data(tag, @@ -77,19 +78,21 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag) return 2; } -static int null_queue_auto_zc_io(struct ublk_queue *q, int tag) +static int null_queue_auto_zc_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); struct io_uring_sqe *sqe[1]; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, 1); + ublk_io_alloc_sqes(t, sqe, 1); __setup_nop_io(tag, iod, sqe[0], q->q_id); return 1; } -static void ublk_null_io_done(struct ublk_queue *q, int tag, - const struct io_uring_cqe *cqe) +static void ublk_null_io_done(struct ublk_thread *t, struct ublk_queue *q, + const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); unsigned op = user_data_to_op(cqe->user_data); struct ublk_io *io = ublk_get_io(q, tag); @@ -105,11 +108,12 @@ static void ublk_null_io_done(struct ublk_queue *q, int tag, if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) io->tgt_ios += 1; - if (ublk_completed_tgt_io(q, tag)) - ublk_complete_io(q, tag, io->result); + if (ublk_completed_tgt_io(t, q, tag)) + ublk_complete_io(t, q, tag, io->result); } -static int ublk_null_queue_io(struct ublk_queue *q, int tag) +static int ublk_null_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned auto_zc = ublk_queue_use_auto_zc(q); @@ -117,14 +121,14 @@ static int ublk_null_queue_io(struct ublk_queue *q, int tag) int queued; if (auto_zc && !ublk_io_auto_zc_fallback(iod)) - queued = null_queue_auto_zc_io(q, tag); + queued = null_queue_auto_zc_io(t, q, tag); else if (zc) - queued = null_queue_zc_io(q, tag); + queued = null_queue_zc_io(t, q, tag); else { - ublk_complete_io(q, tag, iod->nr_sectors << 9); + ublk_complete_io(t, q, tag, iod->nr_sectors << 9); return 0; } - ublk_queued_tgt_io(q, tag, queued); + ublk_queued_tgt_io(t, q, tag, queued); return 0; } @@ -134,7 +138,7 @@ static int ublk_null_queue_io(struct ublk_queue *q, int tag) */ static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag) { - if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + if (ublk_queue_auto_zc_fallback(q)) return (unsigned short)-1; return q->ios[tag].buf_index; } diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 37d50bbf5f5e..1fb9b7cc281b 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -123,7 +123,8 @@ static inline enum io_uring_op stripe_to_uring_op( assert(0); } -static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int stripe_queue_tgt_rw_io(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { const struct stripe_conf *conf = get_chunk_shift(q); unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0); @@ -138,7 +139,7 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_ io->private_data = s; calculate_stripe_array(conf, 
iod, s, base); - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, s->nr + extra); + ublk_io_alloc_sqes(t, sqe, s->nr + extra); if (zc) { io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, io->buf_index); @@ -176,13 +177,14 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_ return s->nr + zc; } -static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) +static int handle_flush(struct ublk_thread *t, struct ublk_queue *q, + const struct ublksrv_io_desc *iod, int tag) { const struct stripe_conf *conf = get_chunk_shift(q); struct io_uring_sqe *sqe[NR_STRIPE]; int i; - ublk_io_alloc_sqes(ublk_get_io(q, tag), sqe, conf->nr_files); + ublk_io_alloc_sqes(t, sqe, conf->nr_files); for (i = 0; i < conf->nr_files; i++) { io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC); io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); @@ -191,7 +193,8 @@ static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, return conf->nr_files; } -static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) +static int stripe_queue_tgt_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned ublk_op = ublksrv_get_op(iod); @@ -199,7 +202,7 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) switch (ublk_op) { case UBLK_IO_OP_FLUSH: - ret = handle_flush(q, iod, tag); + ret = handle_flush(t, q, iod, tag); break; case UBLK_IO_OP_WRITE_ZEROES: case UBLK_IO_OP_DISCARD: @@ -207,7 +210,7 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) break; case UBLK_IO_OP_READ: case UBLK_IO_OP_WRITE: - ret = stripe_queue_tgt_rw_io(q, iod, tag); + ret = stripe_queue_tgt_rw_io(t, q, iod, tag); break; default: ret = -EINVAL; @@ -218,17 +221,19 @@ static int stripe_queue_tgt_io(struct ublk_queue *q, int tag) return ret; } -static int ublk_stripe_queue_io(struct ublk_queue *q, int tag) +static int ublk_stripe_queue_io(struct ublk_thread *t, struct ublk_queue *q, + int tag) { - int queued = stripe_queue_tgt_io(q, tag); + int queued = stripe_queue_tgt_io(t, q, tag); - ublk_queued_tgt_io(q, tag, queued); + ublk_queued_tgt_io(t, q, tag, queued); return 0; } -static void ublk_stripe_io_done(struct ublk_queue *q, int tag, - const struct io_uring_cqe *cqe) +static void ublk_stripe_io_done(struct ublk_thread *t, struct ublk_queue *q, + const struct io_uring_cqe *cqe) { + unsigned tag = user_data_to_tag(cqe->user_data); const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); unsigned op = user_data_to_op(cqe->user_data); struct ublk_io *io = ublk_get_io(q, tag); @@ -257,13 +262,13 @@ static void ublk_stripe_io_done(struct ublk_queue *q, int tag, } } - if (ublk_completed_tgt_io(q, tag)) { + if (ublk_completed_tgt_io(t, q, tag)) { int res = io->result; if (!res) res = iod->nr_sectors << 9; - ublk_complete_io(q, tag, res); + ublk_complete_io(t, q, tag, res); free_stripe_array(io->private_data); io->private_data = NULL; diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh index 6eef282d569f..3ed4c9b2d8c0 100755 --- a/tools/testing/selftests/ublk/test_stress_03.sh +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -32,22 +32,23 @@ _create_backfile 2 128M ublk_io_and_remove 8G -t null -q 4 -z & ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait if _have_feature "AUTO_BUF_REG"; then ublk_io_and_remove 8G -t 
null -q 4 --auto_zc & ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & + wait fi -wait if _have_feature "PER_IO_DAEMON"; then ublk_io_and_remove 8G -t null -q 4 --auto_zc --nthreads 8 --per_io_tasks & ublk_io_and_remove 256M -t loop -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[0]}" & ublk_io_and_remove 256M -t stripe -q 4 --auto_zc --nthreads 8 --per_io_tasks "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback --nthreads 8 --per_io_tasks & + wait fi -wait _cleanup_test "stress" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/utils.h b/tools/testing/selftests/ublk/utils.h new file mode 100644 index 000000000000..36545d1567f1 --- /dev/null +++ b/tools/testing/selftests/ublk/utils.h @@ -0,0 +1,70 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef KUBLK_UTILS_H +#define KUBLK_UTILS_H + +#define __maybe_unused __attribute__((unused)) + +#ifndef min +#define min(a, b) ((a) < (b) ? (a) : (b)) +#endif + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) +#endif + +#ifndef container_of +#define container_of(ptr, type, member) ({ \ + unsigned long __mptr = (unsigned long)(ptr); \ + ((type *)(__mptr - offsetof(type, member))); }) +#endif + +#define round_up(val, rnd) \ + (((val) + ((rnd) - 1)) & ~((rnd) - 1)) + +static inline unsigned int ilog2(unsigned int x) +{ + if (x == 0) + return 0; + return (sizeof(x) * 8 - 1) - __builtin_clz(x); +} + +#define UBLK_DBG_DEV (1U << 0) +#define UBLK_DBG_THREAD (1U << 1) +#define UBLK_DBG_IO_CMD (1U << 2) +#define UBLK_DBG_IO (1U << 3) +#define UBLK_DBG_CTRL_CMD (1U << 4) +#define UBLK_LOG (1U << 5) + +extern unsigned int ublk_dbg_mask; + +static inline void ublk_err(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); +} + +static inline void ublk_log(const char *fmt, ...) +{ + if (ublk_dbg_mask & UBLK_LOG) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + } +} + +static inline void ublk_dbg(int level, const char *fmt, ...) 
+{ + if (level & ublk_dbg_mask) { + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + } +} + +#endif diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile index 12a0614b9fd4..918a2caa070e 100644 --- a/tools/testing/selftests/vDSO/Makefile +++ b/tools/testing/selftests/vDSO/Makefile @@ -12,7 +12,7 @@ TEST_GEN_PROGS += vdso_test_correctness TEST_GEN_PROGS += vdso_test_getrandom TEST_GEN_PROGS += vdso_test_chacha -CFLAGS := -std=gnu99 -O2 +CFLAGS := -std=gnu99 -O2 -Wall -Wstrict-prototypes ifeq ($(CONFIG_X86_32),y) LDLIBS += -lgcc_s diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h index 722260f97561..5fdd0f362337 100644 --- a/tools/testing/selftests/vDSO/vdso_config.h +++ b/tools/testing/selftests/vDSO/vdso_config.h @@ -58,6 +58,7 @@ #define VDSO_NAMES 1 #endif +__attribute__((unused)) static const char *versions[7] = { "LINUX_2.6", "LINUX_2.6.15", @@ -68,6 +69,7 @@ static const char *versions[7] = { "LINUX_5.10" }; +__attribute__((unused)) static const char *names[2][7] = { { "__kernel_gettimeofday", diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c index 9ce795b806f0..4d3d96f1e440 100644..120000 --- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c +++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c @@ -1,58 +1 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * vdso_test_gettimeofday.c: Sample code to test parse_vdso.c and - * vDSO gettimeofday() - * Copyright (c) 2014 Andy Lutomirski - * - * Compile with: - * gcc -std=gnu99 vdso_test_gettimeofday.c parse_vdso_gettimeofday.c - * - * Tested on x86, 32-bit and 64-bit. It may work on other architectures, too. - */ - -#include <stdio.h> -#ifndef NOLIBC -#include <sys/auxv.h> -#include <sys/time.h> -#endif - -#include "../kselftest.h" -#include "parse_vdso.h" -#include "vdso_config.h" -#include "vdso_call.h" - -int main(int argc, char **argv) -{ - const char *version = versions[VDSO_VERSION]; - const char **name = (const char **)&names[VDSO_NAMES]; - - unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR); - if (!sysinfo_ehdr) { - printf("AT_SYSINFO_EHDR is not present!\n"); - return KSFT_SKIP; - } - - vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR)); - - /* Find gettimeofday. */ - typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz); - gtod_t gtod = (gtod_t)vdso_sym(version, name[0]); - - if (!gtod) { - printf("Could not find %s\n", name[0]); - return KSFT_SKIP; - } - - struct timeval tv; - long ret = VDSO_CALL(gtod, 2, &tv, 0); - - if (ret == 0) { - printf("The time is %lld.%06lld\n", - (long long)tv.tv_sec, (long long)tv.tv_usec); - } else { - printf("%s failed\n", name[0]); - return KSFT_FAIL; - } - - return 0; -} +vdso_test_gettimeofday.c
\ No newline at end of file diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c index 8757f738b0b1..0aad682b12c8 100644 --- a/tools/testing/selftests/vDSO/vdso_test_chacha.c +++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c @@ -76,7 +76,8 @@ static void reference_chacha20_blocks(uint8_t *dst_bytes, const uint32_t *key, u void __weak __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint32_t *key, uint32_t *counter, size_t nblocks) { - ksft_exit_skip("Not implemented on architecture\n"); + ksft_test_result_skip("Not implemented on architecture\n"); + ksft_finished(); } int main(int argc, char *argv[]) diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c index 38d46a8bf7cb..b5d5f59f725a 100644 --- a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c +++ b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c @@ -13,7 +13,6 @@ #define _GNU_SOURCE #include <elf.h> -#include <err.h> #include <fcntl.h> #include <stdint.h> #include <stdio.h> diff --git a/tools/testing/selftests/vDSO/vdso_test_correctness.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c index 5fb97ad67eea..da651cf53c6c 100644 --- a/tools/testing/selftests/vDSO/vdso_test_correctness.c +++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c @@ -108,7 +108,7 @@ static void *vsyscall_getcpu(void) } -static void fill_function_pointers() +static void fill_function_pointers(void) { void *vdso = dlopen("linux-vdso.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD); diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c index 95057f7567db..dd1132508a0d 100644 --- a/tools/testing/selftests/vDSO/vdso_test_getrandom.c +++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c @@ -21,7 +21,6 @@ #include <sys/wait.h> #include <sys/types.h> #include <linux/random.h> -#include <linux/compiler.h> #include <linux/ptrace.h> #include "../kselftest.h" @@ -101,6 +100,7 @@ out: return state; } +__attribute__((unused)) /* Example for libc implementors */ static void vgetrandom_put_state(void *state) { if (!state) @@ -242,6 +242,7 @@ static void kselftest(void) pid_t child; ksft_print_header(); + vgetrandom_init(); ksft_set_plan(2); for (size_t i = 0; i < 1000; ++i) { @@ -265,7 +266,7 @@ static void kselftest(void) } for (;;) { struct ptrace_syscall_info info = { 0 }; - int status, ret; + int status; ksft_assert(waitpid(child, &status, 0) >= 0); if (WIFEXITED(status)) { ksft_assert(WEXITSTATUS(status) == 0); @@ -295,8 +296,6 @@ static void usage(const char *argv0) int main(int argc, char *argv[]) { - vgetrandom_init(); - if (argc == 1) { kselftest(); return 0; @@ -306,6 +305,9 @@ int main(int argc, char *argv[]) usage(argv[0]); return 1; } + + vgetrandom_init(); + if (!strcmp(argv[1], "bench-single")) bench_single(); else if (!strcmp(argv[1], "bench-multi")) diff --git a/tools/testing/selftests/vsock/.gitignore b/tools/testing/selftests/vsock/.gitignore new file mode 100644 index 000000000000..9c5bf379480f --- /dev/null +++ b/tools/testing/selftests/vsock/.gitignore @@ -0,0 +1,2 @@ +vmtest.log +vsock_test diff --git a/tools/testing/selftests/vsock/Makefile b/tools/testing/selftests/vsock/Makefile new file mode 100644 index 000000000000..c407c0afd938 --- /dev/null +++ b/tools/testing/selftests/vsock/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +CURDIR := $(abspath .) +TOOLSDIR := $(abspath ../../..) 
+VSOCK_TEST_DIR := $(TOOLSDIR)/testing/vsock +VSOCK_TEST_SRCS := $(wildcard $(VSOCK_TEST_DIR)/*.c $(VSOCK_TEST_DIR)/*.h) + +$(OUTPUT)/vsock_test: $(VSOCK_TEST_DIR)/vsock_test + install -m 755 $< $@ + +$(VSOCK_TEST_DIR)/vsock_test: $(VSOCK_TEST_SRCS) + $(MAKE) -C $(VSOCK_TEST_DIR) vsock_test +TEST_PROGS += vmtest.sh +TEST_GEN_FILES := vsock_test + +include ../lib.mk + diff --git a/tools/testing/selftests/vsock/config b/tools/testing/selftests/vsock/config new file mode 100644 index 000000000000..5f0a4f17dfc9 --- /dev/null +++ b/tools/testing/selftests/vsock/config @@ -0,0 +1,111 @@ +CONFIG_BLK_DEV_INITRD=y +CONFIG_BPF=y +CONFIG_BPF_SYSCALL=y +CONFIG_BPF_JIT=y +CONFIG_HAVE_EBPF_JIT=y +CONFIG_BPF_EVENTS=y +CONFIG_FTRACE_SYSCALLS=y +CONFIG_FUNCTION_TRACER=y +CONFIG_HAVE_DYNAMIC_FTRACE=y +CONFIG_DYNAMIC_FTRACE=y +CONFIG_HAVE_KPROBES=y +CONFIG_KPROBES=y +CONFIG_KPROBE_EVENTS=y +CONFIG_ARCH_SUPPORTS_UPROBES=y +CONFIG_UPROBES=y +CONFIG_UPROBE_EVENTS=y +CONFIG_DEBUG_FS=y +CONFIG_FW_CFG_SYSFS=y +CONFIG_FW_CFG_SYSFS_CMDLINE=y +CONFIG_DRM=y +CONFIG_DRM_VIRTIO_GPU=y +CONFIG_DRM_VIRTIO_GPU_KMS=y +CONFIG_DRM_BOCHS=y +CONFIG_VIRTIO_IOMMU=y +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_PCI=y +CONFIG_SND_INTEL8X0=y +CONFIG_SND_HDA_CODEC_REALTEK=y +CONFIG_SECURITYFS=y +CONFIG_CGROUP_BPF=y +CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_XZ=y +CONFIG_SQUASHFS_ZSTD=y +CONFIG_FUSE_FS=y +CONFIG_VIRTIO_FS=y +CONFIG_SERIO=y +CONFIG_PCI=y +CONFIG_INPUT=y +CONFIG_INPUT_KEYBOARD=y +CONFIG_KEYBOARD_ATKBD=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_X86_VERBOSE_BOOTUP=y +CONFIG_VGA_CONSOLE=y +CONFIG_FB=y +CONFIG_FB_VESA=y +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_RTC_CLASS=y +CONFIG_RTC_HCTOSYS=y +CONFIG_RTC_DRV_CMOS=y +CONFIG_HYPERVISOR_GUEST=y +CONFIG_PARAVIRT=y +CONFIG_KVM_GUEST=y +CONFIG_KVM=y +CONFIG_KVM_INTEL=y +CONFIG_KVM_AMD=y +CONFIG_VSOCKETS=y +CONFIG_VSOCKETS_DIAG=y +CONFIG_VSOCKETS_LOOPBACK=y +CONFIG_VMWARE_VMCI_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS=y +CONFIG_VIRTIO_VSOCKETS_COMMON=y +CONFIG_HYPERV_VSOCKETS=y +CONFIG_VMWARE_VMCI=y +CONFIG_VHOST_VSOCK=y +CONFIG_HYPERV=y +CONFIG_UEVENT_HELPER=n +CONFIG_VIRTIO=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_MMIO=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_NET=y +CONFIG_NET_CORE=y +CONFIG_NETDEVICES=y +CONFIG_NETWORK_FILESYSTEMS=y +CONFIG_INET=y +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y +CONFIG_9P_FS=y +CONFIG_VIRTIO_NET=y +CONFIG_CMDLINE_OVERRIDE=n +CONFIG_BINFMT_SCRIPT=y +CONFIG_SHMEM=y +CONFIG_TMPFS=y +CONFIG_UNIX=y +CONFIG_MODULE_SIG_FORCE=n +CONFIG_DEVTMPFS=y +CONFIG_TTY=y +CONFIG_VT=y +CONFIG_UNIX98_PTYS=y +CONFIG_EARLY_PRINTK=y +CONFIG_INOTIFY_USER=y +CONFIG_BLOCK=y +CONFIG_SCSI_LOWLEVEL=y +CONFIG_SCSI=y +CONFIG_SCSI_VIRTIO=y +CONFIG_BLK_DEV_SD=y +CONFIG_VIRTIO_CONSOLE=y +CONFIG_WATCHDOG=y +CONFIG_WATCHDOG_CORE=y +CONFIG_I6300ESB_WDT=y +CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y +CONFIG_OVERLAY_FS=y +CONFIG_DAX=y +CONFIG_DAX_DRIVER=y +CONFIG_FS_DAX=y +CONFIG_MEMORY_HOTPLUG=y +CONFIG_MEMORY_HOTREMOVE=y +CONFIG_ZONE_DEVICE=y diff --git a/tools/testing/selftests/vsock/settings b/tools/testing/selftests/vsock/settings new file mode 100644 index 000000000000..694d70710ff0 --- /dev/null +++ b/tools/testing/selftests/vsock/settings @@ -0,0 +1 @@ +timeout=300 diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh new file mode 100755 index 000000000000..edacebfc1632 --- /dev/null +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -0,0 +1,487 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Meta 
Platforms, Inc. and affiliates +# +# Dependencies: +# * virtme-ng +# * busybox-static (used by virtme-ng) +# * qemu (used by virtme-ng) + +readonly SCRIPT_DIR="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)" +readonly KERNEL_CHECKOUT=$(realpath "${SCRIPT_DIR}"/../../../../) + +source "${SCRIPT_DIR}"/../kselftest/ktap_helpers.sh + +readonly VSOCK_TEST="${SCRIPT_DIR}"/vsock_test +readonly TEST_GUEST_PORT=51000 +readonly TEST_HOST_PORT=50000 +readonly TEST_HOST_PORT_LISTENER=50001 +readonly SSH_GUEST_PORT=22 +readonly SSH_HOST_PORT=2222 +readonly VSOCK_CID=1234 +readonly WAIT_PERIOD=3 +readonly WAIT_PERIOD_MAX=60 +readonly WAIT_TOTAL=$(( WAIT_PERIOD * WAIT_PERIOD_MAX )) +readonly QEMU_PIDFILE=$(mktemp /tmp/qemu_vsock_vmtest_XXXX.pid) + +# virtme-ng offers a netdev for ssh when using "--ssh", but we also need a +# control port forwarded for vsock_test. Because virtme-ng doesn't support +# adding an additional port to forward to the device created from "--ssh" and +# virtme-init mistakenly sets identical IPs to the ssh device and additional +# devices, we instead opt out of using --ssh, add the device manually, and also +# add the kernel cmdline options that virtme-init uses to setup the interface. +readonly QEMU_TEST_PORT_FWD="hostfwd=tcp::${TEST_HOST_PORT}-:${TEST_GUEST_PORT}" +readonly QEMU_SSH_PORT_FWD="hostfwd=tcp::${SSH_HOST_PORT}-:${SSH_GUEST_PORT}" +readonly QEMU_OPTS="\ + -netdev user,id=n0,${QEMU_TEST_PORT_FWD},${QEMU_SSH_PORT_FWD} \ + -device virtio-net-pci,netdev=n0 \ + -device vhost-vsock-pci,guest-cid=${VSOCK_CID} \ + --pidfile ${QEMU_PIDFILE} \ +" +readonly KERNEL_CMDLINE="\ + virtme.dhcp net.ifnames=0 biosdevname=0 \ + virtme.ssh virtme_ssh_channel=tcp virtme_ssh_user=$USER \ +" +readonly LOG=$(mktemp /tmp/vsock_vmtest_XXXX.log) +readonly TEST_NAMES=(vm_server_host_client vm_client_host_server vm_loopback) +readonly TEST_DESCS=( + "Run vsock_test in server mode on the VM and in client mode on the host." + "Run vsock_test in client mode on the VM and in server mode on the host." + "Run vsock_test using the loopback transport in the VM." +) + +VERBOSE=0 + +usage() { + local name + local desc + local i + + echo + echo "$0 [OPTIONS] [TEST]..." + echo "If no TEST argument is given, all tests will be run." + echo + echo "Options" + echo " -b: build the kernel from the current source tree and use it for guest VMs" + echo " -q: set the path to or name of qemu binary" + echo " -v: verbose output" + echo + echo "Available tests" + + for ((i = 0; i < ${#TEST_NAMES[@]}; i++)); do + name=${TEST_NAMES[${i}]} + desc=${TEST_DESCS[${i}]} + printf "\t%-35s%-35s\n" "${name}" "${desc}" + done + echo + + exit 1 +} + +die() { + echo "$*" >&2 + exit "${KSFT_FAIL}" +} + +vm_ssh() { + ssh -q -o UserKnownHostsFile=/dev/null -p ${SSH_HOST_PORT} localhost "$@" + return $? +} + +cleanup() { + if [[ -s "${QEMU_PIDFILE}" ]]; then + pkill -SIGTERM -F "${QEMU_PIDFILE}" > /dev/null 2>&1 + fi + + # If failure occurred during or before qemu start up, then we need + # to clean this up ourselves. + if [[ -e "${QEMU_PIDFILE}" ]]; then + rm "${QEMU_PIDFILE}" + fi +} + +check_args() { + local found + + for arg in "$@"; do + found=0 + for name in "${TEST_NAMES[@]}"; do + if [[ "${name}" = "${arg}" ]]; then + found=1 + break + fi + done + + if [[ "${found}" -eq 0 ]]; then + echo "${arg} is not an available test" >&2 + usage + fi + done + + for arg in "$@"; do + if ! 
command -v > /dev/null "test_${arg}"; then + echo "Test ${arg} not found" >&2 + usage + fi + done +} + +check_deps() { + for dep in vng ${QEMU} busybox pkill ssh; do + if [[ ! -x $(command -v "${dep}") ]]; then + echo -e "skip: dependency ${dep} not found!\n" + exit "${KSFT_SKIP}" + fi + done + + if [[ ! -x $(command -v "${VSOCK_TEST}") ]]; then + printf "skip: %s not found!" "${VSOCK_TEST}" + printf " Please build the kselftest vsock target.\n" + exit "${KSFT_SKIP}" + fi +} + +check_vng() { + local tested_versions + local version + local ok + + tested_versions=("1.33" "1.36") + version="$(vng --version)" + + ok=0 + for tv in "${tested_versions[@]}"; do + if [[ "${version}" == *"${tv}"* ]]; then + ok=1 + break + fi + done + + if [[ ! "${ok}" -eq 1 ]]; then + printf "warning: vng version '%s' has not been tested and may " "${version}" >&2 + printf "not function properly.\n\tThe following versions have been tested: " >&2 + echo "${tested_versions[@]}" >&2 + fi +} + +handle_build() { + if [[ ! "${BUILD}" -eq 1 ]]; then + return + fi + + if [[ ! -d "${KERNEL_CHECKOUT}" ]]; then + echo "-b requires vmtest.sh called from the kernel source tree" >&2 + exit 1 + fi + + pushd "${KERNEL_CHECKOUT}" &>/dev/null + + if ! vng --kconfig --config "${SCRIPT_DIR}"/config; then + die "failed to generate .config for kernel source tree (${KERNEL_CHECKOUT})" + fi + + if ! make -j$(nproc); then + die "failed to build kernel from source tree (${KERNEL_CHECKOUT})" + fi + + popd &>/dev/null +} + +vm_start() { + local logfile=/dev/null + local verbose_opt="" + local kernel_opt="" + local qemu + + qemu=$(command -v "${QEMU}") + + if [[ "${VERBOSE}" -eq 1 ]]; then + verbose_opt="--verbose" + logfile=/dev/stdout + fi + + if [[ "${BUILD}" -eq 1 ]]; then + kernel_opt="${KERNEL_CHECKOUT}" + fi + + vng \ + --run \ + ${kernel_opt} \ + ${verbose_opt} \ + --qemu-opts="${QEMU_OPTS}" \ + --qemu="${qemu}" \ + --user root \ + --append "${KERNEL_CMDLINE}" \ + --rw &> ${logfile} & + + if ! timeout ${WAIT_TOTAL} \ + bash -c 'while [[ ! 
-s '"${QEMU_PIDFILE}"' ]]; do sleep 1; done; exit 0'; then + die "failed to boot VM" + fi +} + +vm_wait_for_ssh() { + local i + + i=0 + while true; do + if [[ ${i} -gt ${WAIT_PERIOD_MAX} ]]; then + die "Timed out waiting for guest ssh" + fi + if vm_ssh -- true; then + break + fi + i=$(( i + 1 )) + sleep ${WAIT_PERIOD} + done +} + +# derived from selftests/net/net_helper.sh +wait_for_listener() +{ + local port=$1 + local interval=$2 + local max_intervals=$3 + local protocol=tcp + local pattern + local i + + pattern=":$(printf "%04X" "${port}") " + + # for tcp protocol additionally check the socket state + [ "${protocol}" = "tcp" ] && pattern="${pattern}0A" + for i in $(seq "${max_intervals}"); do + if awk '{print $2" "$4}' /proc/net/"${protocol}"* | \ + grep -q "${pattern}"; then + break + fi + sleep "${interval}" + done +} + +vm_wait_for_listener() { + local port=$1 + + vm_ssh <<EOF +$(declare -f wait_for_listener) +wait_for_listener ${port} ${WAIT_PERIOD} ${WAIT_PERIOD_MAX} +EOF +} + +host_wait_for_listener() { + wait_for_listener "${TEST_HOST_PORT_LISTENER}" "${WAIT_PERIOD}" "${WAIT_PERIOD_MAX}" +} + +__log_stdin() { + cat | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }' +} + +__log_args() { + echo "$*" | awk '{ printf "%s:\t%s\n","'"${prefix}"'", $0 }' +} + +log() { + local prefix="$1" + + shift + local redirect= + if [[ ${VERBOSE} -eq 0 ]]; then + redirect=/dev/null + else + redirect=/dev/stdout + fi + + if [[ "$#" -eq 0 ]]; then + __log_stdin | tee -a "${LOG}" > ${redirect} + else + __log_args "$@" | tee -a "${LOG}" > ${redirect} + fi +} + +log_setup() { + log "setup" "$@" +} + +log_host() { + local testname=$1 + + shift + log "test:${testname}:host" "$@" +} + +log_guest() { + local testname=$1 + + shift + log "test:${testname}:guest" "$@" +} + +test_vm_server_host_client() { + local testname="${FUNCNAME[0]#test_}" + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=server \ + --control-port="${TEST_GUEST_PORT}" \ + --peer-cid=2 \ + 2>&1 | log_guest "${testname}" & + + vm_wait_for_listener "${TEST_GUEST_PORT}" + + ${VSOCK_TEST} \ + --mode=client \ + --control-host=127.0.0.1 \ + --peer-cid="${VSOCK_CID}" \ + --control-port="${TEST_HOST_PORT}" 2>&1 | log_host "${testname}" + + return $? +} + +test_vm_client_host_server() { + local testname="${FUNCNAME[0]#test_}" + + ${VSOCK_TEST} \ + --mode "server" \ + --control-port "${TEST_HOST_PORT_LISTENER}" \ + --peer-cid "${VSOCK_CID}" 2>&1 | log_host "${testname}" & + + host_wait_for_listener + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=client \ + --control-host=10.0.2.2 \ + --peer-cid=2 \ + --control-port="${TEST_HOST_PORT_LISTENER}" 2>&1 | log_guest "${testname}" + + return $? +} + +test_vm_loopback() { + local testname="${FUNCNAME[0]#test_}" + local port=60000 # non-forwarded local port + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=server \ + --control-port="${port}" \ + --peer-cid=1 2>&1 | log_guest "${testname}" & + + vm_wait_for_listener "${port}" + + vm_ssh -- "${VSOCK_TEST}" \ + --mode=client \ + --control-host="127.0.0.1" \ + --control-port="${port}" \ + --peer-cid=1 2>&1 | log_guest "${testname}" + + return $? 
+} + +run_test() { + local host_oops_cnt_before + local host_warn_cnt_before + local vm_oops_cnt_before + local vm_warn_cnt_before + local host_oops_cnt_after + local host_warn_cnt_after + local vm_oops_cnt_after + local vm_warn_cnt_after + local name + local rc + + host_oops_cnt_before=$(dmesg | grep -c -i 'Oops') + host_warn_cnt_before=$(dmesg --level=warn | wc -l) + vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops') + vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l) + + name=$(echo "${1}" | awk '{ print $1 }') + eval test_"${name}" + rc=$? + + host_oops_cnt_after=$(dmesg | grep -i 'Oops' | wc -l) + if [[ ${host_oops_cnt_after} -gt ${host_oops_cnt_before} ]]; then + echo "FAIL: kernel oops detected on host" | log_host "${name}" + rc=$KSFT_FAIL + fi + + host_warn_cnt_after=$(dmesg --level=warn | wc -l) + if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then + echo "FAIL: kernel warning detected on host" | log_host "${name}" + rc=$KSFT_FAIL + fi + + vm_oops_cnt_after=$(vm_ssh -- dmesg | grep -i 'Oops' | wc -l) + if [[ ${vm_oops_cnt_after} -gt ${vm_oops_cnt_before} ]]; then + echo "FAIL: kernel oops detected on vm" | log_host "${name}" + rc=$KSFT_FAIL + fi + + vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l) + if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then + echo "FAIL: kernel warning detected on vm" | log_host "${name}" + rc=$KSFT_FAIL + fi + + return "${rc}" +} + +QEMU="qemu-system-$(uname -m)" + +while getopts :hvsq:b o +do + case $o in + v) VERBOSE=1;; + b) BUILD=1;; + q) QEMU=$OPTARG;; + h|*) usage;; + esac +done +shift $((OPTIND-1)) + +trap cleanup EXIT + +if [[ ${#} -eq 0 ]]; then + ARGS=("${TEST_NAMES[@]}") +else + ARGS=("$@") +fi + +check_args "${ARGS[@]}" +check_deps +check_vng +handle_build + +echo "1..${#ARGS[@]}" + +log_setup "Booting up VM" +vm_start +vm_wait_for_ssh +log_setup "VM booted up" + +cnt_pass=0 +cnt_fail=0 +cnt_skip=0 +cnt_total=0 +for arg in "${ARGS[@]}"; do + run_test "${arg}" + rc=$? 
+ if [[ ${rc} -eq $KSFT_PASS ]]; then + cnt_pass=$(( cnt_pass + 1 )) + echo "ok ${cnt_total} ${arg}" + elif [[ ${rc} -eq $KSFT_SKIP ]]; then + cnt_skip=$(( cnt_skip + 1 )) + echo "ok ${cnt_total} ${arg} # SKIP" + elif [[ ${rc} -eq $KSFT_FAIL ]]; then + cnt_fail=$(( cnt_fail + 1 )) + echo "not ok ${cnt_total} ${arg} # exit=$rc" + fi + cnt_total=$(( cnt_total + 1 )) +done + +echo "SUMMARY: PASS=${cnt_pass} SKIP=${cnt_skip} FAIL=${cnt_fail}" +echo "Log: ${LOG}" + +if [ $((cnt_pass + cnt_skip)) -eq ${cnt_total} ]; then + exit "$KSFT_PASS" +else + exit "$KSFT_FAIL" +fi diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config index f314d3789f17..0a5381717e9f 100644 --- a/tools/testing/selftests/wireguard/qemu/kernel.config +++ b/tools/testing/selftests/wireguard/qemu/kernel.config @@ -16,9 +16,13 @@ CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=y CONFIG_NF_NAT=y CONFIG_NETFILTER_XTABLES=y +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_NAT=y CONFIG_NETFILTER_XT_MATCH_LENGTH=y CONFIG_NETFILTER_XT_MARK=y +CONFIG_NETFILTER_XT_TARGET_MASQUERADE=m +CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP_NF_IPTABLES=y CONFIG_IP_NF_FILTER=y CONFIG_IP_NF_MANGLE=y diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index f703fcfe9f7c..83148875a12c 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl ioperm \ - test_vsyscall mov_ss_trap \ + test_vsyscall mov_ss_trap sigtrap_loop \ syscall_arg_fault fsgsbase_restore sigaltstack TARGETS_C_BOTHBITS += nx_stack TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ diff --git a/tools/testing/selftests/x86/sigtrap_loop.c b/tools/testing/selftests/x86/sigtrap_loop.c new file mode 100644 index 000000000000..9d065479e89f --- /dev/null +++ b/tools/testing/selftests/x86/sigtrap_loop.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2025 Intel Corporation + */ +#define _GNU_SOURCE + +#include <err.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ucontext.h> + +#ifdef __x86_64__ +# define REG_IP REG_RIP +#else +# define REG_IP REG_EIP +#endif + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) +{ + struct sigaction sa; + + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); + + return; +} + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t *)ctx_void; + static unsigned int loop_count_on_same_ip; + static unsigned long last_trap_ip; + + if (last_trap_ip == ctx->uc_mcontext.gregs[REG_IP]) { + printf("\tTrapped at %016lx\n", last_trap_ip); + + /* + * If the same IP is hit more than 10 times in a row, it is + * _considered_ an infinite loop. 
+ */ + if (++loop_count_on_same_ip > 10) { + printf("[FAIL]\tDetected SIGTRAP infinite loop\n"); + exit(1); + } + + return; + } + + loop_count_on_same_ip = 0; + last_trap_ip = ctx->uc_mcontext.gregs[REG_IP]; + printf("\tTrapped at %016lx\n", last_trap_ip); +} + +int main(int argc, char *argv[]) +{ + sethandler(SIGTRAP, sigtrap, 0); + + /* + * Set the Trap Flag (TF) to single-step the test code, therefore to + * trigger a SIGTRAP signal after each instruction until the TF is + * cleared. + * + * Because the arithmetic flags are not significant here, the TF is + * set by pushing 0x302 onto the stack and then popping it into the + * flags register. + * + * Four instructions in the following asm code are executed with the + * TF set, thus the SIGTRAP handler is expected to run four times. + */ + printf("[RUN]\tSIGTRAP infinite loop detection\n"); + asm volatile( +#ifdef __x86_64__ + /* + * Avoid clobbering the redzone + * + * Equivalent to "sub $128, %rsp", however -128 can be encoded + * in a single byte immediate while 128 uses 4 bytes. + */ + "add $-128, %rsp\n\t" +#endif + "push $0x302\n\t" + "popf\n\t" + "nop\n\t" + "nop\n\t" + "push $0x202\n\t" + "popf\n\t" +#ifdef __x86_64__ + "sub $-128, %rsp\n\t" +#endif + ); + + printf("[OK]\tNo SIGTRAP infinite loop detected\n"); + return 0; +} diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h index 4505b1c31be1..816e7e057585 100644 --- a/tools/testing/vma/vma_internal.h +++ b/tools/testing/vma/vma_internal.h @@ -159,6 +159,14 @@ typedef __bitwise unsigned int vm_fault_t; #define ASSERT_EXCLUSIVE_WRITER(x) +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + struct kref { refcount_t refcount; }; @@ -1434,8 +1442,29 @@ static inline void free_anon_vma_name(struct vm_area_struct *vma) (void)vma; } +/* Declared in vma.h. */ +static inline void set_vma_from_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc); + +static inline struct vm_area_desc *vma_to_desc(struct vm_area_struct *vma, + struct vm_area_desc *desc); + +static int compat_vma_mmap_prepare(struct file *file, + struct vm_area_struct *vma) +{ + struct vm_area_desc desc; + int err; + + err = file->f_op->mmap_prepare(vma_to_desc(vma, &desc)); + if (err) + return err; + set_vma_from_desc(vma, &desc); + + return 0; +} + /* Did the driver provide valid mmap hook configuration? */ -static inline bool file_has_valid_mmap_hooks(struct file *file) +static inline bool can_mmap_file(struct file *file) { bool has_mmap = file->f_op->mmap; bool has_mmap_prepare = file->f_op->mmap_prepare; @@ -1443,22 +1472,21 @@ static inline bool file_has_valid_mmap_hooks(struct file *file) /* Hooks are mutually exclusive. 
*/ if (WARN_ON_ONCE(has_mmap && has_mmap_prepare)) return false; - if (WARN_ON_ONCE(!has_mmap && !has_mmap_prepare)) + if (!has_mmap && !has_mmap_prepare) return false; return true; } -static inline int call_mmap(struct file *file, struct vm_area_struct *vma) +static inline int vfs_mmap(struct file *file, struct vm_area_struct *vma) { - if (WARN_ON_ONCE(file->f_op->mmap_prepare)) - return -EINVAL; + if (file->f_op->mmap_prepare) + return compat_vma_mmap_prepare(file, vma); return file->f_op->mmap(file, vma); } -static inline int __call_mmap_prepare(struct file *file, - struct vm_area_desc *desc) +static inline int vfs_mmap_prepare(struct file *file, struct vm_area_desc *desc) { return file->f_op->mmap_prepare(desc); } @@ -1468,4 +1496,12 @@ static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) (void)vma; } +static inline void vma_set_file(struct vm_area_struct *vma, struct file *file) +{ + /* Changing an anonymous vma with this is illegal */ + get_file(file); + swap(vma->vm_file, file); + fput(file); +} + #endif /* __MM_VMA_INTERNAL_H */ diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile index 6e0b4e95e230..88211fd132d2 100644 --- a/tools/testing/vsock/Makefile +++ b/tools/testing/vsock/Makefile @@ -5,6 +5,7 @@ vsock_test: vsock_test.o vsock_test_zerocopy.o timeout.o control.o util.o msg_ze vsock_diag_test: vsock_diag_test.o timeout.o control.o util.o vsock_perf: vsock_perf.o msg_zerocopy_common.o +vsock_test: LDLIBS = -lpthread vsock_uring_test: LDLIBS = -luring vsock_uring_test: control.o util.o vsock_uring_test.o timeout.o msg_zerocopy_common.o diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index 0c7e9cbcbc85..7b861a8e997a 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -7,6 +7,7 @@ * Author: Stefan Hajnoczi <stefanha@redhat.com> */ +#include <ctype.h> #include <errno.h> #include <stdio.h> #include <stdint.h> @@ -16,6 +17,7 @@ #include <unistd.h> #include <assert.h> #include <sys/epoll.h> +#include <sys/ioctl.h> #include <sys/mman.h> #include <linux/sockios.h> @@ -23,6 +25,9 @@ #include "control.h" #include "util.h" +#define KALLSYMS_PATH "/proc/kallsyms" +#define KALLSYMS_LINE_LEN 512 + /* Install signal handlers */ void init_signals(void) { @@ -97,39 +102,52 @@ void vsock_wait_remote_close(int fd) close(epollfd); } -/* Wait until transport reports no data left to be sent. - * Return false if transport does not implement the unsent_bytes() callback. +/* Wait until ioctl gives an expected int value. + * Return false if the op is not supported. */ -bool vsock_wait_sent(int fd) +bool vsock_ioctl_int(int fd, unsigned long op, int expected) { - int ret, sock_bytes_unsent; + int actual, ret; + char name[32]; + + snprintf(name, sizeof(name), "ioctl(%lu)", op); timeout_begin(TIMEOUT); do { - ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + ret = ioctl(fd, op, &actual); if (ret < 0) { - if (errno == EOPNOTSUPP) + if (errno == EOPNOTSUPP || errno == ENOTTY) break; - perror("ioctl(SIOCOUTQ)"); + perror(name); exit(EXIT_FAILURE); } - timeout_check("SIOCOUTQ"); - } while (sock_bytes_unsent != 0); + timeout_check(name); + } while (actual != expected); timeout_end(); - return !ret; + return ret >= 0; } -/* Create socket <type>, bind to <cid, port> and return the file descriptor. */ -int vsock_bind(unsigned int cid, unsigned int port, int type) +/* Wait until transport reports no data left to be sent. + * Return false if transport does not implement the unsent_bytes() callback. 
+ */ +bool vsock_wait_sent(int fd) +{ + return vsock_ioctl_int(fd, SIOCOUTQ, 0); +} + +/* Create socket <type>, bind to <cid, port>. + * Return the file descriptor, or -1 on error. + */ +int vsock_bind_try(unsigned int cid, unsigned int port, int type) { struct sockaddr_vm sa = { .svm_family = AF_VSOCK, .svm_cid = cid, .svm_port = port, }; - int fd; + int fd, saved_errno; fd = socket(AF_VSOCK, type, 0); if (fd < 0) { @@ -138,6 +156,22 @@ int vsock_bind(unsigned int cid, unsigned int port, int type) } if (bind(fd, (struct sockaddr *)&sa, sizeof(sa))) { + saved_errno = errno; + close(fd); + errno = saved_errno; + fd = -1; + } + + return fd; +} + +/* Create socket <type>, bind to <cid, port> and return the file descriptor. */ +int vsock_bind(unsigned int cid, unsigned int port, int type) +{ + int fd; + + fd = vsock_bind_try(cid, port, type); + if (fd < 0) { perror("bind"); exit(EXIT_FAILURE); } @@ -836,3 +870,55 @@ void enable_so_linger(int fd, int timeout) exit(EXIT_FAILURE); } } + +static int __get_transports(void) +{ + char buf[KALLSYMS_LINE_LEN]; + const char *ksym; + int ret = 0; + FILE *f; + + f = fopen(KALLSYMS_PATH, "r"); + if (!f) { + perror("Can't open " KALLSYMS_PATH); + exit(EXIT_FAILURE); + } + + while (fgets(buf, sizeof(buf), f)) { + char *match; + int i; + + assert(buf[strlen(buf) - 1] == '\n'); + + for (i = 0; i < TRANSPORT_NUM; ++i) { + if (ret & BIT(i)) + continue; + + /* Match should be followed by '\t' or '\n'. + * See kallsyms.c:s_show(). + */ + ksym = transport_ksyms[i]; + match = strstr(buf, ksym); + if (match && isspace(match[strlen(ksym)])) { + ret |= BIT(i); + break; + } + } + } + + fclose(f); + return ret; +} + +/* Return integer with TRANSPORT_* bit set for every (known) registered vsock + * transport. + */ +int get_transports(void) +{ + static int tr = -1; + + if (tr == -1) + tr = __get_transports(); + + return tr; +} diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index 5e2db67072d5..142c02a6834a 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -3,8 +3,40 @@ #define UTIL_H #include <sys/socket.h> +#include <linux/bitops.h> +#include <linux/kernel.h> #include <linux/vm_sockets.h> +/* All known vsock transports, see callers of vsock_core_register() */ +#define KNOWN_TRANSPORTS(x) \ + x(LOOPBACK, "loopback") \ + x(VIRTIO, "virtio") \ + x(VHOST, "vhost") \ + x(VMCI, "vmci") \ + x(HYPERV, "hvs") + +enum transport { + TRANSPORT_COUNTER_BASE = __COUNTER__ + 1, + #define x(name, symbol) \ + TRANSPORT_##name = BIT(__COUNTER__ - TRANSPORT_COUNTER_BASE), + KNOWN_TRANSPORTS(x) + TRANSPORT_NUM = __COUNTER__ - TRANSPORT_COUNTER_BASE, + #undef x +}; + +static const char * const transport_ksyms[] = { + #define x(name, symbol) "d " symbol "_transport", + KNOWN_TRANSPORTS(x) + #undef x +}; + +static_assert(ARRAY_SIZE(transport_ksyms) == TRANSPORT_NUM); +static_assert(BITS_PER_TYPE(int) >= TRANSPORT_NUM); + +#define TRANSPORTS_G2H (TRANSPORT_VIRTIO | TRANSPORT_VMCI | TRANSPORT_HYPERV) +#define TRANSPORTS_H2G (TRANSPORT_VHOST | TRANSPORT_VMCI) +#define TRANSPORTS_LOCAL (TRANSPORT_LOOPBACK) + /* Tests can either run as the client or the server */ enum test_mode { TEST_MODE_UNSET, @@ -44,6 +76,7 @@ int vsock_connect(unsigned int cid, unsigned int port, int type); int vsock_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp, int type); int vsock_stream_connect(unsigned int cid, unsigned int port); +int vsock_bind_try(unsigned int cid, unsigned int port, int type); int vsock_bind(unsigned int cid, unsigned int 
port, int type); int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type); @@ -54,6 +87,7 @@ int vsock_stream_listen(unsigned int cid, unsigned int port); int vsock_seqpacket_accept(unsigned int cid, unsigned int port, struct sockaddr_vm *clientaddrp); void vsock_wait_remote_close(int fd); +bool vsock_ioctl_int(int fd, unsigned long op, int expected); bool vsock_wait_sent(int fd); void send_buf(int fd, const void *buf, size_t len, int flags, ssize_t expected_ret); @@ -81,4 +115,5 @@ void setsockopt_timeval_check(int fd, int level, int optname, struct timeval val, char const *errmsg); void enable_so_zerocopy_check(int fd); void enable_so_linger(int fd, int timeout); +int get_transports(void); #endif /* UTIL_H */ diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index f669baaa0dca..d4517386e551 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -22,6 +22,9 @@ #include <signal.h> #include <sys/ioctl.h> #include <linux/time64.h> +#include <pthread.h> +#include <fcntl.h> +#include <linux/sockios.h> #include "vsock_test_zerocopy.h" #include "timeout.h" @@ -1305,6 +1308,54 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type) close(fd); } +static void test_unread_bytes_server(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int client_fd; + + client_fd = vsock_accept(VMADDR_CID_ANY, opts->peer_port, NULL, type); + if (client_fd < 0) { + perror("accept"); + exit(EXIT_FAILURE); + } + + for (int i = 0; i < sizeof(buf); i++) + buf[i] = rand() & 0xFF; + + send_buf(client_fd, buf, sizeof(buf), 0, sizeof(buf)); + control_writeln("SENT"); + + close(client_fd); +} + +static void test_unread_bytes_client(const struct test_opts *opts, int type) +{ + unsigned char buf[MSG_BUF_IOCTL_LEN]; + int fd; + + fd = vsock_connect(opts->peer_cid, opts->peer_port, type); + if (fd < 0) { + perror("connect"); + exit(EXIT_FAILURE); + } + + control_expectln("SENT"); + /* The data has arrived but has not been read. The expected value is + * MSG_BUF_IOCTL_LEN. + */ + if (!vsock_ioctl_int(fd, SIOCINQ, MSG_BUF_IOCTL_LEN)) { + fprintf(stderr, "Test skipped, SIOCINQ not supported.\n"); + goto out; + } + + recv_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); + /* All data has been consumed, so the expected value is 0. 
*/ + vsock_ioctl_int(fd, SIOCINQ, 0); + +out: + close(fd); +} + static void test_stream_unsent_bytes_client(const struct test_opts *opts) { test_unsent_bytes_client(opts, SOCK_STREAM); @@ -1325,6 +1376,26 @@ static void test_seqpacket_unsent_bytes_server(const struct test_opts *opts) test_unsent_bytes_server(opts, SOCK_SEQPACKET); } +static void test_stream_unread_bytes_client(const struct test_opts *opts) +{ + test_unread_bytes_client(opts, SOCK_STREAM); +} + +static void test_stream_unread_bytes_server(const struct test_opts *opts) +{ + test_unread_bytes_server(opts, SOCK_STREAM); +} + +static void test_seqpacket_unread_bytes_client(const struct test_opts *opts) +{ + test_unread_bytes_client(opts, SOCK_SEQPACKET); +} + +static void test_seqpacket_unread_bytes_server(const struct test_opts *opts) +{ + test_unread_bytes_server(opts, SOCK_SEQPACKET); +} + #define RCVLOWAT_CREDIT_UPD_BUF_SIZE (1024 * 128) /* This define is the same as in 'include/linux/virtio_vsock.h': * it is used to decide when to send credit update message during @@ -1718,16 +1789,27 @@ static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts) #define MAX_PORT_RETRIES 24 /* net/vmw_vsock/af_vsock.c */ -/* Test attempts to trigger a transport release for an unbound socket. This can - * lead to a reference count mishandling. - */ -static void test_stream_transport_uaf_client(const struct test_opts *opts) +static bool test_stream_transport_uaf(int cid) { int sockets[MAX_PORT_RETRIES]; struct sockaddr_vm addr; - int fd, i, alen; + socklen_t alen; + int fd, i, c; + bool ret; - fd = vsock_bind(VMADDR_CID_ANY, VMADDR_PORT_ANY, SOCK_STREAM); + /* Probe for a transport by attempting a local CID bind. Unavailable + * transport (or more specifically: an unsupported transport/CID + * combination) results in EADDRNOTAVAIL, other errnos are fatal. + */ + fd = vsock_bind_try(cid, VMADDR_PORT_ANY, SOCK_STREAM); + if (fd < 0) { + if (errno != EADDRNOTAVAIL) { + perror("Unexpected bind() errno"); + exit(EXIT_FAILURE); + } + + return false; + } alen = sizeof(addr); if (getsockname(fd, (struct sockaddr *)&addr, &alen)) { @@ -1735,38 +1817,83 @@ static void test_stream_transport_uaf_client(const struct test_opts *opts) exit(EXIT_FAILURE); } + /* Drain the autobind pool; see __vsock_bind_connectible(). */ for (i = 0; i < MAX_PORT_RETRIES; ++i) - sockets[i] = vsock_bind(VMADDR_CID_ANY, ++addr.svm_port, - SOCK_STREAM); + sockets[i] = vsock_bind(cid, ++addr.svm_port, SOCK_STREAM); close(fd); - fd = socket(AF_VSOCK, SOCK_STREAM, 0); + + /* Setting SOCK_NONBLOCK makes connect() return soon after + * (re-)assigning the transport. We are not connecting to anything + * anyway, so there is no point entering the main loop in + * vsock_connect(); waiting for timeout, checking for signals, etc. + */ + fd = socket(AF_VSOCK, SOCK_STREAM | SOCK_NONBLOCK, 0); if (fd < 0) { perror("socket"); exit(EXIT_FAILURE); } - if (!vsock_connect_fd(fd, addr.svm_cid, addr.svm_port)) { - perror("Unexpected connect() #1 success"); + /* Assign transport, while failing to autobind. Autobind pool was + * drained, so EADDRNOTAVAIL coming from __vsock_bind_connectible() is + * expected. + * + * One exception is ENODEV which is thrown by vsock_assign_transport(), + * i.e. before vsock_auto_bind(), when the only transport loaded is + * vhost. + */ + if (!connect(fd, (struct sockaddr *)&addr, alen)) { + fprintf(stderr, "Unexpected connect() success\n"); exit(EXIT_FAILURE); } - - /* Vulnerable system may crash now. 
*/ - if (!vsock_connect_fd(fd, VMADDR_CID_HOST, VMADDR_PORT_ANY)) { - perror("Unexpected connect() #2 success"); + if (errno == ENODEV && cid == VMADDR_CID_HOST) { + ret = false; + goto cleanup; + } + if (errno != EADDRNOTAVAIL) { + perror("Unexpected connect() errno"); exit(EXIT_FAILURE); } + /* Reassign transport, triggering old transport release and + * (potentially) unbinding of an unbound socket. + * + * Vulnerable system may crash now. + */ + for (c = VMADDR_CID_HYPERVISOR; c <= VMADDR_CID_HOST + 1; ++c) { + if (c != cid) { + addr.svm_cid = c; + (void)connect(fd, (struct sockaddr *)&addr, alen); + } + } + + ret = true; +cleanup: close(fd); while (i--) close(sockets[i]); - control_writeln("DONE"); + return ret; } -static void test_stream_transport_uaf_server(const struct test_opts *opts) +/* Test attempts to trigger a transport release for an unbound socket. This can + * lead to a reference count mishandling. + */ +static void test_stream_transport_uaf_client(const struct test_opts *opts) { - control_expectln("DONE"); + bool tested = false; + int cid, tr; + + for (cid = VMADDR_CID_HYPERVISOR; cid <= VMADDR_CID_HOST + 1; ++cid) + tested |= test_stream_transport_uaf(cid); + + tr = get_transports(); + if (!tr) + fprintf(stderr, "No transports detected\n"); + else if (tr == TRANSPORT_VIRTIO) + fprintf(stderr, "Setup unsupported: sole virtio transport\n"); + else if (!tested) + fprintf(stderr, "No transports tested\n"); } static void test_stream_connect_retry_client(const struct test_opts *opts) @@ -1811,6 +1938,180 @@ static void test_stream_connect_retry_server(const struct test_opts *opts) close(fd); } +#define TRANSPORT_CHANGE_TIMEOUT 2 /* seconds */ + +static void *test_stream_transport_change_thread(void *vargp) +{ + pid_t *pid = (pid_t *)vargp; + int ret; + + ret = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL); + if (ret) { + fprintf(stderr, "pthread_setcanceltype: %d\n", ret); + exit(EXIT_FAILURE); + } + + while (true) { + if (kill(*pid, SIGUSR1) < 0) { + perror("kill"); + exit(EXIT_FAILURE); + } + } + return NULL; +} + +static void test_transport_change_signal_handler(int signal) +{ + /* We need a custom handler for SIGUSR1 as the default one terminates the process. */ +} + +static void test_stream_transport_change_client(const struct test_opts *opts) +{ + __sighandler_t old_handler; + pid_t pid = getpid(); + pthread_t thread_id; + time_t tout; + int ret, tr; + + tr = get_transports(); + + /* Print a warning if there is a G2H transport loaded. + * This is on a best effort basis because VMCI can be either G2H or H2G, and there is + * no easy way to tell which. + * The bug we are testing only appears when G2H transports are not loaded. + * This is because `vsock_assign_transport`, when using CID 0, assigns a G2H transport + * to vsk->transport. If none is available it is set to NULL, causing the null-ptr-deref. + */ + if (tr & TRANSPORTS_G2H) + fprintf(stderr, "G2H Transport detected. 
This test will not fail.\n"); + + old_handler = signal(SIGUSR1, test_transport_change_signal_handler); + if (old_handler == SIG_ERR) { + perror("signal"); + exit(EXIT_FAILURE); + } + + ret = pthread_create(&thread_id, NULL, test_stream_transport_change_thread, &pid); + if (ret) { + fprintf(stderr, "pthread_create: %d\n", ret); + exit(EXIT_FAILURE); + } + + control_expectln("LISTENING"); + + tout = current_nsec() + TRANSPORT_CHANGE_TIMEOUT * NSEC_PER_SEC; + do { + struct sockaddr_vm sa = { + .svm_family = AF_VSOCK, + .svm_cid = opts->peer_cid, + .svm_port = opts->peer_port, + }; + bool send_control = false; + int s; + + s = socket(AF_VSOCK, SOCK_STREAM, 0); + if (s < 0) { + perror("socket"); + exit(EXIT_FAILURE); + } + + ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); + /* The connect can fail due to signals coming from the thread, + * or because the receiver connection queue is full. + * We also ignore the latter case because there is no way + * of synchronizing client's connect and server's accept when + * connect(s) are constantly being interrupted by signals. + */ + if (ret == -1 && (errno != EINTR && errno != ECONNRESET)) { + perror("connect"); + exit(EXIT_FAILURE); + } + + /* Notify the server if the connect() is successful or the + * receiver connection queue is full, so it will do accept() + * to drain it. + */ + if (!ret || errno == ECONNRESET) + send_control = true; + + /* Set CID to 0 to cause a transport change. */ + sa.svm_cid = 0; + + /* There is a case where this will not fail: + * if the previous connect() is interrupted while the + * connection request is already sent, this second + * connect() will wait for the response. + */ + ret = connect(s, (struct sockaddr *)&sa, sizeof(sa)); + if (!ret || errno == ECONNRESET) + send_control = true; + + close(s); + + if (send_control) + control_writeulong(CONTROL_CONTINUE); + + } while (current_nsec() < tout); + + control_writeulong(CONTROL_DONE); + + ret = pthread_cancel(thread_id); + if (ret) { + fprintf(stderr, "pthread_cancel: %d\n", ret); + exit(EXIT_FAILURE); + } + + ret = pthread_join(thread_id, NULL); + if (ret) { + fprintf(stderr, "pthread_join: %d\n", ret); + exit(EXIT_FAILURE); + } + + if (signal(SIGUSR1, old_handler) == SIG_ERR) { + perror("signal"); + exit(EXIT_FAILURE); + } +} + +static void test_stream_transport_change_server(const struct test_opts *opts) +{ + int s = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); + + /* Set the socket to be nonblocking because connects that have been interrupted + * (EINTR) can fill the receiver's accept queue anyway, leading to connect failure. + * As of today (6.15), in such a situation there is no way to tell, from the + * client side, whether the connection has been queued in the server or not. + */ + if (fcntl(s, F_SETFL, fcntl(s, F_GETFL, 0) | O_NONBLOCK) < 0) { + perror("fcntl"); + exit(EXIT_FAILURE); + } + control_writeln("LISTENING"); + + while (control_readulong() == CONTROL_CONTINUE) { + /* Must accept the connection, otherwise the `listen` + * queue will fill up and new connections will fail. + * There can be more than one queued connection, + * clear them all. 
+ */ + while (true) { + int client = accept(s, NULL, NULL); + + if (client < 0) { + if (errno == EAGAIN) + break; + + perror("accept"); + exit(EXIT_FAILURE); + } + + close(client); + } + } + + close(s); +} + static void test_stream_linger_client(const struct test_opts *opts) { int fd; @@ -2034,7 +2335,6 @@ static struct test_case test_cases[] = { { .name = "SOCK_STREAM transport release use-after-free", .run_client = test_stream_transport_uaf_client, - .run_server = test_stream_transport_uaf_server, }, { .name = "SOCK_STREAM retry failed connect()", @@ -2051,6 +2351,21 @@ static struct test_case test_cases[] = { .run_client = test_stream_nolinger_client, .run_server = test_stream_nolinger_server, }, + { + .name = "SOCK_STREAM transport change null-ptr-deref", + .run_client = test_stream_transport_change_client, + .run_server = test_stream_transport_change_server, + }, + { + .name = "SOCK_STREAM ioctl(SIOCINQ) functionality", + .run_client = test_stream_unread_bytes_client, + .run_server = test_stream_unread_bytes_server, + }, + { + .name = "SOCK_SEQPACKET ioctl(SIOCINQ) functionality", + .run_client = test_seqpacket_unread_bytes_client, + .run_server = test_seqpacket_unread_bytes_server, + }, {}, }; diff --git a/tools/verification/dot2/Makefile b/tools/verification/dot2/Makefile deleted file mode 100644 index 021beb07a521..000000000000 --- a/tools/verification/dot2/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -INSTALL=install - -prefix ?= /usr -bindir ?= $(prefix)/bin -mandir ?= $(prefix)/share/man -miscdir ?= $(prefix)/share/dot2 -srcdir ?= $(prefix)/src - -PYLIB ?= $(shell python3 -c 'import sysconfig; print (sysconfig.get_path("purelib"))') - -.PHONY: all -all: - -.PHONY: clean -clean: - -.PHONY: install -install: - $(INSTALL) automata.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/automata.py - $(INSTALL) dot2c.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/dot2c.py - $(INSTALL) dot2c -D -m 755 $(DESTDIR)$(bindir)/ - $(INSTALL) dot2k.py -D -m 644 $(DESTDIR)$(PYLIB)/dot2/dot2k.py - $(INSTALL) dot2k -D -m 755 $(DESTDIR)$(bindir)/ - - mkdir -p ${miscdir}/ - cp -rp dot2k_templates $(DESTDIR)$(miscdir)/ diff --git a/tools/verification/dot2/dot2k b/tools/verification/dot2/dot2k deleted file mode 100644 index 767064f415e7..000000000000 --- a/tools/verification/dot2/dot2k +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/env python3 -# SPDX-License-Identifier: GPL-2.0-only -# -# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> -# -# dot2k: transform dot files into a monitor for the Linux kernel. 
-# -# For further information, see: -# Documentation/trace/rv/da_monitor_synthesis.rst - -if __name__ == '__main__': - from dot2.dot2k import dot2k - import argparse - import sys - - def is_container(): - """Should work even before parsing the arguments""" - return "-c" in sys.argv or "--container" in sys.argv - - parser = argparse.ArgumentParser(description='transform .dot file into kernel rv monitor') - parser.add_argument('-d', "--dot", dest="dot_file", required=not is_container()) - parser.add_argument('-t', "--monitor_type", dest="monitor_type", required=not is_container(), - help=f"Available options: {', '.join(dot2k.monitor_types.keys())}") - parser.add_argument('-n', "--model_name", dest="model_name", required=is_container()) - parser.add_argument("-D", "--description", dest="description", required=False) - parser.add_argument("-a", "--auto_patch", dest="auto_patch", - action="store_true", required=False, - help="Patch the kernel in place") - parser.add_argument("-p", "--parent", dest="parent", - required=False, help="Create a monitor nested to parent") - parser.add_argument("-c", "--container", dest="container", - action="store_true", required=False, - help="Create an empty monitor to be used as a container") - params = parser.parse_args() - - if not is_container(): - print("Opening and parsing the dot file %s" % params.dot_file) - try: - monitor=dot2k(params.dot_file, params.monitor_type, vars(params)) - except Exception as e: - print('Error: '+ str(e)) - print("Sorry : :-(") - sys.exit(1) - - print("Writing the monitor into the directory %s" % monitor.name) - monitor.print_files() - print("Almost done, checklist") - if not is_container(): - print(" - Edit the %s/%s.c to add the instrumentation" % (monitor.name, monitor.name)) - print(monitor.fill_tracepoint_tooltip()) - print(monitor.fill_makefile_tooltip()) - print(monitor.fill_kconfig_tooltip()) - print(monitor.fill_monitor_tooltip()) diff --git a/tools/verification/models/rtapp/pagefault.ltl b/tools/verification/models/rtapp/pagefault.ltl new file mode 100644 index 000000000000..d7ce62102733 --- /dev/null +++ b/tools/verification/models/rtapp/pagefault.ltl @@ -0,0 +1 @@ +RULE = always (RT imply not PAGEFAULT) diff --git a/tools/verification/models/rtapp/sleep.ltl b/tools/verification/models/rtapp/sleep.ltl new file mode 100644 index 000000000000..6379bbeb6212 --- /dev/null +++ b/tools/verification/models/rtapp/sleep.ltl @@ -0,0 +1,22 @@ +RULE = always ((RT and SLEEP) imply (RT_FRIENDLY_SLEEP or ALLOWLIST)) + +RT_FRIENDLY_SLEEP = (RT_VALID_SLEEP_REASON or KERNEL_THREAD) + and ((not WAKE) until RT_FRIENDLY_WAKE) + +RT_VALID_SLEEP_REASON = FUTEX_WAIT + or RT_FRIENDLY_NANOSLEEP + +RT_FRIENDLY_NANOSLEEP = CLOCK_NANOSLEEP + and NANOSLEEP_TIMER_ABSTIME + and (NANOSLEEP_CLOCK_MONOTONIC or NANOSLEEP_CLOCK_TAI) + +RT_FRIENDLY_WAKE = WOKEN_BY_EQUAL_OR_HIGHER_PRIO + or WOKEN_BY_HARDIRQ + or WOKEN_BY_NMI + or ABORT_SLEEP + or KTHREAD_SHOULD_STOP + +ALLOWLIST = BLOCK_ON_RT_MUTEX + or FUTEX_LOCK_PI + or TASK_IS_RCU + or TASK_IS_MIGRATION diff --git a/tools/verification/models/sched/nrp.dot b/tools/verification/models/sched/nrp.dot new file mode 100644 index 000000000000..77bb64669416 --- /dev/null +++ b/tools/verification/models/sched/nrp.dot @@ -0,0 +1,29 @@ +digraph state_automaton { + center = true; + size = "7,11"; + {node [shape = doublecircle] "any_thread_running"}; + {node [shape = circle] "any_thread_running"}; + {node [shape = circle] "nested_preempt"}; + {node [shape = plaintext, style=invis, label=""] "__init_preempt_irq"}; + 
{node [shape = circle] "preempt_irq"}; + {node [shape = circle] "rescheduling"}; + "__init_preempt_irq" -> "preempt_irq"; + "any_thread_running" [label = "any_thread_running", color = green3]; + "any_thread_running" -> "any_thread_running" [ label = "schedule_entry\nirq_entry" ]; + "any_thread_running" -> "rescheduling" [ label = "sched_need_resched" ]; + "nested_preempt" [label = "nested_preempt"]; + "nested_preempt" -> "any_thread_running" [ label = "schedule_entry_preempt\nschedule_entry" ]; + "nested_preempt" -> "nested_preempt" [ label = "irq_entry" ]; + "nested_preempt" -> "preempt_irq" [ label = "sched_need_resched" ]; + "preempt_irq" [label = "preempt_irq"]; + "preempt_irq" -> "nested_preempt" [ label = "schedule_entry_preempt\nschedule_entry" ]; + "preempt_irq" -> "preempt_irq" [ label = "irq_entry\nsched_need_resched" ]; + "rescheduling" [label = "rescheduling"]; + "rescheduling" -> "any_thread_running" [ label = "schedule_entry_preempt\nschedule_entry" ]; + "rescheduling" -> "preempt_irq" [ label = "irq_entry" ]; + "rescheduling" -> "rescheduling" [ label = "sched_need_resched" ]; + { rank = min ; + "__init_preempt_irq"; + "preempt_irq"; + } +} diff --git a/tools/verification/models/sched/opid.dot b/tools/verification/models/sched/opid.dot new file mode 100644 index 000000000000..840052f6952b --- /dev/null +++ b/tools/verification/models/sched/opid.dot @@ -0,0 +1,35 @@ +digraph state_automaton { + center = true; + size = "7,11"; + {node [shape = plaintext, style=invis, label=""] "__init_disabled"}; + {node [shape = circle] "disabled"}; + {node [shape = doublecircle] "enabled"}; + {node [shape = circle] "enabled"}; + {node [shape = circle] "in_irq"}; + {node [shape = circle] "irq_disabled"}; + {node [shape = circle] "preempt_disabled"}; + "__init_disabled" -> "disabled"; + "disabled" [label = "disabled"]; + "disabled" -> "disabled" [ label = "sched_need_resched\nsched_waking\nirq_entry" ]; + "disabled" -> "irq_disabled" [ label = "preempt_enable" ]; + "disabled" -> "preempt_disabled" [ label = "irq_enable" ]; + "enabled" [label = "enabled", color = green3]; + "enabled" -> "enabled" [ label = "preempt_enable" ]; + "enabled" -> "irq_disabled" [ label = "irq_disable" ]; + "enabled" -> "preempt_disabled" [ label = "preempt_disable" ]; + "in_irq" [label = "in_irq"]; + "in_irq" -> "enabled" [ label = "irq_enable" ]; + "in_irq" -> "in_irq" [ label = "sched_need_resched\nsched_waking\nirq_entry" ]; + "irq_disabled" [label = "irq_disabled"]; + "irq_disabled" -> "disabled" [ label = "preempt_disable" ]; + "irq_disabled" -> "enabled" [ label = "irq_enable" ]; + "irq_disabled" -> "in_irq" [ label = "irq_entry" ]; + "irq_disabled" -> "irq_disabled" [ label = "sched_need_resched" ]; + "preempt_disabled" [label = "preempt_disabled"]; + "preempt_disabled" -> "disabled" [ label = "irq_disable" ]; + "preempt_disabled" -> "enabled" [ label = "preempt_enable" ]; + { rank = min ; + "__init_disabled"; + "disabled"; + } +} diff --git a/tools/verification/models/sched/sncid.dot b/tools/verification/models/sched/sncid.dot deleted file mode 100644 index 072851721b50..000000000000 --- a/tools/verification/models/sched/sncid.dot +++ /dev/null @@ -1,18 +0,0 @@ -digraph state_automaton { - center = true; - size = "7,11"; - {node [shape = plaintext, style=invis, label=""] "__init_can_sched"}; - {node [shape = ellipse] "can_sched"}; - {node [shape = plaintext] "can_sched"}; - {node [shape = plaintext] "cant_sched"}; - "__init_can_sched" -> "can_sched"; - "can_sched" [label = "can_sched", color = green3]; - 
"can_sched" -> "can_sched" [ label = "schedule_entry\nschedule_exit" ]; - "can_sched" -> "cant_sched" [ label = "irq_disable" ]; - "cant_sched" [label = "cant_sched"]; - "cant_sched" -> "can_sched" [ label = "irq_enable" ]; - { rank = min ; - "__init_can_sched"; - "can_sched"; - } -} diff --git a/tools/verification/models/sched/sssw.dot b/tools/verification/models/sched/sssw.dot new file mode 100644 index 000000000000..4994c3e876be --- /dev/null +++ b/tools/verification/models/sched/sssw.dot @@ -0,0 +1,30 @@ +digraph state_automaton { + center = true; + size = "7,11"; + {node [shape = plaintext, style=invis, label=""] "__init_runnable"}; + {node [shape = doublecircle] "runnable"}; + {node [shape = circle] "runnable"}; + {node [shape = circle] "signal_wakeup"}; + {node [shape = circle] "sleepable"}; + {node [shape = circle] "sleeping"}; + "__init_runnable" -> "runnable"; + "runnable" [label = "runnable", color = green3]; + "runnable" -> "runnable" [ label = "sched_set_state_runnable\nsched_wakeup\nsched_switch_in\nsched_switch_yield\nsched_switch_preempt\nsignal_deliver" ]; + "runnable" -> "sleepable" [ label = "sched_set_state_sleepable" ]; + "runnable" -> "sleeping" [ label = "sched_switch_blocking" ]; + "signal_wakeup" [label = "signal_wakeup"]; + "signal_wakeup" -> "runnable" [ label = "signal_deliver" ]; + "signal_wakeup" -> "signal_wakeup" [ label = "sched_switch_in\nsched_switch_preempt\nsched_switch_yield\nsched_wakeup" ]; + "signal_wakeup" -> "sleepable" [ label = "sched_set_state_sleepable" ]; + "sleepable" [label = "sleepable"]; + "sleepable" -> "runnable" [ label = "sched_set_state_runnable\nsched_wakeup" ]; + "sleepable" -> "signal_wakeup" [ label = "sched_switch_yield" ]; + "sleepable" -> "sleepable" [ label = "sched_set_state_sleepable\nsched_switch_in\nsched_switch_preempt\nsignal_deliver" ]; + "sleepable" -> "sleeping" [ label = "sched_switch_suspend\nsched_switch_blocking" ]; + "sleeping" [label = "sleeping"]; + "sleeping" -> "runnable" [ label = "sched_wakeup" ]; + { rank = min ; + "__init_runnable"; + "runnable"; + } +} diff --git a/tools/verification/models/sched/sts.dot b/tools/verification/models/sched/sts.dot new file mode 100644 index 000000000000..8f5f38be04d5 --- /dev/null +++ b/tools/verification/models/sched/sts.dot @@ -0,0 +1,38 @@ +digraph state_automaton { + center = true; + size = "7,11"; + {node [shape = plaintext, style=invis, label=""] "__init_can_sched"}; + {node [shape = doublecircle] "can_sched"}; + {node [shape = circle] "can_sched"}; + {node [shape = circle] "cant_sched"}; + {node [shape = circle] "disable_to_switch"}; + {node [shape = circle] "enable_to_exit"}; + {node [shape = circle] "in_irq"}; + {node [shape = circle] "scheduling"}; + {node [shape = circle] "switching"}; + "__init_can_sched" -> "can_sched"; + "can_sched" [label = "can_sched", color = green3]; + "can_sched" -> "cant_sched" [ label = "irq_disable" ]; + "can_sched" -> "scheduling" [ label = "schedule_entry" ]; + "cant_sched" [label = "cant_sched"]; + "cant_sched" -> "can_sched" [ label = "irq_enable" ]; + "cant_sched" -> "cant_sched" [ label = "irq_entry" ]; + "disable_to_switch" [label = "disable_to_switch"]; + "disable_to_switch" -> "enable_to_exit" [ label = "irq_enable" ]; + "disable_to_switch" -> "in_irq" [ label = "irq_entry" ]; + "disable_to_switch" -> "switching" [ label = "sched_switch" ]; + "enable_to_exit" [label = "enable_to_exit"]; + "enable_to_exit" -> "can_sched" [ label = "schedule_exit" ]; + "enable_to_exit" -> "enable_to_exit" [ label = 
"irq_disable\nirq_entry\nirq_enable" ]; + "in_irq" [label = "in_irq"]; + "in_irq" -> "in_irq" [ label = "irq_entry" ]; + "in_irq" -> "scheduling" [ label = "irq_enable" ]; + "scheduling" [label = "scheduling"]; + "scheduling" -> "disable_to_switch" [ label = "irq_disable" ]; + "switching" [label = "switching"]; + "switching" -> "enable_to_exit" [ label = "irq_enable" ]; + { rank = min ; + "__init_can_sched"; + "can_sched"; + } +} diff --git a/tools/verification/models/sched/tss.dot b/tools/verification/models/sched/tss.dot deleted file mode 100644 index 7dfa1d9121bb..000000000000 --- a/tools/verification/models/sched/tss.dot +++ /dev/null @@ -1,18 +0,0 @@ -digraph state_automaton { - center = true; - size = "7,11"; - {node [shape = plaintext] "sched"}; - {node [shape = plaintext, style=invis, label=""] "__init_thread"}; - {node [shape = ellipse] "thread"}; - {node [shape = plaintext] "thread"}; - "__init_thread" -> "thread"; - "sched" [label = "sched"]; - "sched" -> "sched" [ label = "sched_switch" ]; - "sched" -> "thread" [ label = "schedule_exit" ]; - "thread" [label = "thread", color = green3]; - "thread" -> "sched" [ label = "schedule_entry" ]; - { rank = min ; - "__init_thread"; - "thread"; - } -} diff --git a/tools/verification/rv/src/in_kernel.c b/tools/verification/rv/src/in_kernel.c index c0dcee795c0d..4bb746ea6e17 100644 --- a/tools/verification/rv/src/in_kernel.c +++ b/tools/verification/rv/src/in_kernel.c @@ -431,7 +431,7 @@ ikm_event_handler(struct trace_seq *s, struct tep_record *record, if (config_has_id && (config_my_pid == id)) return 0; - else if (config_my_pid && (config_my_pid == pid)) + else if (config_my_pid == pid) return 0; tep_print_event(trace_event->tep, s, record, "%16s-%-8d [%.3d] ", @@ -734,7 +734,7 @@ static int parse_arguments(char *monitor_name, int argc, char **argv) config_reactor = optarg; break; case 's': - config_my_pid = 0; + config_my_pid = -1; break; case 't': config_trace = 1; diff --git a/tools/verification/rv/src/rv.c b/tools/verification/rv/src/rv.c index 239de054d1e0..b8fe24a87d97 100644 --- a/tools/verification/rv/src/rv.c +++ b/tools/verification/rv/src/rv.c @@ -191,6 +191,7 @@ int main(int argc, char **argv) * and exit. 
*/ signal(SIGINT, stop_rv); + signal(SIGTERM, stop_rv); rv_mon(argc - 1, &argv[1]); } diff --git a/tools/verification/rvgen/.gitignore b/tools/verification/rvgen/.gitignore new file mode 100644 index 000000000000..1e288a076560 --- /dev/null +++ b/tools/verification/rvgen/.gitignore @@ -0,0 +1,3 @@ +__pycache__/ +parser.out +parsetab.py diff --git a/tools/verification/rvgen/Makefile b/tools/verification/rvgen/Makefile new file mode 100644 index 000000000000..cfc4056c1e87 --- /dev/null +++ b/tools/verification/rvgen/Makefile @@ -0,0 +1,27 @@ +INSTALL=install + +prefix ?= /usr +bindir ?= $(prefix)/bin +mandir ?= $(prefix)/share/man +srcdir ?= $(prefix)/src + +PYLIB ?= $(shell python3 -c 'import sysconfig; print (sysconfig.get_path("purelib"))') + +.PHONY: all +all: + +.PHONY: clean +clean: + +.PHONY: install +install: + $(INSTALL) rvgen/automata.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/automata.py + $(INSTALL) rvgen/dot2c.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/dot2c.py + $(INSTALL) dot2c -D -m 755 $(DESTDIR)$(bindir)/ + $(INSTALL) rvgen/dot2k.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/dot2k.py + $(INSTALL) rvgen/container.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/container.py + $(INSTALL) rvgen/generator.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/generator.py + $(INSTALL) rvgen/ltl2ba.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/ltl2ba.py + $(INSTALL) rvgen/ltl2k.py -D -m 644 $(DESTDIR)$(PYLIB)/rvgen/ltl2k.py + $(INSTALL) __main__.py -D -m 755 $(DESTDIR)$(bindir)/rvgen + cp -rp rvgen/templates $(DESTDIR)$(PYLIB)/rvgen/ diff --git a/tools/verification/rvgen/__main__.py b/tools/verification/rvgen/__main__.py new file mode 100644 index 000000000000..fa6fc1f4de2f --- /dev/null +++ b/tools/verification/rvgen/__main__.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# dot2k: transform dot files into a monitor for the Linux kernel. 
+# +# For further information, see: +# Documentation/trace/rv/da_monitor_synthesis.rst + +if __name__ == '__main__': + from rvgen.dot2k import dot2k + from rvgen.generator import Monitor + from rvgen.container import Container + from rvgen.ltl2k import ltl2k + import argparse + import sys + + parser = argparse.ArgumentParser(description='Generate kernel rv monitor') + parser.add_argument("-D", "--description", dest="description", required=False) + parser.add_argument("-a", "--auto_patch", dest="auto_patch", + action="store_true", required=False, + help="Patch the kernel in place") + + subparsers = parser.add_subparsers(dest="subcmd", required=True) + + monitor_parser = subparsers.add_parser("monitor") + monitor_parser.add_argument('-n', "--model_name", dest="model_name") + monitor_parser.add_argument("-p", "--parent", dest="parent", + required=False, help="Create a monitor nested to parent") + monitor_parser.add_argument('-c', "--class", dest="monitor_class", + help="Monitor class, either \"da\" or \"ltl\"") + monitor_parser.add_argument('-s', "--spec", dest="spec", help="Monitor specification file") + monitor_parser.add_argument('-t', "--monitor_type", dest="monitor_type", + help=f"Available options: {', '.join(Monitor.monitor_types.keys())}") + + container_parser = subparsers.add_parser("container") + container_parser.add_argument('-n', "--model_name", dest="model_name", required=True) + + params = parser.parse_args() + + try: + if params.subcmd == "monitor": + print("Opening and parsing the specification file %s" % params.spec) + if params.monitor_class == "da": + monitor = dot2k(params.spec, params.monitor_type, vars(params)) + elif params.monitor_class == "ltl": + monitor = ltl2k(params.spec, params.monitor_type, vars(params)) + else: + print("Unknown monitor class:", params.monitor_class) + sys.exit(1) + else: + monitor = Container(vars(params)) + except Exception as e: + print('Error: '+ str(e)) + print("Sorry : :-(") + sys.exit(1) + + print("Writing the monitor into the directory %s" % monitor.name) + monitor.print_files() + print("Almost done, checklist") + if params.subcmd == "monitor": + print(" - Edit the %s/%s.c to add the instrumentation" % (monitor.name, monitor.name)) + print(monitor.fill_tracepoint_tooltip()) + print(monitor.fill_makefile_tooltip()) + print(monitor.fill_kconfig_tooltip()) + print(monitor.fill_monitor_tooltip()) diff --git a/tools/verification/dot2/dot2c b/tools/verification/rvgen/dot2c index 3fe89ab88b65..bf0c67c5b66c 100644 --- a/tools/verification/dot2/dot2c +++ b/tools/verification/rvgen/dot2c @@ -14,7 +14,7 @@ # Documentation/trace/rv/deterministic_automata.rst if __name__ == '__main__': - from dot2 import dot2c + from rvgen import dot2c import argparse import sys diff --git a/tools/verification/dot2/automata.py b/tools/verification/rvgen/rvgen/automata.py index d9a3fe2b74bf..d9a3fe2b74bf 100644 --- a/tools/verification/dot2/automata.py +++ b/tools/verification/rvgen/rvgen/automata.py diff --git a/tools/verification/rvgen/rvgen/container.py b/tools/verification/rvgen/rvgen/container.py new file mode 100644 index 000000000000..51f188530b4d --- /dev/null +++ b/tools/verification/rvgen/rvgen/container.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# Generator for runtime verification monitor container + +from . 
import generator + + +class Container(generator.RVGenerator): + template_dir = "container" + + def __init__(self, extra_params={}): + super().__init__(extra_params) + self.name = extra_params.get("model_name") + self.main_h = self._read_template_file("main.h") + + def fill_model_h(self): + main_h = self.main_h + main_h = main_h.replace("%%MODEL_NAME%%", self.name) + return main_h + + def fill_kconfig_tooltip(self): + """Override to produce a marker for this container in the Kconfig""" + container_marker = self._kconfig_marker(self.name) + "\n" + result = super().fill_kconfig_tooltip() + if self.auto_patch: + self._patch_file("Kconfig", + self._kconfig_marker(), container_marker) + return result + return result + container_marker diff --git a/tools/verification/dot2/dot2c.py b/tools/verification/rvgen/rvgen/dot2c.py index fa2816ac7b61..b9b6f14cc536 100644 --- a/tools/verification/dot2/dot2c.py +++ b/tools/verification/rvgen/rvgen/dot2c.py @@ -13,7 +13,7 @@ # For further information, see: # Documentation/trace/rv/deterministic_automata.rst -from dot2.automata import Automata +from .automata import Automata class Dot2c(Automata): enum_suffix = "" @@ -152,28 +152,30 @@ class Dot2c(Automata): max_state_name = max(self.states, key = len).__len__() return max(max_state_name, self.invalid_state_str.__len__()) - def __get_state_string_length(self): - maxlen = self.__get_max_strlen_of_states() + self.enum_suffix.__len__() - return "%" + str(maxlen) + "s" - def get_aut_init_function(self): nr_states = self.states.__len__() nr_events = self.events.__len__() buff = [] - strformat = self.__get_state_string_length() - + maxlen = self.__get_max_strlen_of_states() + len(self.enum_suffix) + tab_braces = 2 * 8 + 2 + 1 # "\t\t{ " ... "}" + comma_space = 2 # ", " count last comma here + linetoolong = tab_braces + (maxlen + comma_space) * nr_events > self.line_length for x in range(nr_states): - line = "\t\t{ " + line = "\t\t{\n" if linetoolong else "\t\t{ " for y in range(nr_events): next_state = self.function[x][y] if next_state != self.invalid_state_str: next_state = self.function[x][y] + self.enum_suffix + if linetoolong: + line += "\t\t\t%s" % next_state + else: + line += "%*s" % (maxlen, next_state) if y != nr_events-1: - line = line + strformat % next_state + ", " + line += ",\n" if linetoolong else ", " else: - line = line + strformat % next_state + " }," + line += "\n\t\t}," if linetoolong else " }," buff.append(line) return self.__buff_to_string(buff) diff --git a/tools/verification/rvgen/rvgen/dot2k.py b/tools/verification/rvgen/rvgen/dot2k.py new file mode 100644 index 000000000000..ed0a3c901106 --- /dev/null +++ b/tools/verification/rvgen/rvgen/dot2k.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only +# +# Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> +# +# dot2k: transform dot files into a monitor for the Linux kernel. 
+# +# For further information, see: +# Documentation/trace/rv/da_monitor_synthesis.rst + +from .dot2c import Dot2c +from .generator import Monitor + + +class dot2k(Monitor, Dot2c): + template_dir = "dot2k" + + def __init__(self, file_path, MonitorType, extra_params={}): + self.monitor_type = MonitorType + Monitor.__init__(self, extra_params) + Dot2c.__init__(self, file_path, extra_params.get("model_name")) + self.enum_suffix = "_%s" % self.name + + def fill_monitor_type(self): + return self.monitor_type.upper() + + def fill_tracepoint_handlers_skel(self): + buff = [] + for event in self.events: + buff.append("static void handle_%s(void *data, /* XXX: fill header */)" % event) + buff.append("{") + handle = "handle_event" + if self.is_start_event(event): + buff.append("\t/* XXX: validate that this event always leads to the initial state */") + handle = "handle_start_event" + elif self.is_start_run_event(event): + buff.append("\t/* XXX: validate that this event is only valid in the initial state */") + handle = "handle_start_run_event" + if self.monitor_type == "per_task": + buff.append("\tstruct task_struct *p = /* XXX: how do I get p? */;"); + buff.append("\tda_%s_%s(p, %s%s);" % (handle, self.name, event, self.enum_suffix)); + else: + buff.append("\tda_%s_%s(%s%s);" % (handle, self.name, event, self.enum_suffix)); + buff.append("}") + buff.append("") + return '\n'.join(buff) + + def fill_tracepoint_attach_probe(self): + buff = [] + for event in self.events: + buff.append("\trv_attach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) + return '\n'.join(buff) + + def fill_tracepoint_detach_helper(self): + buff = [] + for event in self.events: + buff.append("\trv_detach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) + return '\n'.join(buff) + + def fill_model_h_header(self): + buff = [] + buff.append("/* SPDX-License-Identifier: GPL-2.0 */") + buff.append("/*") + buff.append(" * Automatically generated C representation of %s automaton" % (self.name)) + buff.append(" * For further information about this format, see kernel documentation:") + buff.append(" * Documentation/trace/rv/deterministic_automata.rst") + buff.append(" */") + buff.append("") + + return buff + + def fill_model_h(self): + # + # Adjust the definition names + # + self.enum_states_def = "states_%s" % self.name + self.enum_events_def = "events_%s" % self.name + self.struct_automaton_def = "automaton_%s" % self.name + self.var_automaton_def = "automaton_%s" % self.name + + buff = self.fill_model_h_header() + buff += self.format_model() + + return '\n'.join(buff) + + def fill_monitor_class_type(self): + if self.monitor_type == "per_task": + return "DA_MON_EVENTS_ID" + return "DA_MON_EVENTS_IMPLICIT" + + def fill_monitor_class(self): + if self.monitor_type == "per_task": + return "da_monitor_id" + return "da_monitor" + + def fill_tracepoint_args_skel(self, tp_type): + buff = [] + tp_args_event = [ + ("char *", "state"), + ("char *", "event"), + ("char *", "next_state"), + ("bool ", "final_state"), + ] + tp_args_error = [ + ("char *", "state"), + ("char *", "event"), + ] + tp_args_id = ("int ", "id") + tp_args = tp_args_event if tp_type == "event" else tp_args_error + if self.monitor_type == "per_task": + tp_args.insert(0, tp_args_id) + tp_proto_c = ", ".join([a+b for a,b in tp_args]) + tp_args_c = ", ".join([b for a,b in tp_args]) + buff.append(" TP_PROTO(%s)," % tp_proto_c) + buff.append(" TP_ARGS(%s)" % tp_args_c) + return '\n'.join(buff) + + def fill_main_c(self): + 
main_c = super().fill_main_c() + + min_type = self.get_minimun_type() + nr_events = len(self.events) + monitor_type = self.fill_monitor_type() + + main_c = main_c.replace("%%MIN_TYPE%%", min_type) + main_c = main_c.replace("%%NR_EVENTS%%", str(nr_events)) + main_c = main_c.replace("%%MONITOR_TYPE%%", monitor_type) + + return main_c diff --git a/tools/verification/dot2/dot2k.py b/tools/verification/rvgen/rvgen/generator.py index 745d35a4a379..3441385c1177 100644 --- a/tools/verification/dot2/dot2k.py +++ b/tools/verification/rvgen/rvgen/generator.py @@ -3,74 +3,27 @@ # # Copyright (C) 2019-2022 Red Hat, Inc. Daniel Bristot de Oliveira <bristot@kernel.org> # -# dot2k: transform dot files into a monitor for the Linux kernel. -# -# For further information, see: -# Documentation/trace/rv/da_monitor_synthesis.rst +# Abtract class for generating kernel runtime verification monitors from specification file -from dot2.dot2c import Dot2c import platform import os -class dot2k(Dot2c): - monitor_types = { "global" : 1, "per_cpu" : 2, "per_task" : 3 } - monitor_templates_dir = "dot2/dot2k_templates/" + +class RVGenerator: rv_dir = "kernel/trace/rv" - monitor_type = "per_cpu" - def __init__(self, file_path, MonitorType, extra_params={}): - self.container = extra_params.get("container") + def __init__(self, extra_params={}): + self.name = extra_params.get("model_name") self.parent = extra_params.get("parent") - self.__fill_rv_templates_dir() - - if self.container: - if file_path: - raise ValueError("A container does not require a dot file") - if MonitorType: - raise ValueError("A container does not require a monitor type") - if self.parent: - raise ValueError("A container cannot have a parent") - self.name = extra_params.get("model_name") - self.events = [] - self.states = [] - self.main_c = self.__read_file(self.monitor_templates_dir + "main_container.c") - self.main_h = self.__read_file(self.monitor_templates_dir + "main_container.h") - else: - super().__init__(file_path, extra_params.get("model_name")) - - self.monitor_type = self.monitor_types.get(MonitorType) - if self.monitor_type is None: - raise ValueError("Unknown monitor type: %s" % MonitorType) - self.monitor_type = MonitorType - self.main_c = self.__read_file(self.monitor_templates_dir + "main.c") - self.trace_h = self.__read_file(self.monitor_templates_dir + "trace.h") - self.kconfig = self.__read_file(self.monitor_templates_dir + "Kconfig") - self.enum_suffix = "_%s" % self.name + self.abs_template_dir = \ + os.path.join(os.path.dirname(__file__), "templates", self.template_dir) + self.main_c = self._read_template_file("main.c") + self.kconfig = self._read_template_file("Kconfig") self.description = extra_params.get("description", self.name) or "auto-generated" self.auto_patch = extra_params.get("auto_patch") if self.auto_patch: self.__fill_rv_kernel_dir() - def __fill_rv_templates_dir(self): - - if os.path.exists(self.monitor_templates_dir): - return - - if platform.system() != "Linux": - raise OSError("I can only run on Linux.") - - kernel_path = "/lib/modules/%s/build/tools/verification/dot2/dot2k_templates/" % (platform.release()) - - if os.path.exists(kernel_path): - self.monitor_templates_dir = kernel_path - return - - if os.path.exists("/usr/share/dot2/dot2k_templates/"): - self.monitor_templates_dir = "/usr/share/dot2/dot2k_templates/" - return - - raise FileNotFoundError("Could not find the template directory, do you have the kernel source installed?") - def __fill_rv_kernel_dir(self): # first try if we are running in the kernel 
tree root @@ -97,7 +50,7 @@ class dot2k(Dot2c): raise FileNotFoundError("Could not find the rv directory, do you have the kernel source installed?") - def __read_file(self, path): + def _read_file(self, path): try: fd = open(path, 'r') except OSError: @@ -108,17 +61,15 @@ class dot2k(Dot2c): fd.close() return content - def __buff_to_string(self, buff): - string = "" - - for line in buff: - string = string + line + "\n" - - # cut off the last \n - return string[:-1] - - def fill_monitor_type(self): - return self.monitor_type.upper() + def _read_template_file(self, file): + try: + path = os.path.join(self.abs_template_dir, file) + return self._read_file(path) + except Exception: + # Specific template file not found. Try the generic template file in the template/ + # directory, which is one level up + path = os.path.join(self.abs_template_dir, "..", file) + return self._read_file(path) def fill_parent(self): return "&rv_%s" % self.parent if self.parent else "NULL" @@ -129,53 +80,23 @@ class dot2k(Dot2c): return "" def fill_tracepoint_handlers_skel(self): - buff = [] - for event in self.events: - buff.append("static void handle_%s(void *data, /* XXX: fill header */)" % event) - buff.append("{") - handle = "handle_event" - if self.is_start_event(event): - buff.append("\t/* XXX: validate that this event always leads to the initial state */") - handle = "handle_start_event" - elif self.is_start_run_event(event): - buff.append("\t/* XXX: validate that this event is only valid in the initial state */") - handle = "handle_start_run_event" - if self.monitor_type == "per_task": - buff.append("\tstruct task_struct *p = /* XXX: how do I get p? */;"); - buff.append("\tda_%s_%s(p, %s%s);" % (handle, self.name, event, self.enum_suffix)); - else: - buff.append("\tda_%s_%s(%s%s);" % (handle, self.name, event, self.enum_suffix)); - buff.append("}") - buff.append("") - return self.__buff_to_string(buff) + return "NotImplemented" def fill_tracepoint_attach_probe(self): - buff = [] - for event in self.events: - buff.append("\trv_attach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) - return self.__buff_to_string(buff) + return "NotImplemented" def fill_tracepoint_detach_helper(self): - buff = [] - for event in self.events: - buff.append("\trv_detach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_%s);" % (self.name, event)) - return self.__buff_to_string(buff) + return "NotImplemented" def fill_main_c(self): main_c = self.main_c - monitor_type = self.fill_monitor_type() - min_type = self.get_minimun_type() - nr_events = len(self.events) tracepoint_handlers = self.fill_tracepoint_handlers_skel() tracepoint_attach = self.fill_tracepoint_attach_probe() tracepoint_detach = self.fill_tracepoint_detach_helper() parent = self.fill_parent() parent_include = self.fill_include_parent() - main_c = main_c.replace("%%MONITOR_TYPE%%", monitor_type) - main_c = main_c.replace("%%MIN_TYPE%%", min_type) main_c = main_c.replace("%%MODEL_NAME%%", self.name) - main_c = main_c.replace("%%NR_EVENTS%%", str(nr_events)) main_c = main_c.replace("%%TRACEPOINT_HANDLERS_SKEL%%", tracepoint_handlers) main_c = main_c.replace("%%TRACEPOINT_ATTACH%%", tracepoint_attach) main_c = main_c.replace("%%TRACEPOINT_DETACH%%", tracepoint_detach) @@ -185,63 +106,17 @@ class dot2k(Dot2c): return main_c - def fill_model_h_header(self): - buff = [] - buff.append("/* SPDX-License-Identifier: GPL-2.0 */") - buff.append("/*") - buff.append(" * Automatically generated C representation of %s automaton" % (self.name)) - 
buff.append(" * For further information about this format, see kernel documentation:") - buff.append(" * Documentation/trace/rv/deterministic_automata.rst") - buff.append(" */") - buff.append("") - - return buff - def fill_model_h(self): - # - # Adjust the definition names - # - self.enum_states_def = "states_%s" % self.name - self.enum_events_def = "events_%s" % self.name - self.struct_automaton_def = "automaton_%s" % self.name - self.var_automaton_def = "automaton_%s" % self.name - - buff = self.fill_model_h_header() - buff += self.format_model() - - return self.__buff_to_string(buff) + return "NotImplemented" def fill_monitor_class_type(self): - if self.monitor_type == "per_task": - return "DA_MON_EVENTS_ID" - return "DA_MON_EVENTS_IMPLICIT" + return "NotImplemented" def fill_monitor_class(self): - if self.monitor_type == "per_task": - return "da_monitor_id" - return "da_monitor" + return "NotImplemented" def fill_tracepoint_args_skel(self, tp_type): - buff = [] - tp_args_event = [ - ("char *", "state"), - ("char *", "event"), - ("char *", "next_state"), - ("bool ", "final_state"), - ] - tp_args_error = [ - ("char *", "state"), - ("char *", "event"), - ] - tp_args_id = ("int ", "id") - tp_args = tp_args_event if tp_type == "event" else tp_args_error - if self.monitor_type == "per_task": - tp_args.insert(0, tp_args_id) - tp_proto_c = ", ".join([a+b for a,b in tp_args]) - tp_args_c = ", ".join([b for a,b in tp_args]) - buff.append(" TP_PROTO(%s)," % tp_proto_c) - buff.append(" TP_ARGS(%s)" % tp_args_c) - return self.__buff_to_string(buff) + return "NotImplemented" def fill_monitor_deps(self): buff = [] @@ -249,21 +124,7 @@ class dot2k(Dot2c): if self.parent: buff.append(" depends on RV_MON_%s" % self.parent.upper()) buff.append(" default y") - return self.__buff_to_string(buff) - - def fill_trace_h(self): - trace_h = self.trace_h - monitor_class = self.fill_monitor_class() - monitor_class_type = self.fill_monitor_class_type() - tracepoint_args_skel_event = self.fill_tracepoint_args_skel("event") - tracepoint_args_skel_error = self.fill_tracepoint_args_skel("error") - trace_h = trace_h.replace("%%MODEL_NAME%%", self.name) - trace_h = trace_h.replace("%%MODEL_NAME_UP%%", self.name.upper()) - trace_h = trace_h.replace("%%MONITOR_CLASS%%", monitor_class) - trace_h = trace_h.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type) - trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_EVENT%%", tracepoint_args_skel_event) - trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_ERROR%%", tracepoint_args_skel_error) - return trace_h + return '\n'.join(buff) def fill_kconfig(self): kconfig = self.kconfig @@ -276,21 +137,17 @@ class dot2k(Dot2c): kconfig = kconfig.replace("%%MONITOR_DEPS%%", monitor_deps) return kconfig - def fill_main_container_h(self): - main_h = self.main_h - main_h = main_h.replace("%%MODEL_NAME%%", self.name) - return main_h - - def __patch_file(self, file, marker, line): + def _patch_file(self, file, marker, line): + assert self.auto_patch file_to_patch = os.path.join(self.rv_dir, file) - content = self.__read_file(file_to_patch) + content = self._read_file(file_to_patch) content = content.replace(marker, line + "\n" + marker) self.__write_file(file_to_patch, content) def fill_tracepoint_tooltip(self): monitor_class_type = self.fill_monitor_class_type() if self.auto_patch: - self.__patch_file("rv_trace.h", + self._patch_file("rv_trace.h", "// Add new monitors based on CONFIG_%s here" % monitor_class_type, "#include <monitors/%s/%s_trace.h>" % (self.name, self.name)) return " - 
Patching %s/rv_trace.h, double check the result" % self.rv_dir @@ -300,10 +157,15 @@ Add this line where other tracepoints are included and %s is defined: #include <monitors/%s/%s_trace.h> """ % (self.rv_dir, monitor_class_type, self.name, self.name) + def _kconfig_marker(self, container=None) -> str: + return "# Add new %smonitors here" % (container + " " + if container else "") + def fill_kconfig_tooltip(self): if self.auto_patch: - self.__patch_file("Kconfig", - "# Add new monitors here", + # monitors with a container should stay together in the Kconfig + self._patch_file("Kconfig", + self._kconfig_marker(self.parent), "source \"kernel/trace/rv/monitors/%s/Kconfig\"" % (self.name)) return " - Patching %s/Kconfig, double check the result" % self.rv_dir @@ -316,7 +178,7 @@ source \"kernel/trace/rv/monitors/%s/Kconfig\" name = self.name name_up = name.upper() if self.auto_patch: - self.__patch_file("Makefile", + self._patch_file("Makefile", "# Add new monitors here", "obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o" % (name_up, name, name)) return " - Patching %s/Makefile, double check the result" % self.rv_dir @@ -352,7 +214,7 @@ obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o file.close() - def __create_file(self, file_name, content): + def _create_file(self, file_name, content): path = "%s/%s" % (self.name, file_name) if self.auto_patch: path = os.path.join(self.rv_dir, "monitors", path) @@ -370,20 +232,39 @@ obj-$(CONFIG_RV_MON_%s) += monitors/%s/%s.o self.__create_directory() path = "%s.c" % self.name - self.__create_file(path, main_c) - - if self.container: - main_h = self.fill_main_container_h() - path = "%s.h" % self.name - self.__create_file(path, main_h) - else: - model_h = self.fill_model_h() - path = "%s.h" % self.name - self.__create_file(path, model_h) + self._create_file(path, main_c) - trace_h = self.fill_trace_h() - path = "%s_trace.h" % self.name - self.__create_file(path, trace_h) + model_h = self.fill_model_h() + path = "%s.h" % self.name + self._create_file(path, model_h) kconfig = self.fill_kconfig() - self.__create_file("Kconfig", kconfig) + self._create_file("Kconfig", kconfig) + + +class Monitor(RVGenerator): + monitor_types = { "global" : 1, "per_cpu" : 2, "per_task" : 3 } + + def __init__(self, extra_params={}): + super().__init__(extra_params) + self.trace_h = self._read_template_file("trace.h") + + def fill_trace_h(self): + trace_h = self.trace_h + monitor_class = self.fill_monitor_class() + monitor_class_type = self.fill_monitor_class_type() + tracepoint_args_skel_event = self.fill_tracepoint_args_skel("event") + tracepoint_args_skel_error = self.fill_tracepoint_args_skel("error") + trace_h = trace_h.replace("%%MODEL_NAME%%", self.name) + trace_h = trace_h.replace("%%MODEL_NAME_UP%%", self.name.upper()) + trace_h = trace_h.replace("%%MONITOR_CLASS%%", monitor_class) + trace_h = trace_h.replace("%%MONITOR_CLASS_TYPE%%", monitor_class_type) + trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_EVENT%%", tracepoint_args_skel_event) + trace_h = trace_h.replace("%%TRACEPOINT_ARGS_SKEL_ERROR%%", tracepoint_args_skel_error) + return trace_h + + def print_files(self): + super().print_files() + trace_h = self.fill_trace_h() + path = "%s_trace.h" % self.name + self._create_file(path, trace_h) diff --git a/tools/verification/rvgen/rvgen/ltl2ba.py b/tools/verification/rvgen/rvgen/ltl2ba.py new file mode 100644 index 000000000000..f14e6760ac3d --- /dev/null +++ b/tools/verification/rvgen/rvgen/ltl2ba.py @@ -0,0 +1,566 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: 
GPL-2.0-only +# +# Implementation based on +# Gerth, R., Peled, D., Vardi, M.Y., Wolper, P. (1996). +# Simple On-the-fly Automatic Verification of Linear Temporal Logic. +# https://doi.org/10.1007/978-0-387-34892-6_1 +# With extra optimizations + +from ply.lex import lex +from ply.yacc import yacc + +# Grammar: +# ltl ::= opd | ( ltl ) | ltl binop ltl | unop ltl +# +# Operands (opd): +# true, false, user-defined names +# +# Unary Operators (unop): +# always +# eventually +# next +# not +# +# Binary Operators (binop): +# until +# and +# or +# imply +# equivalent + +tokens = ( + 'AND', + 'OR', + 'IMPLY', + 'UNTIL', + 'ALWAYS', + 'EVENTUALLY', + 'NEXT', + 'VARIABLE', + 'LITERAL', + 'NOT', + 'LPAREN', + 'RPAREN', + 'ASSIGN', +) + +t_AND = r'and' +t_OR = r'or' +t_IMPLY = r'imply' +t_UNTIL = r'until' +t_ALWAYS = r'always' +t_NEXT = r'next' +t_EVENTUALLY = r'eventually' +t_VARIABLE = r'[A-Z_0-9]+' +t_LITERAL = r'true|false' +t_NOT = r'not' +t_LPAREN = r'\(' +t_RPAREN = r'\)' +t_ASSIGN = r'=' +t_ignore_COMMENT = r'\#.*' +t_ignore = ' \t\n' + +def t_error(t): + raise ValueError(f"Illegal character '{t.value[0]}'") + +lexer = lex() + +class GraphNode: + uid = 0 + + def __init__(self, incoming: set['GraphNode'], new, old, _next): + self.init = False + self.outgoing = set() + self.labels = set() + self.incoming = incoming.copy() + self.new = new.copy() + self.old = old.copy() + self.next = _next.copy() + self.id = GraphNode.uid + GraphNode.uid += 1 + + def expand(self, node_set): + if not self.new: + for nd in node_set: + if nd.old == self.old and nd.next == self.next: + nd.incoming |= self.incoming + return node_set + + new_current_node = GraphNode({self}, self.next, set(), set()) + return new_current_node.expand({self} | node_set) + n = self.new.pop() + return n.expand(self, node_set) + + def __lt__(self, other): + return self.id < other.id + +class ASTNode: + uid = 1 + + def __init__(self, op): + self.op = op + self.id = ASTNode.uid + ASTNode.uid += 1 + + def __hash__(self): + return hash(self.op) + + def __eq__(self, other): + return self is other + + def __iter__(self): + yield self + yield from self.op + + def negate(self): + self.op = self.op.negate() + return self + + def expand(self, node, node_set): + return self.op.expand(self, node, node_set) + + def __str__(self): + if isinstance(self.op, Literal): + return str(self.op.value) + if isinstance(self.op, Variable): + return self.op.name.lower() + return "val" + str(self.id) + + def normalize(self): + # Get rid of: + # - ALWAYS + # - EVENTUALLY + # - IMPLY + # And move all the NOT to be inside + self.op = self.op.normalize() + return self + +class BinaryOp: + op_str = "not_supported" + + def __init__(self, left: ASTNode, right: ASTNode): + self.left = left + self.right = right + + def __hash__(self): + return hash((self.left, self.right)) + + def __iter__(self): + yield from self.left + yield from self.right + + def normalize(self): + raise NotImplementedError + + def negate(self): + raise NotImplementedError + + def _is_temporal(self): + raise NotImplementedError + + def is_temporal(self): + if self.left.op.is_temporal(): + return True + if self.right.op.is_temporal(): + return True + return self._is_temporal() + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + raise NotImplementedError + +class AndOp(BinaryOp): + op_str = '&&' + + def normalize(self): + return self + + def negate(self): + return OrOp(self.left.negate(), self.right.negate()) + + def _is_temporal(self): + return False + + @staticmethod + 
def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + if not n.op.is_temporal(): + node.old.add(n) + return node.expand(node_set) + + tmp = GraphNode(node.incoming, + node.new | ({n.op.left, n.op.right} - node.old), + node.old | {n}, + node.next) + return tmp.expand(node_set) + +class OrOp(BinaryOp): + op_str = '||' + + def normalize(self): + return self + + def negate(self): + return AndOp(self.left.negate(), self.right.negate()) + + def _is_temporal(self): + return False + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + if not n.op.is_temporal(): + node.old |= {n} + return node.expand(node_set) + + node1 = GraphNode(node.incoming, + node.new | ({n.op.left} - node.old), + node.old | {n}, + node.next) + node2 = GraphNode(node.incoming, + node.new | ({n.op.right} - node.old), + node.old | {n}, + node.next) + return node2.expand(node1.expand(node_set)) + +class UntilOp(BinaryOp): + def normalize(self): + return self + + def negate(self): + return VOp(self.left.negate(), self.right.negate()) + + def _is_temporal(self): + return True + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + node1 = GraphNode(node.incoming, + node.new | ({n.op.left} - node.old), + node.old | {n}, + node.next | {n}) + node2 = GraphNode(node.incoming, + node.new | ({n.op.right} - node.old), + node.old | {n}, + node.next) + return node2.expand(node1.expand(node_set)) + +class VOp(BinaryOp): + def normalize(self): + return self + + def negate(self): + return UntilOp(self.left.negate(), self.right.negate()) + + def _is_temporal(self): + return True + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + node1 = GraphNode(node.incoming, + node.new | ({n.op.right} - node.old), + node.old | {n}, + node.next | {n}) + node2 = GraphNode(node.incoming, + node.new | ({n.op.left, n.op.right} - node.old), + node.old | {n}, + node.next) + return node2.expand(node1.expand(node_set)) + +class ImplyOp(BinaryOp): + def normalize(self): + # P -> Q === !P | Q + return OrOp(self.left.negate(), self.right) + + def _is_temporal(self): + return False + + def negate(self): + # !(P -> Q) === !(!P | Q) === P & !Q + return AndOp(self.left, self.right.negate()) + +class UnaryOp: + def __init__(self, child: ASTNode): + self.child = child + + def __iter__(self): + yield from self.child + + def __hash__(self): + return hash(self.child) + + def normalize(self): + raise NotImplementedError + + def _is_temporal(self): + raise NotImplementedError + + def is_temporal(self): + if self.child.op.is_temporal(): + return True + return self._is_temporal() + + def negate(self): + raise NotImplementedError + +class EventuallyOp(UnaryOp): + def __str__(self): + return "eventually " + str(self.child) + + def normalize(self): + # <>F == true U F + return UntilOp(ASTNode(Literal(True)), self.child) + + def _is_temporal(self): + return True + + def negate(self): + # !<>F == [](!F) + return AlwaysOp(self.child.negate()).normalize() + +class AlwaysOp(UnaryOp): + def normalize(self): + # []F === !(true U !F) == false V F + new = ASTNode(Literal(False)) + return VOp(new, self.child) + + def _is_temporal(self): + return True + + def negate(self): + # ![]F == <>(!F) + return EventuallyOp(self.child.negate()).normalize() + +class NextOp(UnaryOp): + def normalize(self): + return self + + def _is_temporal(self): + return True + + def negate(self): + # not (next A) == next (not A) + self.child = self.child.negate() + return self + + @staticmethod + 
def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + tmp = GraphNode(node.incoming, + node.new, + node.old | {n}, + node.next | {n.op.child}) + return tmp.expand(node_set) + +class NotOp(UnaryOp): + def __str__(self): + return "!" + str(self.child) + + def normalize(self): + return self.child.op.negate() + + def negate(self): + return self.child.op + + def _is_temporal(self): + return False + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + for f in node.old: + if n.op.child is f: + return node_set + node.old |= {n} + return node.expand(node_set) + +class Variable: + def __init__(self, name: str): + self.name = name + + def __hash__(self): + return hash(self.name) + + def __iter__(self): + yield from () + + def negate(self): + new = ASTNode(self) + return NotOp(new) + + def normalize(self): + return self + + def is_temporal(self): + return False + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + for f in node.old: + if isinstance(f, NotOp) and f.op.child is n: + return node_set + node.old |= {n} + return node.expand(node_set) + +class Literal: + def __init__(self, value: bool): + self.value = value + + def __iter__(self): + yield from () + + def __hash__(self): + return hash(self.value) + + def __str__(self): + if self.value: + return "true" + return "false" + + def negate(self): + self.value = not self.value + return self + + def normalize(self): + return self + + def is_temporal(self): + return False + + @staticmethod + def expand(n: ASTNode, node: GraphNode, node_set) -> set[GraphNode]: + if not n.op.value: + return node_set + node.old |= {n} + return node.expand(node_set) + +def p_spec(p): + ''' + spec : assign + | assign spec + ''' + if len(p) == 3: + p[2].append(p[1]) + p[0] = p[2] + else: + p[0] = [p[1]] + +def p_assign(p): + ''' + assign : VARIABLE ASSIGN ltl + ''' + p[0] = (p[1], p[3]) + +def p_ltl(p): + ''' + ltl : opd + | binop + | unop + ''' + p[0] = p[1] + +def p_opd(p): + ''' + opd : VARIABLE + | LITERAL + | LPAREN ltl RPAREN + ''' + if p[1] == "true": + p[0] = ASTNode(Literal(True)) + elif p[1] == "false": + p[0] = ASTNode(Literal(False)) + elif p[1] == '(': + p[0] = p[2] + else: + p[0] = ASTNode(Variable(p[1])) + +def p_unop(p): + ''' + unop : ALWAYS ltl + | EVENTUALLY ltl + | NEXT ltl + | NOT ltl + ''' + if p[1] == "always": + op = AlwaysOp(p[2]) + elif p[1] == "eventually": + op = EventuallyOp(p[2]) + elif p[1] == "next": + op = NextOp(p[2]) + elif p[1] == "not": + op = NotOp(p[2]) + else: + raise ValueError(f"Invalid unary operator {p[1]}") + + p[0] = ASTNode(op) + +def p_binop(p): + ''' + binop : opd UNTIL ltl + | opd AND ltl + | opd OR ltl + | opd IMPLY ltl + ''' + if p[2] == "and": + op = AndOp(p[1], p[3]) + elif p[2] == "until": + op = UntilOp(p[1], p[3]) + elif p[2] == "or": + op = OrOp(p[1], p[3]) + elif p[2] == "imply": + op = ImplyOp(p[1], p[3]) + else: + raise ValueError(f"Invalid binary operator {p[2]}") + + p[0] = ASTNode(op) + +parser = yacc() + +def parse_ltl(s: str) -> ASTNode: + spec = parser.parse(s) + + rule = None + subexpr = {} + + for assign in spec: + if assign[0] == "RULE": + rule = assign[1] + else: + subexpr[assign[0]] = assign[1] + + if rule is None: + raise ValueError("Please define your specification in the \"RULE = <LTL spec>\" format") + + for node in rule: + if not isinstance(node.op, Variable): + continue + replace = subexpr.get(node.op.name) + if replace is not None: + node.op = replace.op + + return rule + +def create_graph(s: str): + atoms = 
set() + + ltl = parse_ltl(s) + for c in ltl: + c.normalize() + if isinstance(c.op, Variable): + atoms.add(c.op.name) + + init = GraphNode(set(), set(), set(), set()) + head = GraphNode({init}, {ltl}, set(), set()) + graph = sorted(head.expand(set())) + + for i, node in enumerate(graph): + # The id assignment during graph generation has gaps. Reassign them + node.id = i + + for incoming in node.incoming: + if incoming is init: + node.init = True + else: + incoming.outgoing.add(node) + for o in node.old: + if not o.op.is_temporal(): + node.labels.add(str(o)) + + return sorted(atoms), graph, ltl diff --git a/tools/verification/rvgen/rvgen/ltl2k.py b/tools/verification/rvgen/rvgen/ltl2k.py new file mode 100644 index 000000000000..b075f98d50c4 --- /dev/null +++ b/tools/verification/rvgen/rvgen/ltl2k.py @@ -0,0 +1,271 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0-only + +from pathlib import Path +from . import generator +from . import ltl2ba + +COLUMN_LIMIT = 100 + +def line_len(line: str) -> int: + tabs = line.count('\t') + return tabs * 7 + len(line) + +def break_long_line(line: str, indent='') -> list[str]: + result = [] + while line_len(line) > COLUMN_LIMIT: + i = line[:COLUMN_LIMIT - line_len(line)].rfind(' ') + result.append(line[:i]) + line = indent + line[i + 1:] + if line: + result.append(line) + return result + +def build_condition_string(node: ltl2ba.GraphNode): + if not node.labels: + return "(true)" + + result = "(" + + first = True + for label in sorted(node.labels): + if not first: + result += " && " + result += label + first = False + + result += ")" + + return result + +def abbreviate_atoms(atoms: list[str]) -> list[str]: + def shorten(s: str) -> str: + skip = ["is", "by", "or", "and"] + return '_'.join([x[:2] for x in s.lower().split('_') if x not in skip]) + + abbrs = [] + for atom in atoms: + for i in range(len(atom), -1, -1): + if sum(a.startswith(atom[:i]) for a in atoms) > 1: + break + share = atom[:i] + unique = atom[i:] + abbrs.append((shorten(share) + shorten(unique))) + return abbrs + +class ltl2k(generator.Monitor): + template_dir = "ltl2k" + + def __init__(self, file_path, MonitorType, extra_params={}): + if MonitorType != "per_task": + raise NotImplementedError("Only per_task monitor is supported for LTL") + super().__init__(extra_params) + with open(file_path) as f: + self.atoms, self.ba, self.ltl = ltl2ba.create_graph(f.read()) + self.atoms_abbr = abbreviate_atoms(self.atoms) + self.name = extra_params.get("model_name") + if not self.name: + self.name = Path(file_path).stem + + def _fill_states(self) -> str: + buf = [ + "enum ltl_buchi_state {", + ] + + for node in self.ba: + buf.append("\tS%i," % node.id) + buf.append("\tRV_NUM_BA_STATES") + buf.append("};") + buf.append("static_assert(RV_NUM_BA_STATES <= RV_MAX_BA_STATES);") + return buf + + def _fill_atoms(self): + buf = ["enum ltl_atom {"] + for a in sorted(self.atoms): + buf.append("\tLTL_%s," % a) + buf.append("\tLTL_NUM_ATOM") + buf.append("};") + buf.append("static_assert(LTL_NUM_ATOM <= RV_MAX_LTL_ATOM);") + return buf + + def _fill_atoms_to_string(self): + buf = [ + "static const char *ltl_atom_str(enum ltl_atom atom)", + "{", + "\tstatic const char *const names[] = {" + ] + + for name in self.atoms_abbr: + buf.append("\t\t\"%s\"," % name) + + buf.extend([ + "\t};", + "", + "\treturn names[atom];", + "}" + ]) + return buf + + def _fill_atom_values(self, required_values): + buf = [] + for node in self.ltl: + if str(node) not in required_values: + continue + + if isinstance(node.op, 
ltl2ba.AndOp): + buf.append("\tbool %s = %s && %s;" % (node, node.op.left, node.op.right)) + required_values |= {str(node.op.left), str(node.op.right)} + elif isinstance(node.op, ltl2ba.OrOp): + buf.append("\tbool %s = %s || %s;" % (node, node.op.left, node.op.right)) + required_values |= {str(node.op.left), str(node.op.right)} + elif isinstance(node.op, ltl2ba.NotOp): + buf.append("\tbool %s = !%s;" % (node, node.op.child)) + required_values.add(str(node.op.child)) + + for atom in self.atoms: + if atom.lower() not in required_values: + continue + buf.append("\tbool %s = test_bit(LTL_%s, mon->atoms);" % (atom.lower(), atom)) + + buf.reverse() + + buf2 = [] + for line in buf: + buf2.extend(break_long_line(line, "\t ")) + return buf2 + + def _fill_transitions(self): + buf = [ + "static void", + "ltl_possible_next_states(struct ltl_monitor *mon, unsigned int state, unsigned long *next)", + "{" + ] + + required_values = set() + for node in self.ba: + for o in sorted(node.outgoing): + required_values |= o.labels + + buf.extend(self._fill_atom_values(required_values)) + buf.extend([ + "", + "\tswitch (state) {" + ]) + + for node in self.ba: + buf.append("\tcase S%i:" % node.id) + + for o in sorted(node.outgoing): + line = "\t\tif " + indent = "\t\t " + + line += build_condition_string(o) + lines = break_long_line(line, indent) + buf.extend(lines) + + buf.append("\t\t\t__set_bit(S%i, next);" % o.id) + buf.append("\t\tbreak;") + buf.extend([ + "\t}", + "}" + ]) + + return buf + + def _fill_start(self): + buf = [ + "static void ltl_start(struct task_struct *task, struct ltl_monitor *mon)", + "{" + ] + + required_values = set() + for node in self.ba: + if node.init: + required_values |= node.labels + + buf.extend(self._fill_atom_values(required_values)) + buf.append("") + + for node in self.ba: + if not node.init: + continue + + line = "\tif " + indent = "\t " + + line += build_condition_string(node) + lines = break_long_line(line, indent) + buf.extend(lines) + + buf.append("\t\t__set_bit(S%i, mon->states);" % node.id) + buf.append("}") + return buf + + def fill_tracepoint_handlers_skel(self): + buff = [] + buff.append("static void handle_example_event(void *data, /* XXX: fill header */)") + buff.append("{") + buff.append("\tltl_atom_update(task, LTL_%s, true/false);" % self.atoms[0]) + buff.append("}") + buff.append("") + return '\n'.join(buff) + + def fill_tracepoint_attach_probe(self): + return "\trv_attach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_example_event);" \ + % self.name + + def fill_tracepoint_detach_helper(self): + return "\trv_detach_trace_probe(\"%s\", /* XXX: tracepoint */, handle_sample_event);" \ + % self.name + + def fill_atoms_init(self): + buff = [] + for a in self.atoms: + buff.append("\tltl_atom_set(mon, LTL_%s, true/false);" % a) + return '\n'.join(buff) + + def fill_model_h(self): + buf = [ + "/* SPDX-License-Identifier: GPL-2.0 */", + "", + "/*", + " * C implementation of Buchi automaton, automatically generated by", + " * tools/verification/rvgen from the linear temporal logic specification.", + " * For further information, see kernel documentation:", + " * Documentation/trace/rv/linear_temporal_logic.rst", + " */", + "", + "#include <linux/rv.h>", + "", + "#define MONITOR_NAME " + self.name, + "" + ] + + buf.extend(self._fill_atoms()) + buf.append('') + + buf.extend(self._fill_atoms_to_string()) + buf.append('') + + buf.extend(self._fill_states()) + buf.append('') + + buf.extend(self._fill_start()) + buf.append('') + + buf.extend(self._fill_transitions()) + 
buf.append('') + + return '\n'.join(buf) + + def fill_monitor_class_type(self): + return "LTL_MON_EVENTS_ID" + + def fill_monitor_class(self): + return "ltl_monitor_id" + + def fill_main_c(self): + main_c = super().fill_main_c() + main_c = main_c.replace("%%ATOMS_INIT%%", self.fill_atoms_init()) + + return main_c diff --git a/tools/verification/dot2/dot2k_templates/Kconfig b/tools/verification/rvgen/rvgen/templates/Kconfig index 291b29ea28db..291b29ea28db 100644 --- a/tools/verification/dot2/dot2k_templates/Kconfig +++ b/tools/verification/rvgen/rvgen/templates/Kconfig diff --git a/tools/verification/rvgen/rvgen/templates/container/Kconfig b/tools/verification/rvgen/rvgen/templates/container/Kconfig new file mode 100644 index 000000000000..a606111949c2 --- /dev/null +++ b/tools/verification/rvgen/rvgen/templates/container/Kconfig @@ -0,0 +1,5 @@ +config RV_MON_%%MODEL_NAME_UP%% + depends on RV + bool "%%MODEL_NAME%% monitor" + help + %%DESCRIPTION%% diff --git a/tools/verification/dot2/dot2k_templates/main_container.c b/tools/verification/rvgen/rvgen/templates/container/main.c index 89fc17cf8958..7d9b2f95c7e9 100644 --- a/tools/verification/dot2/dot2k_templates/main_container.c +++ b/tools/verification/rvgen/rvgen/templates/container/main.c @@ -21,8 +21,7 @@ struct rv_monitor rv_%%MODEL_NAME%% = { static int __init register_%%MODEL_NAME%%(void) { - rv_register_monitor(&rv_%%MODEL_NAME%%, NULL); - return 0; + return rv_register_monitor(&rv_%%MODEL_NAME%%, NULL); } static void __exit unregister_%%MODEL_NAME%%(void) diff --git a/tools/verification/dot2/dot2k_templates/main_container.h b/tools/verification/rvgen/rvgen/templates/container/main.h index 0f6883ab4bcc..0f6883ab4bcc 100644 --- a/tools/verification/dot2/dot2k_templates/main_container.h +++ b/tools/verification/rvgen/rvgen/templates/container/main.h diff --git a/tools/verification/dot2/dot2k_templates/main.c b/tools/verification/rvgen/rvgen/templates/dot2k/main.c index 83044a20c89a..e0fd1134bd85 100644 --- a/tools/verification/dot2/dot2k_templates/main.c +++ b/tools/verification/rvgen/rvgen/templates/dot2k/main.c @@ -74,8 +74,7 @@ static struct rv_monitor rv_%%MODEL_NAME%% = { static int __init register_%%MODEL_NAME%%(void) { - rv_register_monitor(&rv_%%MODEL_NAME%%, %%PARENT%%); - return 0; + return rv_register_monitor(&rv_%%MODEL_NAME%%, %%PARENT%%); } static void __exit unregister_%%MODEL_NAME%%(void) diff --git a/tools/verification/dot2/dot2k_templates/trace.h b/tools/verification/rvgen/rvgen/templates/dot2k/trace.h index 87d3a1308926..87d3a1308926 100644 --- a/tools/verification/dot2/dot2k_templates/trace.h +++ b/tools/verification/rvgen/rvgen/templates/dot2k/trace.h diff --git a/tools/verification/rvgen/rvgen/templates/ltl2k/main.c b/tools/verification/rvgen/rvgen/templates/ltl2k/main.c new file mode 100644 index 000000000000..f85d076fbf78 --- /dev/null +++ b/tools/verification/rvgen/rvgen/templates/ltl2k/main.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/ftrace.h> +#include <linux/tracepoint.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/rv.h> +#include <rv/instrumentation.h> + +#define MODULE_NAME "%%MODEL_NAME%%" + +/* + * XXX: include required tracepoint headers, e.g., + * #include <trace/events/sched.h> + */ +#include <rv_trace.h> +%%INCLUDE_PARENT%% + +/* + * This is the self-generated part of the monitor. Generally, there is no need + * to touch this section. 
+ */ +#include "%%MODEL_NAME%%.h" +#include <rv/ltl_monitor.h> + +static void ltl_atoms_fetch(struct task_struct *task, struct ltl_monitor *mon) +{ + /* + * This is called everytime the Buchi automaton is triggered. + * + * This function could be used to fetch the atomic propositions which + * are expensive to trace. It is possible only if the atomic proposition + * does not need to be updated at precise time. + * + * It is recommended to use tracepoints and ltl_atom_update() instead. + */ +} + +static void ltl_atoms_init(struct task_struct *task, struct ltl_monitor *mon, bool task_creation) +{ + /* + * This should initialize as many atomic propositions as possible. + * + * @task_creation indicates whether the task is being created. This is + * false if the task is already running before the monitor is enabled. + */ +%%ATOMS_INIT%% +} + +/* + * This is the instrumentation part of the monitor. + * + * This is the section where manual work is required. Here the kernel events + * are translated into model's event. + */ +%%TRACEPOINT_HANDLERS_SKEL%% +static int enable_%%MODEL_NAME%%(void) +{ + int retval; + + retval = ltl_monitor_init(); + if (retval) + return retval; + +%%TRACEPOINT_ATTACH%% + + return 0; +} + +static void disable_%%MODEL_NAME%%(void) +{ +%%TRACEPOINT_DETACH%% + + ltl_monitor_destroy(); +} + +/* + * This is the monitor register section. + */ +static struct rv_monitor rv_%%MODEL_NAME%% = { + .name = "%%MODEL_NAME%%", + .description = "%%DESCRIPTION%%", + .enable = enable_%%MODEL_NAME%%, + .disable = disable_%%MODEL_NAME%%, +}; + +static int __init register_%%MODEL_NAME%%(void) +{ + return rv_register_monitor(&rv_%%MODEL_NAME%%, %%PARENT%%); +} + +static void __exit unregister_%%MODEL_NAME%%(void) +{ + rv_unregister_monitor(&rv_%%MODEL_NAME%%); +} + +module_init(register_%%MODEL_NAME%%); +module_exit(unregister_%%MODEL_NAME%%); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR(/* TODO */); +MODULE_DESCRIPTION("%%MODEL_NAME%%: %%DESCRIPTION%%"); diff --git a/tools/verification/rvgen/rvgen/templates/ltl2k/trace.h b/tools/verification/rvgen/rvgen/templates/ltl2k/trace.h new file mode 100644 index 000000000000..49394c4b0f1c --- /dev/null +++ b/tools/verification/rvgen/rvgen/templates/ltl2k/trace.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Snippet to be included in rv_trace.h + */ + +#ifdef CONFIG_RV_MON_%%MODEL_NAME_UP%% +DEFINE_EVENT(event_%%MONITOR_CLASS%%, event_%%MODEL_NAME%%, + TP_PROTO(struct task_struct *task, char *states, char *atoms, char *next), + TP_ARGS(task, states, atoms, next)); +DEFINE_EVENT(error_%%MONITOR_CLASS%%, error_%%MODEL_NAME%%, + TP_PROTO(struct task_struct *task), + TP_ARGS(task)); +#endif /* CONFIG_RV_MON_%%MODEL_NAME_UP%% */ |
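
A minimal standalone sketch (Python, with a hypothetical monitor name "mymon" and a made-up Kconfig fragment) of the marker-based patching used by the refactored generator above: _patch_file() inserts the generated line immediately above the marker returned by _kconfig_marker(), so the marker survives for the next run and monitors that share a container stay grouped in the Kconfig.

    def kconfig_marker(container=None):
        # same string as generator._kconfig_marker(): "# Add new monitors here"
        # or "# Add new <container> monitors here"
        return "# Add new %smonitors here" % (container + " " if container else "")

    # made-up Kconfig fragment containing both markers
    kconfig = (
        "# Add new monitors here\n"
        "# Add new sched monitors here\n"
    )

    marker = kconfig_marker("sched")        # hypothetical parent container
    new_line = 'source "kernel/trace/rv/monitors/mymon/Kconfig"'

    # this is the core of _patch_file(): insert the new line right above the marker
    patched = kconfig.replace(marker, new_line + "\n" + marker)
    print(patched)

Running it leaves "# Add new sched monitors here" in place, with the new "source" line directly above it, which is why later monitors for the same container land next to each other.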
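
A small usage sketch for the new ltl2ba module. It assumes tools/verification/rvgen is on PYTHONPATH and that the ply package is installed; the specification text is a made-up example in the "RULE = <LTL spec>" format that parse_ltl() requires, with one named subexpression, and the loop merely prints the Buchi graph returned by create_graph().

    from rvgen import ltl2ba

    spec = """
    # toy property: a pending request must eventually be served
    RULE = always (REQ_PENDING imply (eventually SERVED))
    REQ_PENDING = REQUEST or RETRY
    """

    atoms, graph, ltl = ltl2ba.create_graph(spec)

    print("atomic propositions:", atoms)
    for node in graph:
        succ = sorted(n.id for n in node.outgoing)
        print("S%d%s  labels=%s  ->  %s"
              % (node.id, " (init)" if node.init else "", sorted(node.labels), succ))

The printed states, labels and successor lists correspond to what _fill_states(), build_condition_string() and _fill_transitions() later turn into the generated C automaton.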
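
For reference, a standalone demonstration of abbreviate_atoms(), which produces the short strings emitted into the generated ltl_atom_str(). The helper is copied from ltl2k.py above so the snippet runs on its own; the atom names and the annotated output are hypothetical (traced by hand, not taken from an in-tree monitor).

    def abbreviate_atoms(atoms: list[str]) -> list[str]:
        def shorten(s: str) -> str:
            skip = ["is", "by", "or", "and"]
            return '_'.join([x[:2] for x in s.lower().split('_') if x not in skip])

        abbrs = []
        for atom in atoms:
            # longest prefix shared with at least one other atom
            for i in range(len(atom), -1, -1):
                if sum(a.startswith(atom[:i]) for a in atoms) > 1:
                    break
            share = atom[:i]
            unique = atom[i:]
            abbrs.append(shorten(share) + shorten(unique))
        return abbrs

    print(abbreviate_atoms(["SCHED_SWITCH_IN", "SCHED_SWITCH_OUT", "WAKEUP"]))
    # roughly: ['sc_sw_in', 'sc_sw_ou', 'wa'] -- the shared "SCHED_SWITCH_"
    # prefix and the unique tails are each cut to two letters per word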