diff options
Diffstat (limited to 'tools')
253 files changed, 10247 insertions, 2122 deletions
diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 6d44f8c8a18f..af9d9acaf997 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -43,9 +43,6 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - struct kvm_regs { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -108,6 +105,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */ struct kvm_vcpu_init { __u32 target; @@ -418,6 +416,7 @@ enum { #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index 9306726337fe..df36f23876e8 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -1,24 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#define __ARCH_WANT_RENAMEAT -#define __ARCH_WANT_NEW_STAT -#define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_TIME32_SYSCALLS -#define __ARCH_WANT_MEMFD_SECRET - -#include <asm-generic/unistd.h> +#include <asm/unistd_64.h> diff --git a/tools/arch/x86/include/asm/amd-ibs.h b/tools/arch/x86/include/asm/amd/ibs.h index cb1740bc3da2..300b6e0765b2 100644 --- a/tools/arch/x86/include/asm/amd-ibs.h +++ b/tools/arch/x86/include/asm/amd/ibs.h @@ -4,7 +4,7 @@ * 55898 Rev 0.35 - Feb 5, 2021 */ -#include "msr-index.h" +#include "../msr-index.h" /* IBS_OP_DATA2 DataSrc */ #define IBS_DATA_SRC_LOC_CACHE 2 diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 9e3fa7942e7d..bc81b9d1aeca 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -75,8 +75,8 @@ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ #define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ #define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +/* Free ( 3*32+ 6) */ +/* Free ( 3*32+ 7) */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ #define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ #define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ @@ -329,6 +329,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ @@ -377,6 +378,7 @@ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ #define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ @@ -434,15 +436,18 @@ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */ #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ +#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ +#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ +#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ @@ -455,6 +460,11 @@ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ +#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */ +#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /* + * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs. + * (SRSO_MSR_FIX in the official doc). + */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -466,10 +476,11 @@ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ -#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ -#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ -#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ +#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ +#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ /* * BUG word(s) @@ -508,7 +519,7 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ @@ -516,9 +527,10 @@ #define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ -#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ +#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h index 253690eb3c26..183aa662b165 100644 --- a/tools/arch/x86/include/asm/inat.h +++ b/tools/arch/x86/include/asm/inat.h @@ -82,6 +82,7 @@ #define INAT_NO_REX2 (1 << (INAT_FLAG_OFFS + 8)) #define INAT_REX2_VARIANT (1 << (INAT_FLAG_OFFS + 9)) #define INAT_EVEX_SCALABLE (1 << (INAT_FLAG_OFFS + 10)) +#define INAT_INV64 (1 << (INAT_FLAG_OFFS + 11)) /* Attribute making macros for attribute tables */ #define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS) #define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS) @@ -242,4 +243,9 @@ static inline int inat_evex_scalable(insn_attr_t attr) { return attr & INAT_EVEX_SCALABLE; } + +static inline int inat_is_invalid64(insn_attr_t attr) +{ + return attr & INAT_INV64; +} #endif diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index dc1c1057f26e..e6134ef2263d 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -397,7 +397,8 @@ #define MSR_IA32_PASID_VALID BIT_ULL(31) /* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) @@ -610,6 +611,7 @@ #define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_AMD_PERF_STATUS 0xc0010063 #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 @@ -646,6 +648,7 @@ #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ #define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 @@ -684,11 +687,12 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_RESV_BIT 18 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) - -#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f - #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_AMD64_RMP_CFG 0xc0010136 +#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0 +#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT) +#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8) #define MSR_SVSM_CAA 0xc001f000 @@ -699,15 +703,17 @@ #define MSR_AMD_CPPC_REQ 0xc00102b3 #define MSR_AMD_CPPC_STATUS 0xc00102b4 -#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) -#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) -#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) -#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) +/* Masks for use with MSR_AMD_CPPC_CAP1 */ +#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24) -#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) -#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) -#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) -#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) +/* Masks for use with MSR_AMD_CPPC_REQ */ +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) /* AMD Performance Counter Global Status and Control MSRs */ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 @@ -719,6 +725,7 @@ /* Zen4 */ #define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 /* Fam 19h MSRs */ diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 88585c1de416..460306b35a4b 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -559,6 +559,9 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) +#define KVM_XEN_MSR_MIN_INDEX 0x40000000u +#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -925,5 +928,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 #define KVM_X86_SNP_VM 4 +#define KVM_X86_TDX_VM 5 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 1814b413fd57..ec1321248dac 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -95,6 +95,7 @@ #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 @@ -224,6 +225,7 @@ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv index d751eb8585d0..8d925ce9750f 100644 --- a/tools/arch/x86/kcpuid/cpuid.csv +++ b/tools/arch/x86/kcpuid/cpuid.csv @@ -1,5 +1,5 @@ # SPDX-License-Identifier: CC0-1.0 -# Generator: x86-cpuid-db v1.0 +# Generator: x86-cpuid-db v2.4 # # Auto-generated file. @@ -12,297 +12,298 @@ # Leaf 0H # Maximum standard leaf number + CPU vendor string - 0, 0, eax, 31:0, max_std_leaf , Highest cpuid standard leaf supported - 0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3 - 0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11 - 0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7 + 0x0, 0, eax, 31:0, max_std_leaf , Highest standard CPUID leaf supported + 0x0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3 + 0x0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11 + 0x0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7 # Leaf 1H # CPU FMS (Family/Model/Stepping) + standard feature flags - 1, 0, eax, 3:0, stepping , Stepping ID - 1, 0, eax, 7:4, base_model , Base CPU model ID - 1, 0, eax, 11:8, base_family_id , Base CPU family ID - 1, 0, eax, 13:12, cpu_type , CPU type - 1, 0, eax, 19:16, ext_model , Extended CPU model ID - 1, 0, eax, 27:20, ext_family , Extended CPU family ID - 1, 0, ebx, 7:0, brand_id , Brand index - 1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size - 1, 0, ebx, 23:16, n_logical_cpu , Logical CPU (HW threads) count - 1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID - 1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3) - 1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support - 1, 0, ecx, 2, dtes64 , 64-bit DS save area - 1, 0, ecx, 3, monitor , MONITOR/MWAIT support - 1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store - 1, 0, ecx, 5, vmx , Virtual Machine Extensions - 1, 0, ecx, 6, smx , Safer Mode Extensions - 1, 0, ecx, 7, est , Enhanced Intel SpeedStep - 1, 0, ecx, 8, tm2 , Thermal Monitor 2 - 1, 0, ecx, 9, ssse3 , Supplemental SSE3 - 1, 0, ecx, 10, cid , L1 Context ID - 1, 0, ecx, 11, sdbg , Sillicon Debug - 1, 0, ecx, 12, fma , FMA extensions using YMM state - 1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support - 1, 0, ecx, 14, xtpr , xTPR Update Control - 1, 0, ecx, 15, pdcm , Perfmon and Debug Capability - 1, 0, ecx, 17, pcid , Process-context identifiers - 1, 0, ecx, 18, dca , Direct Cache Access - 1, 0, ecx, 19, sse4_1 , SSE4.1 - 1, 0, ecx, 20, sse4_2 , SSE4.2 - 1, 0, ecx, 21, x2apic , X2APIC support - 1, 0, ecx, 22, movbe , MOVBE instruction support - 1, 0, ecx, 23, popcnt , POPCNT instruction support - 1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation - 1, 0, ecx, 25, aes , AES instructions - 1, 0, ecx, 26, xsave , XSAVE (and related instructions) support - 1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS - 1, 0, ecx, 28, avx , AVX instructions support - 1, 0, ecx, 29, f16c , Half-precision floating-point conversion support - 1, 0, ecx, 30, rdrand , RDRAND instruction support - 1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system - 1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87) - 1, 0, edx, 1, vme , Virtual-8086 Mode Extensions - 1, 0, edx, 2, de , Debugging Extensions - 1, 0, edx, 3, pse , Page Size Extension - 1, 0, edx, 4, tsc , Time Stamp Counter - 1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support) - 1, 0, edx, 6, pae , Physical Address Extensions - 1, 0, edx, 7, mce , Machine Check Exception - 1, 0, edx, 8, cx8 , CMPXCHG8B instruction - 1, 0, edx, 9, apic , APIC on-chip - 1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs - 1, 0, edx, 12, mtrr , Memory Type Range Registers - 1, 0, edx, 13, pge , Page Global Extensions - 1, 0, edx, 14, mca , Machine Check Architecture - 1, 0, edx, 15, cmov , Conditional Move Instruction - 1, 0, edx, 16, pat , Page Attribute Table - 1, 0, edx, 17, pse36 , Page Size Extension (36-bit) - 1, 0, edx, 18, pn , Processor Serial Number - 1, 0, edx, 19, clflush , CLFLUSH instruction - 1, 0, edx, 21, dts , Debug Store - 1, 0, edx, 22, acpi , Thermal monitor and clock control - 1, 0, edx, 23, mmx , MMX instructions - 1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions - 1, 0, edx, 25, sse , SSE instructions - 1, 0, edx, 26, sse2 , SSE2 instructions - 1, 0, edx, 27, ss , Self Snoop - 1, 0, edx, 28, ht , Hyper-threading - 1, 0, edx, 29, tm , Thermal Monitor - 1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now resreved - 1, 0, edx, 31, pbe , Pending Break Enable + 0x1, 0, eax, 3:0, stepping , Stepping ID + 0x1, 0, eax, 7:4, base_model , Base CPU model ID + 0x1, 0, eax, 11:8, base_family_id , Base CPU family ID + 0x1, 0, eax, 13:12, cpu_type , CPU type + 0x1, 0, eax, 19:16, ext_model , Extended CPU model ID + 0x1, 0, eax, 27:20, ext_family , Extended CPU family ID + 0x1, 0, ebx, 7:0, brand_id , Brand index + 0x1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size + 0x1, 0, ebx, 23:16, n_logical_cpu , Logical CPU count + 0x1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID + 0x1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3) + 0x1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support + 0x1, 0, ecx, 2, dtes64 , 64-bit DS save area + 0x1, 0, ecx, 3, monitor , MONITOR/MWAIT support + 0x1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store + 0x1, 0, ecx, 5, vmx , Virtual Machine Extensions + 0x1, 0, ecx, 6, smx , Safer Mode Extensions + 0x1, 0, ecx, 7, est , Enhanced Intel SpeedStep + 0x1, 0, ecx, 8, tm2 , Thermal Monitor 2 + 0x1, 0, ecx, 9, ssse3 , Supplemental SSE3 + 0x1, 0, ecx, 10, cid , L1 Context ID + 0x1, 0, ecx, 11, sdbg , Silicon Debug + 0x1, 0, ecx, 12, fma , FMA extensions using YMM state + 0x1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support + 0x1, 0, ecx, 14, xtpr , xTPR Update Control + 0x1, 0, ecx, 15, pdcm , Perfmon and Debug Capability + 0x1, 0, ecx, 17, pcid , Process-context identifiers + 0x1, 0, ecx, 18, dca , Direct Cache Access + 0x1, 0, ecx, 19, sse4_1 , SSE4.1 + 0x1, 0, ecx, 20, sse4_2 , SSE4.2 + 0x1, 0, ecx, 21, x2apic , X2APIC support + 0x1, 0, ecx, 22, movbe , MOVBE instruction support + 0x1, 0, ecx, 23, popcnt , POPCNT instruction support + 0x1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation + 0x1, 0, ecx, 25, aes , AES instructions + 0x1, 0, ecx, 26, xsave , XSAVE (and related instructions) support + 0x1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS + 0x1, 0, ecx, 28, avx , AVX instructions support + 0x1, 0, ecx, 29, f16c , Half-precision floating-point conversion support + 0x1, 0, ecx, 30, rdrand , RDRAND instruction support + 0x1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system + 0x1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87) + 0x1, 0, edx, 1, vme , Virtual-8086 Mode Extensions + 0x1, 0, edx, 2, de , Debugging Extensions + 0x1, 0, edx, 3, pse , Page Size Extension + 0x1, 0, edx, 4, tsc , Time Stamp Counter + 0x1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support) + 0x1, 0, edx, 6, pae , Physical Address Extensions + 0x1, 0, edx, 7, mce , Machine Check Exception + 0x1, 0, edx, 8, cx8 , CMPXCHG8B instruction + 0x1, 0, edx, 9, apic , APIC on-chip + 0x1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs + 0x1, 0, edx, 12, mtrr , Memory Type Range Registers + 0x1, 0, edx, 13, pge , Page Global Extensions + 0x1, 0, edx, 14, mca , Machine Check Architecture + 0x1, 0, edx, 15, cmov , Conditional Move Instruction + 0x1, 0, edx, 16, pat , Page Attribute Table + 0x1, 0, edx, 17, pse36 , Page Size Extension (36-bit) + 0x1, 0, edx, 18, pn , Processor Serial Number + 0x1, 0, edx, 19, clflush , CLFLUSH instruction + 0x1, 0, edx, 21, dts , Debug Store + 0x1, 0, edx, 22, acpi , Thermal monitor and clock control + 0x1, 0, edx, 23, mmx , MMX instructions + 0x1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions + 0x1, 0, edx, 25, sse , SSE instructions + 0x1, 0, edx, 26, sse2 , SSE2 instructions + 0x1, 0, edx, 27, ss , Self Snoop + 0x1, 0, edx, 28, ht , Hyper-threading + 0x1, 0, edx, 29, tm , Thermal Monitor + 0x1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now reserved + 0x1, 0, edx, 31, pbe , Pending Break Enable # Leaf 2H # Intel cache and TLB information one-byte descriptors - 2, 0, eax, 7:0, iteration_count , Number of times this CPUD leaf must be queried - 2, 0, eax, 15:8, desc1 , Descriptor #1 - 2, 0, eax, 23:16, desc2 , Descriptor #2 - 2, 0, eax, 30:24, desc3 , Descriptor #3 - 2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set - 2, 0, ebx, 7:0, desc4 , Descriptor #4 - 2, 0, ebx, 15:8, desc5 , Descriptor #5 - 2, 0, ebx, 23:16, desc6 , Descriptor #6 - 2, 0, ebx, 30:24, desc7 , Descriptor #7 - 2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set - 2, 0, ecx, 7:0, desc8 , Descriptor #8 - 2, 0, ecx, 15:8, desc9 , Descriptor #9 - 2, 0, ecx, 23:16, desc10 , Descriptor #10 - 2, 0, ecx, 30:24, desc11 , Descriptor #11 - 2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set - 2, 0, edx, 7:0, desc12 , Descriptor #12 - 2, 0, edx, 15:8, desc13 , Descriptor #13 - 2, 0, edx, 23:16, desc14 , Descriptor #14 - 2, 0, edx, 30:24, desc15 , Descriptor #15 - 2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set + 0x2, 0, eax, 7:0, iteration_count , Number of times this leaf must be queried + 0x2, 0, eax, 15:8, desc1 , Descriptor #1 + 0x2, 0, eax, 23:16, desc2 , Descriptor #2 + 0x2, 0, eax, 30:24, desc3 , Descriptor #3 + 0x2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set + 0x2, 0, ebx, 7:0, desc4 , Descriptor #4 + 0x2, 0, ebx, 15:8, desc5 , Descriptor #5 + 0x2, 0, ebx, 23:16, desc6 , Descriptor #6 + 0x2, 0, ebx, 30:24, desc7 , Descriptor #7 + 0x2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set + 0x2, 0, ecx, 7:0, desc8 , Descriptor #8 + 0x2, 0, ecx, 15:8, desc9 , Descriptor #9 + 0x2, 0, ecx, 23:16, desc10 , Descriptor #10 + 0x2, 0, ecx, 30:24, desc11 , Descriptor #11 + 0x2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set + 0x2, 0, edx, 7:0, desc12 , Descriptor #12 + 0x2, 0, edx, 15:8, desc13 , Descriptor #13 + 0x2, 0, edx, 23:16, desc14 , Descriptor #14 + 0x2, 0, edx, 30:24, desc15 , Descriptor #15 + 0x2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set # Leaf 4H # Intel deterministic cache parameters - 4, 31:0, eax, 4:0, cache_type , Cache type field - 4, 31:0, eax, 7:5, cache_level , Cache level (1-based) - 4, 31:0, eax, 8, cache_self_init , Self-initialializing cache level - 4, 31:0, eax, 9, fully_associative , Fully-associative cache - 4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache - 4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package - 4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based) - 4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based) - 4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based) - 4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based) - 4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches - 4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches - 4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function) + 0x4, 31:0, eax, 4:0, cache_type , Cache type field + 0x4, 31:0, eax, 7:5, cache_level , Cache level (1-based) + 0x4, 31:0, eax, 8, cache_self_init , Self-initializing cache level + 0x4, 31:0, eax, 9, fully_associative , Fully-associative cache + 0x4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache + 0x4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package + 0x4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based) + 0x4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based) + 0x4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based) + 0x4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based) + 0x4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches + 0x4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches + 0x4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function) # Leaf 5H # MONITOR/MWAIT instructions enumeration - 5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes - 5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes - 5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported - 5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported - 5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT - 5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT - 5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT - 5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT - 5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT - 5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT - 5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT - 5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT + 0x5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes + 0x5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes + 0x5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported + 0x5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported + 0x5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT + 0x5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT + 0x5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT + 0x5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT + 0x5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT + 0x5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT + 0x5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT + 0x5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT # Leaf 6H # Thermal and Power Management enumeration - 6, 0, eax, 0, dtherm , Digital temprature sensor - 6, 0, eax, 1, turbo_boost , Intel Turbo Boost - 6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state) - 6, 0, eax, 4, pln , Power Limit Notification (PLN) event - 6, 0, eax, 5, ecmd , Clock modulation duty cycle extension - 6, 0, eax, 6, pts , Package thermal management - 6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported - 6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR) - 6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported - 6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference - 6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request - 6, 0, eax, 13, hdc_base_regs , HDC base registers are supported - 6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0 - 6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change - 6, 0, eax, 16, hwp_peci_override , HWP PECI override - 6, 0, eax, 17, hwp_flexible , Flexible HWP - 6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode - 6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported - 6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported - 6, 0, eax, 23, thread_director , Intel thread director support - 6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported - 6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds - 6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface) - 6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support - 6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director - 6, 0, edx, 0, perfcap_reporting , Performance capability reporting - 6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting - 6, 0, edx, 11:8, feedback_sz , HW feedback interface struct size, in 4K pages - 6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU index @ HW feedback struct, 0-based + 0x6, 0, eax, 0, dtherm , Digital temperature sensor + 0x6, 0, eax, 1, turbo_boost , Intel Turbo Boost + 0x6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state) + 0x6, 0, eax, 4, pln , Power Limit Notification (PLN) event + 0x6, 0, eax, 5, ecmd , Clock modulation duty cycle extension + 0x6, 0, eax, 6, pts , Package thermal management + 0x6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported + 0x6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR) + 0x6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported + 0x6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference + 0x6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request + 0x6, 0, eax, 13, hdc_base_regs , HDC base registers are supported + 0x6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0 + 0x6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change + 0x6, 0, eax, 16, hwp_peci_override , HWP PECI override + 0x6, 0, eax, 17, hwp_flexible , Flexible HWP + 0x6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode + 0x6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported + 0x6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported + 0x6, 0, eax, 23, thread_director , Intel thread director support + 0x6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported + 0x6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds + 0x6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface) + 0x6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support + 0x6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director + 0x6, 0, edx, 0, perfcap_reporting , Performance capability reporting + 0x6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting + 0x6, 0, edx, 11:8, feedback_sz , Feedback interface structure size, in 4K pages + 0x6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU hardware feedback interface index # Leaf 7H # Extended CPU features enumeration - 7, 0, eax, 31:0, leaf7_n_subleaves , Number of cpuid 0x7 subleaves - 7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support - 7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported - 7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions) - 7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1 - 7, 0, ebx, 4, hle , Hardware Lock Elision - 7, 0, ebx, 5, avx2 , AVX2 instruction set - 7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions - 7, 0, ebx, 7, smep , Supervisor Mode Execution Protection - 7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2 - 7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB - 7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID) - 7, 0, ebx, 11, rtm , Intel restricted transactional memory - 7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring - 7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero) - 7, 0, ebx, 14, mpx , Intel memory protection extensions - 7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcemeent - 7, 0, ebx, 16, avx512f , AVX-512 foundation instructions - 7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions - 7, 0, ebx, 18, rdseed , RDSEED instruction - 7, 0, ebx, 19, adx , ADCX/ADOX instructions - 7, 0, ebx, 20, smap , Supervisor mode access prevention - 7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add - 7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction - 7, 0, ebx, 24, clwb , CLWB instruction - 7, 0, ebx, 25, intel_pt , Intel processor trace - 7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions - 7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instrs - 7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instrs - 7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions - 7, 0, ebx, 30, avx512bw , AVX-512 BW (byte/word granular) instructions - 7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions - 7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only) - 7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instrs - 7, 0, ecx, 2, umip , User mode instruction protection - 7, 0, ecx, 3, pku , Protection keys for user-space - 7, 0, ecx, 4, ospke , OS protection keys enable - 7, 0, ecx, 5, waitpkg , WAITPKG instructions - 7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instrs group 2 - 7, 0, ecx, 7, cet_ss , CET shadow stack features - 7, 0, ecx, 8, gfni , Galois field new instructions - 7, 0, ecx, 9, vaes , Vector AES instrs - 7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support - 7, 0, ecx, 11, avx512_vnni , Vector neural network instructions - 7, 0, ecx, 12, avx512_bitalg , AVX-512 bit count/shiffle - 7, 0, ecx, 13, tme , Intel total memory encryption - 7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DW/QW - 7, 0, ecx, 16, la57 , 57-bit linear addreses (five-level paging) - 7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode - 7, 0, ecx, 22, rdpid , RDPID instruction - 7, 0, ecx, 23, key_locker , Intel key locker support - 7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection - 7, 0, ecx, 25, cldemote , CLDEMOTE instruction - 7, 0, ecx, 27, movdiri , MOVDIRI instruction - 7, 0, ecx, 28, movdir64b , MOVDIR64B instruction - 7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S}) - 7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration - 7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages - 7, 0, edx, 1, sgx_keys , Intel SGX attestation services - 7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions - 7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision - 7, 0, edx, 4, fsrm , Fast short REP MOV - 7, 0, edx, 5, uintr , CPU supports user interrupts - 7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions - 7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available - 7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support - 7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts - 7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported - 7, 0, edx, 14, serialize , SERIALIZE instruction - 7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part' - 7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking - 7, 0, edx, 18, pconfig , PCONFIG instruction - 7, 0, edx, 19, arch_lbr , Intel architectural LBRs - 7, 0, edx, 20, ibt , CET indirect branch tracking - 7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support - 7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions - 7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support - 7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support - 7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions) - 7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors - 7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR - 7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR - 7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR - 7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable - 7, 1, eax, 4, avx_vnni , AVX-VNNI instructions - 7, 1, eax, 5, avx512_bf16 , AVX-512 bFloat16 instructions - 7, 1, eax, 6, lass , Linear address space separation - 7, 1, eax, 7, cmpccxadd , CMPccXADD instructions - 7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: CPUID leaf 0x23 is supported - 7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB - 7, 1, eax, 11, fsrs , Fast short REP STOSB - 7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB - 7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions - 7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS - 7, 1, eax, 19, wrmsrns , WRMSRNS instr (WRMSR-non-serializing) - 7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations - 7, 1, eax, 22, hreset , History reset support - 7, 1, eax, 23, avx_ifma , Integer fused multiply add - 7, 1, eax, 26, lam , Linear address masking - 7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions - 7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs) - 7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions - 7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions - 7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids) - 7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions - 7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use - 7, 2, edx, 0, intel_psfd , Intel predictive store forward disable - 7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S} - 7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S} - 7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U - 7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S - 7, 2, edx, 5, mcdt_no , MCDT mitigation not needed - 7, 2, edx, 6, uclock_disable , UC-lock disable is supported + 0x7, 0, eax, 31:0, leaf7_n_subleaves , Number of leaf 0x7 subleaves + 0x7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support + 0x7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported + 0x7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions) + 0x7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1 + 0x7, 0, ebx, 4, hle , Hardware Lock Elision + 0x7, 0, ebx, 5, avx2 , AVX2 instruction set + 0x7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions + 0x7, 0, ebx, 7, smep , Supervisor Mode Execution Protection + 0x7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2 + 0x7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB + 0x7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID) + 0x7, 0, ebx, 11, rtm , Intel restricted transactional memory + 0x7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring + 0x7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero) + 0x7, 0, ebx, 14, mpx , Intel memory protection extensions + 0x7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcement + 0x7, 0, ebx, 16, avx512f , AVX-512 foundation instructions + 0x7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions + 0x7, 0, ebx, 18, rdseed , RDSEED instruction + 0x7, 0, ebx, 19, adx , ADCX/ADOX instructions + 0x7, 0, ebx, 20, smap , Supervisor mode access prevention + 0x7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add + 0x7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction + 0x7, 0, ebx, 24, clwb , CLWB instruction + 0x7, 0, ebx, 25, intel_pt , Intel processor trace + 0x7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions + 0x7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instructions + 0x7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instructions + 0x7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions + 0x7, 0, ebx, 30, avx512bw , AVX-512 byte/word instructions + 0x7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions + 0x7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only) + 0x7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instructions + 0x7, 0, ecx, 2, umip , User mode instruction protection + 0x7, 0, ecx, 3, pku , Protection keys for user-space + 0x7, 0, ecx, 4, ospke , OS protection keys enable + 0x7, 0, ecx, 5, waitpkg , WAITPKG instructions + 0x7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instructions group 2 + 0x7, 0, ecx, 7, cet_ss , CET shadow stack features + 0x7, 0, ecx, 8, gfni , Galois field new instructions + 0x7, 0, ecx, 9, vaes , Vector AES instructions + 0x7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support + 0x7, 0, ecx, 11, avx512_vnni , Vector neural network instructions + 0x7, 0, ecx, 12, avx512_bitalg , AVX-512 bitwise algorithms + 0x7, 0, ecx, 13, tme , Intel total memory encryption + 0x7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DWORD/QWORD + 0x7, 0, ecx, 16, la57 , 57-bit linear addresses (five-level paging) + 0x7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode + 0x7, 0, ecx, 22, rdpid , RDPID instruction + 0x7, 0, ecx, 23, key_locker , Intel key locker support + 0x7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection + 0x7, 0, ecx, 25, cldemote , CLDEMOTE instruction + 0x7, 0, ecx, 27, movdiri , MOVDIRI instruction + 0x7, 0, ecx, 28, movdir64b , MOVDIR64B instruction + 0x7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S}) + 0x7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration + 0x7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages + 0x7, 0, edx, 1, sgx_keys , Intel SGX attestation services + 0x7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions + 0x7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision + 0x7, 0, edx, 4, fsrm , Fast short REP MOV + 0x7, 0, edx, 5, uintr , CPU supports user interrupts + 0x7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions + 0x7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available + 0x7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support + 0x7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts + 0x7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported + 0x7, 0, edx, 14, serialize , SERIALIZE instruction + 0x7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part' + 0x7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking + 0x7, 0, edx, 18, pconfig , PCONFIG instruction + 0x7, 0, edx, 19, arch_lbr , Intel architectural LBRs + 0x7, 0, edx, 20, ibt , CET indirect branch tracking + 0x7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support + 0x7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions + 0x7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support + 0x7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support + 0x7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions) + 0x7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors + 0x7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR + 0x7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR + 0x7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR + 0x7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable + 0x7, 1, eax, 4, avx_vnni , AVX-VNNI instructions + 0x7, 1, eax, 5, avx512_bf16 , AVX-512 bfloat16 instructions + 0x7, 1, eax, 6, lass , Linear address space separation + 0x7, 1, eax, 7, cmpccxadd , CMPccXADD instructions + 0x7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: leaf 0x23 is supported + 0x7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB + 0x7, 1, eax, 11, fsrs , Fast short REP STOSB + 0x7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB + 0x7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions + 0x7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS + 0x7, 1, eax, 19, wrmsrns , WRMSRNS instruction (WRMSR-non-serializing) + 0x7, 1, eax, 20, nmi_src , NMI-source reporting with FRED event data + 0x7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations + 0x7, 1, eax, 22, hreset , History reset support + 0x7, 1, eax, 23, avx_ifma , Integer fused multiply add + 0x7, 1, eax, 26, lam , Linear address masking + 0x7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions + 0x7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs) + 0x7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions + 0x7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions + 0x7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids) + 0x7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions + 0x7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use + 0x7, 2, edx, 0, intel_psfd , Intel predictive store forward disable + 0x7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S} + 0x7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S} + 0x7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U + 0x7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S + 0x7, 2, edx, 5, mcdt_no , MCDT mitigation not needed + 0x7, 2, edx, 6, uclock_disable , UC-lock disable is supported # Leaf 9H # Intel DCA (Direct Cache Access) enumeration - 9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS + 0x9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS # Leaf AH # Intel PMU (Performance Monitoring Unit) enumeration @@ -310,7 +311,7 @@ 0xa, 0, eax, 7:0, pmu_version , Performance monitoring unit version ID 0xa, 0, eax, 15:8, pmu_n_gcounters , Number of general PMU counters per logical CPU 0xa, 0, eax, 23:16, pmu_gcounters_nbits , Bitwidth of PMU general counters - 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of cpuid leaf 0xa EBX bit vector + 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of leaf 0xa EBX bit vector 0xa, 0, ebx, 0, no_core_cycle_evt , Core cycle event not available 0xa, 0, ebx, 1, no_insn_retired_evt , Instruction retired event not available 0xa, 0, ebx, 2, no_refcycle_evt , Reference cycles event not available @@ -339,18 +340,18 @@ 0xd, 0, eax, 0, xcr0_x87 , XCR0.X87 (bit 0) supported 0xd, 0, eax, 1, xcr0_sse , XCR0.SEE (bit 1) supported 0xd, 0, eax, 2, xcr0_avx , XCR0.AVX (bit 2) supported - 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 regs) - 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs) - 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 regs) - 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs) - 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs) - 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU reg) - 0xd, 0, eax, 11, xcr0_cet_u , AMD XCR0.CET_U (bit 11) supported (CET supervisor state) - 0xd, 0, eax, 12, xcr0_cet_s , AMD XCR0.CET_S (bit 12) support (CET user state) + 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 registers) + 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers) + 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 registers) + 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers) + 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers) + 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU registers) + 0xd, 0, eax, 11, xcr0_cet_u , XCR0.CET_U (bit 11) supported (CET user state) + 0xd, 0, eax, 12, xcr0_cet_s , XCR0.CET_S (bit 12) supported (CET supervisor state) 0xd, 0, eax, 17, xcr0_tileconfig , XCR0.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG) 0xd, 0, eax, 18, xcr0_tiledata , XCR0.TILEDATA (bit 18) supported (AMX can manage TILEDATA) - 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTR area byte size, for XCR0 enabled features - 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTR area max byte size, all CPU features + 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTOR area byte size, for XCR0 enabled features + 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTOR area max byte size, all CPU features 0xd, 0, edx, 30, xcr0_lwp , AMD XCR0.LWP (bit 62) supported (Light-weight Profiling) 0xd, 1, eax, 0, xsaveopt , XSAVEOPT instruction 0xd, 1, eax, 1, xsavec , XSAVEC instruction @@ -369,7 +370,7 @@ 0xd, 63:2, eax, 31:0, xsave_sz , Size of save area for subleaf-N feature, in bytes 0xd, 63:2, ebx, 31:0, xsave_offset , Offset of save area for subleaf-N feature, in bytes 0xd, 63:2, ecx, 0, is_xss_bit , Subleaf N describes an XSS bit, otherwise XCR0 bit - 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature xsave area is 64-byte aligned + 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature XSAVE area is 64-byte aligned # Leaf FH # Intel RDT / AMD PQoS resource monitoring @@ -426,17 +427,17 @@ 0x12, 1, ecx, 0, xfrm_x87 , Enclave XFRM.X87 (bit 0) supported 0x12, 1, ecx, 1, xfrm_sse , Enclave XFRM.SEE (bit 1) supported 0x12, 1, ecx, 2, xfrm_avx , Enclave XFRM.AVX (bit 2) supported - 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 regs) - 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs) - 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 regs) - 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs) - 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs) - 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU reg) + 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 registers) + 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers) + 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 registers) + 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers) + 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers) + 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU registers) 0x12, 1, ecx, 17, xfrm_tileconfig , Enclave XFRM.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG) 0x12, 1, ecx, 18, xfrm_tiledata , Enclave XFRM.TILEDATA (bit 18) supported (AMX can manage TILEDATA) 0x12, 31:2, eax, 3:0, subleaf_type , Subleaf type (dictates output layout) - 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base addr, bits[12:31] - 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base addr, bits[32:51] + 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base address, bits[12:31] + 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base address, bits[32:51] 0x12, 31:2, ecx, 3:0, epc_sec_type , EPC section type / property encoding 0x12, 31:2, ecx, 31:12, epc_sec_size_0 , EPC section size, bits[12:31] 0x12, 31:2, edx, 19:0, epc_sec_size_1 , EPC section size, bits[32:51] @@ -444,7 +445,7 @@ # Leaf 14H # Intel Processor Trace enumeration - 0x14, 0, eax, 31:0, pt_max_subleaf , Max cpuid 0x14 subleaf + 0x14, 0, eax, 31:0, pt_max_subleaf , Maximum leaf 0x14 subleaf 0x14, 0, ebx, 0, cr3_filtering , IA32_RTIT_CR3_MATCH is accessible 0x14, 0, ebx, 1, psb_cyc , Configurable PSB and cycle-accurate mode 0x14, 0, ebx, 2, ip_filtering , IP/TraceStop filtering; Warm-reset PT MSRs preservation @@ -472,7 +473,7 @@ 0x15, 0, ecx, 31:0, cpu_crystal_hz , Core crystal clock nominal frequency, in Hz # Leaf 16H -# Intel processor fequency enumeration +# Intel processor frequency enumeration 0x16, 0, eax, 15:0, cpu_base_mhz , Processor base frequency, in MHz 0x16, 0, ebx, 15:0, cpu_max_mhz , Processor max frequency, in MHz @@ -481,9 +482,9 @@ # Leaf 17H # Intel SoC vendor attributes enumeration - 0x17, 0, eax, 31:0, soc_max_subleaf , Max cpuid leaf 0x17 subleaf + 0x17, 0, eax, 31:0, soc_max_subleaf , Maximum leaf 0x17 subleaf 0x17, 0, ebx, 15:0, soc_vendor_id , SoC vendor ID - 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumaeratoion scheme (not Intel) + 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumeration scheme (not Intel) 0x17, 0, ecx, 31:0, soc_proj_id , SoC project ID, assigned by vendor 0x17, 0, edx, 31:0, soc_stepping_id , Soc project stepping ID, assigned by vendor 0x17, 3:1, eax, 31:0, vendor_brand_a , Vendor Brand ID string, bytes subleaf_nr * (0 -> 3) @@ -494,18 +495,18 @@ # Leaf 18H # Intel determenestic address translation (TLB) parameters - 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Max cpuid 0x18 subleaf + 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Maximum leaf 0x18 subleaf 0x18, 31:0, ebx, 0, tlb_4k_page , TLB 4KB-page entries supported 0x18, 31:0, ebx, 1, tlb_2m_page , TLB 2MB-page entries supported 0x18, 31:0, ebx, 2, tlb_4m_page , TLB 4MB-page entries supported 0x18, 31:0, ebx, 3, tlb_1g_page , TLB 1GB-page entries supported - 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this struct + 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this structure 0x18, 31:0, ebx, 31:16, n_way_associative , Ways of associativity 0x18, 31:0, ecx, 31:0, n_sets , Number of sets 0x18, 31:0, edx, 4:0, tlb_type , Translation cache type (TLB type) 0x18, 31:0, edx, 7:5, tlb_cache_level , Translation cache level (1-based) 0x18, 31:0, edx, 8, is_fully_associative , Fully-associative structure - 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max num of addressible IDs for logical CPUs sharing this TLB - 1 + 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max number of addressable IDs for logical CPUs sharing this TLB - 1 # Leaf 19H # Intel Key Locker enumeration @@ -568,7 +569,7 @@ # Intel AMX, TMUL (Tile-matrix MULtiply) accelerator unit enumeration 0x1e, 0, ebx, 7:0, tmul_maxk , TMUL unit maximum height, K (rows or columns) - 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maxiumum SIMD dimension, N (column bytes) + 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maximum SIMD dimension, N (column bytes) # Leaf 1FH # Intel extended topology enumeration v2 @@ -623,9 +624,9 @@ 0x40000000, 0, edx, 31:0, hypervisor_id_2 , Hypervisor ID string bytes 8 - 11 # Leaf 80000000H -# Maximum extended leaf number + CPU vendor string (AMD) +# Maximum extended leaf number + AMD/Transmeta CPU vendor string -0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended cpuid leaf supported +0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended CPUID leaf supported 0x80000000, 0, ebx, 31:0, cpu_vendorid_0 , Vendor ID string bytes 0 - 3 0x80000000, 0, ecx, 31:0, cpu_vendorid_2 , Vendor ID string bytes 8 - 11 0x80000000, 0, edx, 31:0, cpu_vendorid_1 , Vendor ID string bytes 4 - 7 @@ -636,6 +637,7 @@ 0x80000001, 0, eax, 3:0, e_stepping_id , Stepping ID 0x80000001, 0, eax, 7:4, e_base_model , Base processor model 0x80000001, 0, eax, 11:8, e_base_family , Base processor family +0x80000001, 0, eax, 13:12, e_base_type , Base processor type (Transmeta) 0x80000001, 0, eax, 19:16, e_ext_model , Extended processor model 0x80000001, 0, eax, 27:20, e_ext_family , Extended processor family 0x80000001, 0, ebx, 15:0, brand_id , Brand ID @@ -659,7 +661,7 @@ 0x80000001, 0, ecx, 17, tce , Translation cache extension 0x80000001, 0, ecx, 19, nodeid_msr , NodeId MSR (0xc001100c) 0x80000001, 0, ecx, 21, tbm , Trailing bit manipulations -0x80000001, 0, ecx, 22, topoext , Topology Extensions (cpuid leaf 0x8000001d) +0x80000001, 0, ecx, 22, topoext , Topology Extensions (leaf 0x8000001d) 0x80000001, 0, ecx, 23, perfctr_core , Core performance counter extensions 0x80000001, 0, ecx, 24, perfctr_nb , NB/DF performance counter extensions 0x80000001, 0, ecx, 26, bpext , Data access breakpoint extension @@ -687,6 +689,7 @@ 0x80000001, 0, edx, 19, mp , Out-of-spec AMD Multiprocessing bit 0x80000001, 0, edx, 20, nx , No-execute page protection 0x80000001, 0, edx, 22, mmxext , AMD MMX extensions +0x80000001, 0, edx, 23, e_mmx , MMX instructions 0x80000001, 0, edx, 24, e_fxsr , FXSAVE and FXRSTOR instructions 0x80000001, 0, edx, 25, fxsr_opt , FXSAVE and FXRSTOR optimizations 0x80000001, 0, edx, 26, pdpe1gb , 1-GB large page support @@ -720,11 +723,11 @@ 0x80000004, 0, edx, 31:0, cpu_brandid_11 , CPU brand ID string, bytes 44 - 47 # Leaf 80000005H -# AMD L1 cache and L1 TLB enumeration +# AMD/Transmeta L1 cache and L1 TLB enumeration -0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entires, 2M and 4M pages +0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entries, 2M and 4M pages 0x80000005, 0, eax, 15:8, l1_itlb_2m_4m_assoc , L1 ITLB associativity, 2M and 4M pages -0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entires, 2M and 4M pages +0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entries, 2M and 4M pages 0x80000005, 0, eax, 31:24, l1_dtlb_2m_4m_assoc , L1 DTLB associativity, 2M and 4M pages 0x80000005, 0, ebx, 7:0, l1_itlb_4k_nentries , L1 ITLB #entries, 4K pages 0x80000005, 0, ebx, 15:8, l1_itlb_4k_assoc , L1 ITLB associativity, 4K pages @@ -763,11 +766,11 @@ # CPU power management (mostly AMD) and AMD RAS enumeration 0x80000007, 0, ebx, 0, overflow_recov , MCA overflow conditions not fatal -0x80000007, 0, ebx, 1, succor , Software containment of UnCORRectable errors +0x80000007, 0, ebx, 1, succor , Software containment of uncorrectable errors 0x80000007, 0, ebx, 2, hw_assert , Hardware assert MSRs 0x80000007, 0, ebx, 3, smca , Scalable MCA (MCAX MSRs) 0x80000007, 0, ecx, 31:0, cpu_pwr_sample_ratio , CPU power sample time ratio -0x80000007, 0, edx, 0, digital_temp , Digital temprature sensor +0x80000007, 0, edx, 0, digital_temp , Digital temperature sensor 0x80000007, 0, edx, 1, powernow_freq_id , PowerNOW! frequency scaling 0x80000007, 0, edx, 2, powernow_volt_id , PowerNOW! voltage scaling 0x80000007, 0, edx, 3, thermal_trip , THERMTRIP (Thermal Trip) @@ -810,7 +813,7 @@ 0x80000008, 0, ebx, 23, amd_ppin , Protected Processor Inventory Number 0x80000008, 0, ebx, 24, amd_ssbd , Speculative Store Bypass Disable 0x80000008, 0, ebx, 25, virt_ssbd , virtualized SSBD (Speculative Store Bypass Disable) -0x80000008, 0, ebx, 26, amd_ssb_no , SSBD not needed (fixed in HW) +0x80000008, 0, ebx, 26, amd_ssb_no , SSBD is not needed (fixed in hardware) 0x80000008, 0, ebx, 27, cppc , Collaborative Processor Performance Control 0x80000008, 0, ebx, 28, amd_psfd , Predictive Store Forward Disable 0x80000008, 0, ebx, 29, btc_no , CPU not affected by Branch Type Confusion @@ -838,7 +841,7 @@ 0x8000000a, 0, edx, 10, pausefilter , Pause intercept filter 0x8000000a, 0, edx, 12, pfthreshold , Pause filter threshold 0x8000000a, 0, edx, 13, avic , Advanced virtual interrupt controller -0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virt) +0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virtualization) 0x8000000a, 0, edx, 16, vgif , Virtualize the Global Interrupt Flag 0x8000000a, 0, edx, 17, gmet , Guest mode execution trap 0x8000000a, 0, edx, 18, x2avic , Virtual x2APIC @@ -850,7 +853,7 @@ 0x8000000a, 0, edx, 25, vnmi , NMI virtualization 0x8000000a, 0, edx, 26, ibs_virt , IBS Virtualization 0x8000000a, 0, edx, 27, ext_lvt_off_chg , Extended LVT offset fault change -0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME addr check +0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME address check # Leaf 80000019H # AMD TLB 1G-pages enumeration @@ -891,20 +894,20 @@ # AMD LWP (Lightweight Profiling) 0x8000001c, 0, eax, 0, os_lwp_avail , LWP is available to application programs (supported by OS) -0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction (EventId=1) is supported by OS -0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event (EventId=2) is supported by OS -0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event (EventId=3) is supported by OS -0x8000001c, 0, eax, 4, os_lwp_dme , DCache Miss Event (EventId=4) is supported by OS -0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is supported by OS -0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is supported by OS +0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction is supported by OS +0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event is supported by OS +0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event is supported by OS +0x8000001c, 0, eax, 4, os_lwp_dme , Dcache Miss Event is supported by OS +0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event is supported by OS +0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event is supported by OS 0x8000001c, 0, eax, 29, os_lwp_cont , LWP sampling in continuous mode is supported by OS 0x8000001c, 0, eax, 30, os_lwp_ptsc , Performance Time Stamp Counter in event records is supported by OS 0x8000001c, 0, eax, 31, os_lwp_int , Interrupt on threshold overflow is supported by OS 0x8000001c, 0, ebx, 7:0, lwp_lwpcb_sz , LWP Control Block size, in quadwords 0x8000001c, 0, ebx, 15:8, lwp_event_sz , LWP event record size, in bytes -0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventId value (EventID 255 not included) +0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventID value (EventID 255 not included) 0x8000001c, 0, ebx, 31:24, lwp_event_offset , LWP events area offset in the LWP Control Block -0x8000001c, 0, ecx, 4:0, lwp_latency_max , Num of bits in cache latency counters (10 to 31) +0x8000001c, 0, ecx, 4:0, lwp_latency_max , Number of bits in cache latency counters (10 to 31) 0x8000001c, 0, ecx, 5, lwp_data_adddr , Cache miss events report the data address of the reference 0x8000001c, 0, ecx, 8:6, lwp_latency_rnd , Amount by which cache latency is rounded 0x8000001c, 0, ecx, 15:9, lwp_version , LWP implementation version @@ -913,16 +916,16 @@ 0x8000001c, 0, ecx, 29, lwp_ip_filtering , IP filtering (IPI, IPF, BaseIP, and LimitIP @ LWPCP) supported 0x8000001c, 0, ecx, 30, lwp_cache_levels , Cache-related events can be filtered by cache level 0x8000001c, 0, ecx, 31, lwp_cache_latency , Cache-related events can be filtered by latency -0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in Hardware -0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction (EventId=1) is available in HW -0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event (EventId=2) is available in HW -0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event (EventId=3) is available in HW -0x8000001c, 0, edx, 4, hw_lwp_dme , DCache Miss Event (EventId=4) is available in HW -0x8000001c, 0, edx, 5, hw_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is available in HW -0x8000001c, 0, edx, 6, hw_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is available in HW -0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in HW -0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in HW -0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in HW +0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in hardware +0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction is available in hardware +0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event is available in hardware +0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event is available in hardware +0x8000001c, 0, edx, 4, hw_lwp_dme , Dcache Miss Event is available in hardware +0x8000001c, 0, edx, 5, hw_lwp_cnh , Clocks Not Halted event is available in hardware +0x8000001c, 0, edx, 6, hw_lwp_rnh , Reference clocks Not Halted event is available in hardware +0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in hardware +0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in hardware +0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in hardware # Leaf 8000001DH # AMD deterministic cache parameters @@ -958,10 +961,10 @@ 0x8000001f, 0, eax, 4, sev_nested_paging , SEV secure nested paging supported 0x8000001f, 0, eax, 5, vm_permission_levels , VMPL supported 0x8000001f, 0, eax, 6, rpmquery , RPMQUERY instruction supported -0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadwo stack supported +0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadow stack supported 0x8000001f, 0, eax, 8, secure_tsc , Secure TSC supported 0x8000001f, 0, eax, 9, v_tsc_aux , Hardware virtualizes TSC_AUX -0x8000001f, 0, eax, 10, sme_coherent , HW enforces cache coherency across encryption domains +0x8000001f, 0, eax, 10, sme_coherent , Cache coherency is enforced across encryption domains 0x8000001f, 0, eax, 11, req_64bit_hypervisor , SEV guest mandates 64-bit hypervisor 0x8000001f, 0, eax, 12, restricted_injection , Restricted Injection supported 0x8000001f, 0, eax, 13, alternate_injection , Alternate Injection supported @@ -973,13 +976,13 @@ 0x8000001f, 0, eax, 19, virt_ibs , IBS state virtualization is supported for SEV-ES guests 0x8000001f, 0, eax, 24, vmsa_reg_protection , VMSA register protection is supported 0x8000001f, 0, eax, 25, smt_protection , SMT protection is supported -0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000h) is supported +0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000) is supported 0x8000001f, 0, eax, 29, nested_virt_snp_msr , VIRT_RMPUPDATE/VIRT_PSMASH MSRs are supported 0x8000001f, 0, ebx, 5:0, pte_cbit_pos , PTE bit number used to enable memory encryption 0x8000001f, 0, ebx, 11:6, phys_addr_reduction_nbits, Reduction of phys address space when encryption is enabled, in bits 0x8000001f, 0, ebx, 15:12, vmpl_count , Number of VM permission levels (VMPL) supported 0x8000001f, 0, ecx, 31:0, enc_guests_max , Max supported number of simultaneous encrypted guests -0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Mininum ASID for SEV-enabled SEV-ES-disabled guest +0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Minimum ASID for SEV-enabled SEV-ES-disabled guest # Leaf 80000020H # AMD Platform QoS extended feature IDs @@ -988,6 +991,8 @@ 0x80000020, 0, ebx, 2, smba , Slow Memory Bandwidth Allocation support 0x80000020, 0, ebx, 3, bmec , Bandwidth Monitoring Event Configuration support 0x80000020, 0, ebx, 4, l3rr , L3 Range Reservation support +0x80000020, 0, ebx, 5, abmc , Assignable Bandwidth Monitoring Counters +0x80000020, 0, ebx, 6, sdciae , Smart Data Cache Injection (SDCI) Allocation Enforcement 0x80000020, 1, eax, 31:0, mba_limit_len , MBA enforcement limit size 0x80000020, 1, edx, 31:0, mba_cos_max , MBA max Class of Service number (zero-based) 0x80000020, 2, eax, 31:0, smba_limit_len , SMBA enforcement limit size @@ -1007,17 +1012,26 @@ 0x80000021, 0, eax, 0, no_nested_data_bp , No nested data breakpoints 0x80000021, 0, eax, 1, fsgs_non_serializing , WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing 0x80000021, 0, eax, 2, lfence_rdtsc , LFENCE always serializing / synchronizes RDTSC -0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock is supported +0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock 0x80000021, 0, eax, 6, null_sel_clr_base , Null selector clears base -0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore Enable bit supported -0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS enable bit supported -0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not present -0x80000021, 0, eax, 10, fsrs_supported , Fast Short Rep Stosb (FSRS) is supported -0x80000021, 0, eax, 11, fsrc_supported , Fast Short Repe Cmpsb (FSRC) is supported -0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is supported +0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore +0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS +0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not available +0x80000021, 0, eax, 10, fsrs , Fast Short Rep STOSB +0x80000021, 0, eax, 11, fsrc , Fast Short Rep CMPSB +0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is available +0x80000021, 0, eax, 16, opcode_reclaim , Reserves opcode space 0x80000021, 0, eax, 17, user_cpuid_disable , #GP when executing CPUID at CPL > 0 is supported -0x80000021, 0, eax, 18, epsf_supported , Enhanced Predictive Store Forwarding (EPSF) is supported -0x80000021, 0, ebx, 11:0, microcode_patch_size , Size of microcode patch, in 16-byte units +0x80000021, 0, eax, 18, epsf , Enhanced Predictive Store Forwarding +0x80000021, 0, eax, 22, wl_feedback , Workload-based heuristic feedback to OS +0x80000021, 0, eax, 24, eraps , Enhanced Return Address Predictor Security +0x80000021, 0, eax, 27, sbpb , Selective Branch Predictor Barrier +0x80000021, 0, eax, 28, ibpb_brtype , Branch predictions flushed from CPU branch predictor +0x80000021, 0, eax, 29, srso_no , CPU is not subject to the SRSO vulnerability +0x80000021, 0, eax, 30, srso_uk_no , CPU is not vulnerable to SRSO at user-kernel boundary +0x80000021, 0, eax, 31, srso_msr_fix , Software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO +0x80000021, 0, ebx, 15:0, microcode_patch_size , Size of microcode patch, in 16-byte units +0x80000021, 0, ebx, 23:16, rap_size , Return Address Predictor size # Leaf 80000022H # AMD Performance Monitoring v2 enumeration @@ -1025,7 +1039,7 @@ 0x80000022, 0, eax, 0, perfmon_v2 , Performance monitoring v2 supported 0x80000022, 0, eax, 1, lbr_v2 , Last Branch Record v2 extensions (LBR Stack) 0x80000022, 0, eax, 2, lbr_pmc_freeze , Freezing core performance counters / LBR Stack supported -0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core perfomance counters +0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core performance counters 0x80000022, 0, ebx, 9:4, lbr_v2_stack_size , Number of available LBR stack entries 0x80000022, 0, ebx, 15:10, n_pmc_northbridge , Number of available northbridge (data fabric) performance counters 0x80000022, 0, ebx, 21:16, n_pmc_umc , Number of available UMC performance counters @@ -1035,7 +1049,7 @@ # AMD Secure Multi-key Encryption enumeration 0x80000023, 0, eax, 0, mem_hmk_mode , MEM-HMK encryption mode is supported -0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total num of available encryption keys +0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total number of available encryption keys # Leaf 80000026H # AMD extended topology enumeration v2 @@ -1051,3 +1065,108 @@ 0x80000026, 3:0, ecx, 7:0, domain_level , This domain level (subleaf ID) 0x80000026, 3:0, ecx, 15:8, domain_type , This domain type 0x80000026, 3:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU + +# Leaf 80860000H +# Maximum Transmeta leaf number + CPU vendor ID string + +0x80860000, 0, eax, 31:0, max_tra_leaf , Maximum supported Transmeta leaf number +0x80860000, 0, ebx, 31:0, cpu_vendorid_0 , Transmeta Vendor ID string bytes 0 - 3 +0x80860000, 0, ecx, 31:0, cpu_vendorid_2 , Transmeta Vendor ID string bytes 8 - 11 +0x80860000, 0, edx, 31:0, cpu_vendorid_1 , Transmeta Vendor ID string bytes 4 - 7 + +# Leaf 80860001H +# Transmeta extended CPU information + +0x80860001, 0, eax, 3:0, stepping , Stepping ID +0x80860001, 0, eax, 7:4, base_model , Base CPU model ID +0x80860001, 0, eax, 11:8, base_family_id , Base CPU family ID +0x80860001, 0, eax, 13:12, cpu_type , CPU type +0x80860001, 0, ebx, 7:0, cpu_rev_mask_minor , CPU revision ID, mask minor +0x80860001, 0, ebx, 15:8, cpu_rev_mask_major , CPU revision ID, mask major +0x80860001, 0, ebx, 23:16, cpu_rev_minor , CPU revision ID, minor +0x80860001, 0, ebx, 31:24, cpu_rev_major , CPU revision ID, major +0x80860001, 0, ecx, 31:0, cpu_base_mhz , CPU nominal frequency, in MHz +0x80860001, 0, edx, 0, recovery , Recovery CMS is active (after bad flush) +0x80860001, 0, edx, 1, longrun , LongRun power management capabilities +0x80860001, 0, edx, 3, lrti , LongRun Table Interface + +# Leaf 80860002H +# Transmeta Code Morphing Software (CMS) enumeration + +0x80860002, 0, eax, 31:0, cpu_rev_id , CPU revision ID +0x80860002, 0, ebx, 7:0, cms_rev_mask_2 , CMS revision ID, mask component 2 +0x80860002, 0, ebx, 15:8, cms_rev_mask_1 , CMS revision ID, mask component 1 +0x80860002, 0, ebx, 23:16, cms_rev_minor , CMS revision ID, minor +0x80860002, 0, ebx, 31:24, cms_rev_major , CMS revision ID, major +0x80860002, 0, ecx, 31:0, cms_rev_mask_3 , CMS revision ID, mask component 3 + +# Leaf 80860003H +# Transmeta CPU information string, bytes 0 - 15 + +0x80860003, 0, eax, 31:0, cpu_info_0 , CPU info string bytes 0 - 3 +0x80860003, 0, ebx, 31:0, cpu_info_1 , CPU info string bytes 4 - 7 +0x80860003, 0, ecx, 31:0, cpu_info_2 , CPU info string bytes 8 - 11 +0x80860003, 0, edx, 31:0, cpu_info_3 , CPU info string bytes 12 - 15 + +# Leaf 80860004H +# Transmeta CPU information string, bytes 16 - 31 + +0x80860004, 0, eax, 31:0, cpu_info_4 , CPU info string bytes 16 - 19 +0x80860004, 0, ebx, 31:0, cpu_info_5 , CPU info string bytes 20 - 23 +0x80860004, 0, ecx, 31:0, cpu_info_6 , CPU info string bytes 24 - 27 +0x80860004, 0, edx, 31:0, cpu_info_7 , CPU info string bytes 28 - 31 + +# Leaf 80860005H +# Transmeta CPU information string, bytes 32 - 47 + +0x80860005, 0, eax, 31:0, cpu_info_8 , CPU info string bytes 32 - 35 +0x80860005, 0, ebx, 31:0, cpu_info_9 , CPU info string bytes 36 - 39 +0x80860005, 0, ecx, 31:0, cpu_info_10 , CPU info string bytes 40 - 43 +0x80860005, 0, edx, 31:0, cpu_info_11 , CPU info string bytes 44 - 47 + +# Leaf 80860006H +# Transmeta CPU information string, bytes 48 - 63 + +0x80860006, 0, eax, 31:0, cpu_info_12 , CPU info string bytes 48 - 51 +0x80860006, 0, ebx, 31:0, cpu_info_13 , CPU info string bytes 52 - 55 +0x80860006, 0, ecx, 31:0, cpu_info_14 , CPU info string bytes 56 - 59 +0x80860006, 0, edx, 31:0, cpu_info_15 , CPU info string bytes 60 - 63 + +# Leaf 80860007H +# Transmeta live CPU information + +0x80860007, 0, eax, 31:0, cpu_cur_mhz , Current CPU frequency, in MHz +0x80860007, 0, ebx, 31:0, cpu_cur_voltage , Current CPU voltage, in millivolts +0x80860007, 0, ecx, 31:0, cpu_cur_perf_pctg , Current CPU performance percentage, 0 - 100 +0x80860007, 0, edx, 31:0, cpu_cur_gate_delay , Current CPU gate delay, in femtoseconds + +# Leaf C0000000H +# Maximum Centaur/Zhaoxin leaf number + +0xc0000000, 0, eax, 31:0, max_cntr_leaf , Maximum Centaur/Zhaoxin leaf number + +# Leaf C0000001H +# Centaur/Zhaoxin extended CPU features + +0xc0000001, 0, edx, 0, ccs_sm2 , CCS SM2 instructions +0xc0000001, 0, edx, 1, ccs_sm2_en , CCS SM2 enabled +0xc0000001, 0, edx, 2, xstore , Random Number Generator +0xc0000001, 0, edx, 3, xstore_en , RNG enabled +0xc0000001, 0, edx, 4, ccs_sm3_sm4 , CCS SM3 and SM4 instructions +0xc0000001, 0, edx, 5, ccs_sm3_sm4_en , CCS SM3/SM4 enabled +0xc0000001, 0, edx, 6, ace , Advanced Cryptography Engine +0xc0000001, 0, edx, 7, ace_en , ACE enabled +0xc0000001, 0, edx, 8, ace2 , Advanced Cryptography Engine v2 +0xc0000001, 0, edx, 9, ace2_en , ACE v2 enabled +0xc0000001, 0, edx, 10, phe , PadLock Hash Engine +0xc0000001, 0, edx, 11, phe_en , PHE enabled +0xc0000001, 0, edx, 12, pmm , PadLock Montgomery Multiplier +0xc0000001, 0, edx, 13, pmm_en , PMM enabled +0xc0000001, 0, edx, 16, parallax , Parallax auto adjust processor voltage +0xc0000001, 0, edx, 17, parallax_en , Parallax enabled +0xc0000001, 0, edx, 20, tm3 , Thermal Monitor v3 +0xc0000001, 0, edx, 21, tm3_en , TM v3 enabled +0xc0000001, 0, edx, 25, phe2 , PadLock Hash Engine v2 (SHA384/SHA512) +0xc0000001, 0, edx, 26, phe2_en , PHE v2 enabled +0xc0000001, 0, edx, 27, rsa , RSA instructions (XMODEXP/MONTMUL2) +0xc0000001, 0, edx, 28, rsa_en , RSA instructions enabled diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 1b25c0a95d3f..7dc6b9235d02 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -1,14 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE -#include <stdio.h> +#include <cpuid.h> +#include <err.h> +#include <getopt.h> #include <stdbool.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> -#include <getopt.h> #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define min(a, b) (((a) < (b)) ? (a) : (b)) +#define __noreturn __attribute__((__noreturn__)) typedef unsigned int u32; typedef unsigned long long u64; @@ -49,7 +52,7 @@ static const char * const reg_names[] = { struct subleaf { u32 index; u32 sub; - u32 eax, ebx, ecx, edx; + u32 output[NR_REGS]; struct reg_desc info[NR_REGS]; }; @@ -63,21 +66,64 @@ struct cpuid_func { int nr; }; +enum range_index { + RANGE_STD = 0, /* Standard */ + RANGE_EXT = 0x80000000, /* Extended */ + RANGE_TSM = 0x80860000, /* Transmeta */ + RANGE_CTR = 0xc0000000, /* Centaur/Zhaoxin */ +}; + +#define CPUID_INDEX_MASK 0xffff0000 +#define CPUID_FUNCTION_MASK (~CPUID_INDEX_MASK) + struct cpuid_range { /* array of main leafs */ struct cpuid_func *funcs; /* number of valid leafs */ int nr; - bool is_ext; + enum range_index index; }; -/* - * basic: basic functions range: [0... ] - * ext: extended functions range: [0x80000000... ] - */ -struct cpuid_range *leafs_basic, *leafs_ext; +static struct cpuid_range ranges[] = { + { .index = RANGE_STD, }, + { .index = RANGE_EXT, }, + { .index = RANGE_TSM, }, + { .index = RANGE_CTR, }, +}; + +static char *range_to_str(struct cpuid_range *range) +{ + switch (range->index) { + case RANGE_STD: return "Standard"; + case RANGE_EXT: return "Extended"; + case RANGE_TSM: return "Transmeta"; + case RANGE_CTR: return "Centaur"; + default: return NULL; + } +} + +#define __for_each_cpuid_range(range, __condition) \ + for (unsigned int i = 0; \ + i < ARRAY_SIZE(ranges) && ((range) = &ranges[i]) && (__condition); \ + i++) + +#define for_each_valid_cpuid_range(range) __for_each_cpuid_range(range, (range)->nr != 0) +#define for_each_cpuid_range(range) __for_each_cpuid_range(range, true) + +struct cpuid_range *index_to_cpuid_range(u32 index) +{ + u32 func_idx = index & CPUID_FUNCTION_MASK; + u32 range_idx = index & CPUID_INDEX_MASK; + struct cpuid_range *range; + + for_each_valid_cpuid_range(range) { + if (range->index == range_idx && (u32)range->nr > func_idx) + return range; + } + + return NULL; +} -static bool is_amd; static bool show_details; static bool show_raw; static bool show_flags_only = true; @@ -85,16 +131,16 @@ static u32 user_index = 0xFFFFFFFF; static u32 user_sub = 0xFFFFFFFF; static int flines; -static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} +/* + * Force using <cpuid.h> __cpuid_count() instead of __cpuid(). The + * latter leaves ECX uninitialized, which can break CPUID queries. + */ + +#define cpuid(leaf, a, b, c, d) \ + __cpuid_count(leaf, 0, a, b, c, d) + +#define cpuid_count(leaf, subleaf, a, b, c, d) \ + __cpuid_count(leaf, subleaf, a, b, c, d) static inline bool has_subleafs(u32 f) { @@ -117,11 +163,11 @@ static void leaf_print_raw(struct subleaf *leaf) if (leaf->sub == 0) printf("0x%08x: subleafs:\n", leaf->index); - printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", - leaf->sub, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx); + printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->sub, + leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]); } else { - printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", - leaf->index, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx); + printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->index, + leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]); } } @@ -140,19 +186,19 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, * Cut off vendor-prefix from CPUID function as we're using it as an * index into ->funcs. */ - func = &range->funcs[f & 0xffff]; + func = &range->funcs[f & CPUID_FUNCTION_MASK]; if (!func->leafs) { func->leafs = malloc(sizeof(struct subleaf)); if (!func->leafs) - perror("malloc func leaf"); + err(EXIT_FAILURE, NULL); func->nr = 1; } else { s = func->nr; func->leafs = realloc(func->leafs, (s + 1) * sizeof(*leaf)); if (!func->leafs) - perror("realloc f->leafs"); + err(EXIT_FAILURE, NULL); func->nr++; } @@ -161,84 +207,73 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, leaf->index = f; leaf->sub = subleaf; - leaf->eax = a; - leaf->ebx = b; - leaf->ecx = c; - leaf->edx = d; + leaf->output[R_EAX] = a; + leaf->output[R_EBX] = b; + leaf->output[R_ECX] = c; + leaf->output[R_EDX] = d; return false; } static void raw_dump_range(struct cpuid_range *range) { - u32 f; - int i; - - printf("%s Leafs :\n", range->is_ext ? "Extended" : "Basic"); + printf("%s Leafs :\n", range_to_str(range)); printf("================\n"); - for (f = 0; (int)f < range->nr; f++) { + for (u32 f = 0; (int)f < range->nr; f++) { struct cpuid_func *func = &range->funcs[f]; - u32 index = f; - - if (range->is_ext) - index += 0x80000000; /* Skip leaf without valid items */ if (!func->nr) continue; /* First item is the main leaf, followed by all subleafs */ - for (i = 0; i < func->nr; i++) + for (int i = 0; i < func->nr; i++) leaf_print_raw(&func->leafs[i]); } } #define MAX_SUBLEAF_NUM 64 -struct cpuid_range *setup_cpuid_range(u32 input_eax) +#define MAX_RANGE_INDEX_OFFSET 0xff +void setup_cpuid_range(struct cpuid_range *range) { - u32 max_func, idx_func, subleaf, max_subleaf; - u32 eax, ebx, ecx, edx, f = input_eax; - struct cpuid_range *range; - bool allzero; - - eax = input_eax; - ebx = ecx = edx = 0; + u32 max_func, range_funcs_sz; + u32 eax, ebx, ecx, edx; - cpuid(&eax, &ebx, &ecx, &edx); - max_func = eax; - idx_func = (max_func & 0xffff) + 1; + cpuid(range->index, max_func, ebx, ecx, edx); - range = malloc(sizeof(struct cpuid_range)); - if (!range) - perror("malloc range"); + /* + * If the CPUID range's maximum function value is garbage, then it + * is not recognized by this CPU. Set the range's number of valid + * leaves to zero so that for_each_valid_cpu_range() can ignore it. + */ + if (max_func < range->index || max_func > (range->index + MAX_RANGE_INDEX_OFFSET)) { + range->nr = 0; + return; + } - if (input_eax & 0x80000000) - range->is_ext = true; - else - range->is_ext = false; + range->nr = (max_func & CPUID_FUNCTION_MASK) + 1; + range_funcs_sz = range->nr * sizeof(struct cpuid_func); - range->funcs = malloc(sizeof(struct cpuid_func) * idx_func); + range->funcs = malloc(range_funcs_sz); if (!range->funcs) - perror("malloc range->funcs"); + err(EXIT_FAILURE, NULL); - range->nr = idx_func; - memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func); + memset(range->funcs, 0, range_funcs_sz); - for (; f <= max_func; f++) { - eax = f; - subleaf = ecx = 0; + for (u32 f = range->index; f <= max_func; f++) { + u32 max_subleaf = MAX_SUBLEAF_NUM; + bool allzero; - cpuid(&eax, &ebx, &ecx, &edx); - allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx); + cpuid(f, eax, ebx, ecx, edx); + + allzero = cpuid_store(range, f, 0, eax, ebx, ecx, edx); if (allzero) continue; if (!has_subleafs(f)) continue; - max_subleaf = MAX_SUBLEAF_NUM; - /* * Some can provide the exact number of subleafs, * others have to be tried (0xf) @@ -256,20 +291,15 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) if (f == 0x80000026) max_subleaf = 5; - for (subleaf = 1; subleaf < max_subleaf; subleaf++) { - eax = f; - ecx = subleaf; + for (u32 subleaf = 1; subleaf < max_subleaf; subleaf++) { + cpuid_count(f, subleaf, eax, ebx, ecx, edx); - cpuid(&eax, &ebx, &ecx, &edx); - allzero = cpuid_store(range, f, subleaf, - eax, ebx, ecx, edx); + allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx); if (allzero) continue; } } - - return range; } /* @@ -280,15 +310,13 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) * 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs * 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3) */ -static int parse_line(char *line) +static void parse_line(char *line) { char *str; - int i; struct cpuid_range *range; struct cpuid_func *func; struct subleaf *leaf; u32 index; - u32 sub; char buffer[512]; char *buf; /* @@ -310,12 +338,12 @@ static int parse_line(char *line) /* Skip comments and NULL line */ if (line[0] == '#' || line[0] == '\n') - return 0; + return; strncpy(buffer, line, 511); buffer[511] = 0; str = buffer; - for (i = 0; i < 5; i++) { + for (int i = 0; i < 5; i++) { tokens[i] = strtok(str, ","); if (!tokens[i]) goto err_exit; @@ -328,21 +356,19 @@ static int parse_line(char *line) /* index/main-leaf */ index = strtoull(tokens[0], NULL, 0); - if (index & 0x80000000) - range = leafs_ext; - else - range = leafs_basic; - - index &= 0x7FFFFFFF; - /* Skip line parsing for non-existing indexes */ - if ((int)index >= range->nr) - return -1; + /* + * Skip line parsing if the index is not covered by known-valid + * CPUID ranges on this CPU. + */ + range = index_to_cpuid_range(index); + if (!range) + return; + /* Skip line parsing if the index CPUID output is all zero */ + index &= CPUID_FUNCTION_MASK; func = &range->funcs[index]; - - /* Return if the index has no valid item on this platform */ if (!func->nr) - return 0; + return; /* subleaf */ buf = tokens[1]; @@ -355,11 +381,11 @@ static int parse_line(char *line) subleaf_start = strtoul(start, NULL, 0); subleaf_end = min(subleaf_end, (u32)(func->nr - 1)); if (subleaf_start > subleaf_end) - return 0; + return; } else { subleaf_start = subleaf_end; if (subleaf_start > (u32)(func->nr - 1)) - return 0; + return; } /* register */ @@ -382,7 +408,7 @@ static int parse_line(char *line) bit_end = strtoul(end, NULL, 0); bit_start = (start) ? strtoul(start, NULL, 0) : bit_end; - for (sub = subleaf_start; sub <= subleaf_end; sub++) { + for (u32 sub = subleaf_start; sub <= subleaf_end; sub++) { leaf = &func->leafs[sub]; reg = &leaf->info[reg_index]; bdesc = ®->descs[reg->nr++]; @@ -392,12 +418,11 @@ static int parse_line(char *line) strcpy(bdesc->simp, strtok(tokens[4], " \t")); strcpy(bdesc->detail, tokens[5]); } - return 0; + return; err_exit: - printf("Warning: wrong line format:\n"); - printf("\tline[%d]: %s\n", flines, line); - return -1; + warnx("Wrong line format:\n" + "\tline[%d]: %s", flines, line); } /* Parse csv file, and construct the array of all leafs and subleafs */ @@ -418,10 +443,8 @@ static void parse_text(void) file = fopen("./cpuid.csv", "r"); } - if (!file) { - printf("Fail to open '%s'\n", filename); - return; - } + if (!file) + err(EXIT_FAILURE, "%s", filename); while (1) { ret = getline(&line, &len, file); @@ -436,21 +459,13 @@ static void parse_text(void) fclose(file); } - -/* Decode every eax/ebx/ecx/edx */ -static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg) +static void show_reg(const struct reg_desc *rdesc, u32 value) { - struct bits_desc *bdesc; - int start, end, i; + const struct bits_desc *bdesc; + int start, end; u32 mask; - if (!rdesc->nr) { - if (show_details) - printf("\t %s: 0x%08x\n", reg_names[reg], value); - return; - } - - for (i = 0; i < rdesc->nr; i++) { + for (int i = 0; i < rdesc->nr; i++) { bdesc = &rdesc->descs[i]; start = bdesc->start; @@ -480,23 +495,21 @@ static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg) } } -static void show_leaf(struct subleaf *leaf) +static void show_reg_header(bool has_entries, u32 leaf, u32 subleaf, const char *reg_name) { - if (!leaf) - return; + if (show_details && has_entries) + printf("CPUID_0x%x_%s[0x%x]:\n", leaf, reg_name, subleaf); +} - if (show_raw) { +static void show_leaf(struct subleaf *leaf) +{ + if (show_raw) leaf_print_raw(leaf); - } else { - if (show_details) - printf("CPUID_0x%x_ECX[0x%x]:\n", - leaf->index, leaf->sub); - } - decode_bits(leaf->eax, &leaf->info[R_EAX], R_EAX); - decode_bits(leaf->ebx, &leaf->info[R_EBX], R_EBX); - decode_bits(leaf->ecx, &leaf->info[R_ECX], R_ECX); - decode_bits(leaf->edx, &leaf->info[R_EDX], R_EDX); + for (int i = R_EAX; i < NR_REGS; i++) { + show_reg_header((leaf->info[i].nr > 0), leaf->index, leaf->sub, reg_names[i]); + show_reg(&leaf->info[i], leaf->output[i]); + } if (!show_raw && show_details) printf("\n"); @@ -504,46 +517,37 @@ static void show_leaf(struct subleaf *leaf) static void show_func(struct cpuid_func *func) { - int i; - - if (!func) - return; - - for (i = 0; i < func->nr; i++) + for (int i = 0; i < func->nr; i++) show_leaf(&func->leafs[i]); } static void show_range(struct cpuid_range *range) { - int i; - - for (i = 0; i < range->nr; i++) + for (int i = 0; i < range->nr; i++) show_func(&range->funcs[i]); } static inline struct cpuid_func *index_to_func(u32 index) { + u32 func_idx = index & CPUID_FUNCTION_MASK; struct cpuid_range *range; - u32 func_idx; - - range = (index & 0x80000000) ? leafs_ext : leafs_basic; - func_idx = index & 0xffff; - if ((func_idx + 1) > (u32)range->nr) { - printf("ERR: invalid input index (0x%x)\n", index); + range = index_to_cpuid_range(index); + if (!range) return NULL; - } + return &range->funcs[func_idx]; } static void show_info(void) { + struct cpuid_range *range; struct cpuid_func *func; if (show_raw) { /* Show all of the raw output of 'cpuid' instr */ - raw_dump_range(leafs_basic); - raw_dump_range(leafs_ext); + for_each_valid_cpuid_range(range) + raw_dump_range(range); return; } @@ -551,18 +555,19 @@ static void show_info(void) /* Only show specific leaf/subleaf info */ func = index_to_func(user_index); if (!func) - return; + errx(EXIT_FAILURE, "Invalid input leaf (0x%x)", user_index); /* Dump the raw data also */ show_raw = true; if (user_sub != 0xFFFFFFFF) { - if (user_sub + 1 <= (u32)func->nr) { - show_leaf(&func->leafs[user_sub]); - return; + if (user_sub + 1 > (u32)func->nr) { + errx(EXIT_FAILURE, "Leaf 0x%x has no valid subleaf = 0x%x", + user_index, user_sub); } - printf("ERR: invalid input subleaf (0x%x)\n", user_sub); + show_leaf(&func->leafs[user_sub]); + return; } show_func(func); @@ -570,38 +575,21 @@ static void show_info(void) } printf("CPU features:\n=============\n\n"); - show_range(leafs_basic); - show_range(leafs_ext); + for_each_valid_cpuid_range(range) + show_range(range); } -static void setup_platform_cpuid(void) +static void __noreturn usage(int exit_code) { - u32 eax, ebx, ecx, edx; - - /* Check vendor */ - eax = ebx = ecx = edx = 0; - cpuid(&eax, &ebx, &ecx, &edx); - - /* "htuA" */ - if (ebx == 0x68747541) - is_amd = true; - - /* Setup leafs for the basic and extended range */ - leafs_basic = setup_cpuid_range(0x0); - leafs_ext = setup_cpuid_range(0x80000000); -} - -static void usage(void) -{ - printf("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" - "\t-a|--all Show both bit flags and complex bit fields info\n" - "\t-b|--bitflags Show boolean flags only\n" - "\t-d|--detail Show details of the flag/fields (default)\n" - "\t-f|--flags Specify the cpuid csv file\n" - "\t-h|--help Show usage info\n" - "\t-l|--leaf=index Specify the leaf you want to check\n" - "\t-r|--raw Show raw cpuid data\n" - "\t-s|--subleaf=sub Specify the subleaf you want to check\n" + errx(exit_code, "kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" + "\t-a|--all Show both bit flags and complex bit fields info\n" + "\t-b|--bitflags Show boolean flags only\n" + "\t-d|--detail Show details of the flag/fields (default)\n" + "\t-f|--flags Specify the CPUID CSV file\n" + "\t-h|--help Show usage info\n" + "\t-l|--leaf=index Specify the leaf you want to check\n" + "\t-r|--raw Show raw CPUID data\n" + "\t-s|--subleaf=sub Specify the subleaf you want to check" ); } @@ -617,7 +605,7 @@ static struct option opts[] = { { NULL, 0, NULL, 0 } }; -static int parse_options(int argc, char *argv[]) +static void parse_options(int argc, char *argv[]) { int c; @@ -637,9 +625,7 @@ static int parse_options(int argc, char *argv[]) user_csv = optarg; break; case 'h': - usage(); - exit(1); - break; + usage(EXIT_SUCCESS); case 'l': /* main leaf */ user_index = strtoul(optarg, NULL, 0); @@ -652,11 +638,8 @@ static int parse_options(int argc, char *argv[]) user_sub = strtoul(optarg, NULL, 0); break; default: - printf("%s: Invalid option '%c'\n", argv[0], optopt); - return -1; - } - - return 0; + usage(EXIT_FAILURE); + } } /* @@ -669,11 +652,13 @@ static int parse_options(int argc, char *argv[]) */ int main(int argc, char *argv[]) { - if (parse_options(argc, argv)) - return -1; + struct cpuid_range *range; + + parse_options(argc, argv); /* Setup the cpuid leafs of current platform */ - setup_platform_cpuid(); + for_each_cpuid_range(range) + setup_cpuid_range(range); /* Read and parse the 'cpuid.csv' */ parse_text(); diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c index e91d4c4e1c16..bce69c6bfa69 100644 --- a/tools/arch/x86/lib/insn.c +++ b/tools/arch/x86/lib/insn.c @@ -324,6 +324,11 @@ int insn_get_opcode(struct insn *insn) } insn->attr = inat_get_opcode_attribute(op); + if (insn->x86_64 && inat_is_invalid64(insn->attr)) { + /* This instruction is invalid, like UD2. Stop decoding. */ + insn->attr &= INAT_INV64; + } + while (inat_is_escape(insn->attr)) { /* Get escaped opcode */ op = get_next(insn_byte_t, insn); @@ -337,6 +342,7 @@ int insn_get_opcode(struct insn *insn) insn->attr = 0; return -EINVAL; } + end: opcode->got = 1; return 0; @@ -658,7 +664,6 @@ int insn_get_immediate(struct insn *insn) } if (!inat_has_immediate(insn->attr)) - /* no immediates */ goto done; switch (inat_immediate_size(insn->attr)) { diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 0199d56cb479..d66b710d628f 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -3,6 +3,7 @@ #include <linux/export.h> #include <linux/linkage.h> +#include <linux/cfi_types.h> #include <asm/cpufeatures.h> #include <asm/alternative.h> @@ -28,7 +29,7 @@ * only for the return value that is the same as the source input, * which the compiler could/should do much better anyway. */ -SYM_FUNC_START(__memset) +SYM_TYPED_FUNC_START(__memset) ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS movq %rdi,%r9 diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt index caedb3ef6688..262f7ca1fb95 100644 --- a/tools/arch/x86/lib/x86-opcode-map.txt +++ b/tools/arch/x86/lib/x86-opcode-map.txt @@ -35,7 +35,7 @@ # - (!F3) : the last prefix is not 0xF3 (including non-last prefix case) # - (66&F2): Both 0x66 and 0xF2 prefixes are specified. # -# REX2 Prefix +# REX2 Prefix Superscripts # - (!REX2): REX2 is not allowed # - (REX2): REX2 variant e.g. JMPABS @@ -147,7 +147,7 @@ AVXcode: # 0x60 - 0x6f 60: PUSHA/PUSHAD (i64) 61: POPA/POPAD (i64) -62: BOUND Gv,Ma (i64) | EVEX (Prefix) +62: BOUND Gv,Ma (i64) | EVEX (Prefix),(o64) 63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64) 64: SEG=FS (Prefix) 65: SEG=GS (Prefix) @@ -253,8 +253,8 @@ c0: Grp2 Eb,Ib (1A) c1: Grp2 Ev,Ib (1A) c2: RETN Iw (f64) c3: RETN -c4: LES Gz,Mp (i64) | VEX+2byte (Prefix) -c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix) +c4: LES Gz,Mp (i64) | VEX+2byte (Prefix),(o64) +c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix),(o64) c6: Grp11A Eb,Ib (1A) c7: Grp11B Ev,Iz (1A) c8: ENTER Iw,Ib @@ -286,10 +286,10 @@ df: ESC # Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix # in 64-bit mode. AMD CPUs accept 0x66 prefix, it causes RIP truncation # to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD. -e0: LOOPNE/LOOPNZ Jb (f64) (!REX2) -e1: LOOPE/LOOPZ Jb (f64) (!REX2) -e2: LOOP Jb (f64) (!REX2) -e3: JrCXZ Jb (f64) (!REX2) +e0: LOOPNE/LOOPNZ Jb (f64),(!REX2) +e1: LOOPE/LOOPZ Jb (f64),(!REX2) +e2: LOOP Jb (f64),(!REX2) +e3: JrCXZ Jb (f64),(!REX2) e4: IN AL,Ib (!REX2) e5: IN eAX,Ib (!REX2) e6: OUT Ib,AL (!REX2) @@ -298,10 +298,10 @@ e7: OUT Ib,eAX (!REX2) # in "near" jumps and calls is 16-bit. For CALL, # push of return address is 16-bit wide, RSP is decremented by 2 # but is not truncated to 16 bits, unlike RIP. -e8: CALL Jz (f64) (!REX2) -e9: JMP-near Jz (f64) (!REX2) -ea: JMP-far Ap (i64) (!REX2) -eb: JMP-short Jb (f64) (!REX2) +e8: CALL Jz (f64),(!REX2) +e9: JMP-near Jz (f64),(!REX2) +ea: JMP-far Ap (i64),(!REX2) +eb: JMP-short Jb (f64),(!REX2) ec: IN AL,DX (!REX2) ed: IN eAX,DX (!REX2) ee: OUT DX,AL (!REX2) @@ -478,22 +478,22 @@ AVXcode: 1 7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqa32/64 Wx,Vx (66),(evo) | vmovdqu Wx,Vx (F3) | vmovdqu32/64 Wx,Vx (F3),(evo) | vmovdqu8/16 Wx,Vx (F2),(ev) # 0x0f 0x80-0x8f # Note: "forced64" is Intel CPU behavior (see comment about CALL insn). -80: JO Jz (f64) (!REX2) -81: JNO Jz (f64) (!REX2) -82: JB/JC/JNAE Jz (f64) (!REX2) -83: JAE/JNB/JNC Jz (f64) (!REX2) -84: JE/JZ Jz (f64) (!REX2) -85: JNE/JNZ Jz (f64) (!REX2) -86: JBE/JNA Jz (f64) (!REX2) -87: JA/JNBE Jz (f64) (!REX2) -88: JS Jz (f64) (!REX2) -89: JNS Jz (f64) (!REX2) -8a: JP/JPE Jz (f64) (!REX2) -8b: JNP/JPO Jz (f64) (!REX2) -8c: JL/JNGE Jz (f64) (!REX2) -8d: JNL/JGE Jz (f64) (!REX2) -8e: JLE/JNG Jz (f64) (!REX2) -8f: JNLE/JG Jz (f64) (!REX2) +80: JO Jz (f64),(!REX2) +81: JNO Jz (f64),(!REX2) +82: JB/JC/JNAE Jz (f64),(!REX2) +83: JAE/JNB/JNC Jz (f64),(!REX2) +84: JE/JZ Jz (f64),(!REX2) +85: JNE/JNZ Jz (f64),(!REX2) +86: JBE/JNA Jz (f64),(!REX2) +87: JA/JNBE Jz (f64),(!REX2) +88: JS Jz (f64),(!REX2) +89: JNS Jz (f64),(!REX2) +8a: JP/JPE Jz (f64),(!REX2) +8b: JNP/JPO Jz (f64),(!REX2) +8c: JL/JNGE Jz (f64),(!REX2) +8d: JNL/JGE Jz (f64),(!REX2) +8e: JLE/JNG Jz (f64),(!REX2) +8f: JNLE/JG Jz (f64),(!REX2) # 0x0f 0x90-0x9f 90: SETO Eb | kmovw/q Vk,Wk | kmovb/d Vk,Wk (66) 91: SETNO Eb | kmovw/q Mv,Vk | kmovb/d Mv,Vk (66) @@ -996,8 +996,8 @@ AVXcode: 4 83: Grp1 Ev,Ib (1A),(es) # CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL, # CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ -84: CTESTSCC (ev) -85: CTESTSCC (es) | CTESTSCC (66),(es) +84: CTESTSCC Eb,Gb (ev) +85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es) 88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es) 8f: POP2 Bq,Rq (000),(11B),(ev) a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es) diff --git a/tools/arch/x86/tools/gen-insn-attr-x86.awk b/tools/arch/x86/tools/gen-insn-attr-x86.awk index 5770c8097f32..2c19d7fc8a85 100644 --- a/tools/arch/x86/tools/gen-insn-attr-x86.awk +++ b/tools/arch/x86/tools/gen-insn-attr-x86.awk @@ -64,6 +64,8 @@ BEGIN { modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])" force64_expr = "\\([df]64\\)" + invalid64_expr = "\\(i64\\)" + only64_expr = "\\(o64\\)" rex_expr = "^((REX(\\.[XRWB]+)+)|(REX$))" rex2_expr = "\\(REX2\\)" no_rex2_expr = "\\(!REX2\\)" @@ -319,6 +321,11 @@ function convert_operands(count,opnd, i,j,imm,mod) if (match(ext, force64_expr)) flags = add_flags(flags, "INAT_FORCE64") + # check invalid in 64-bit (and no only64) + if (match(ext, invalid64_expr) && + !match($0, only64_expr)) + flags = add_flags(flags, "INAT_INV64") + # check REX2 not allowed if (match(ext, no_rex2_expr)) flags = add_flags(flags, "INAT_NO_REX2") diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 04ba035d67e9..b9ce3aab15fe 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -24,6 +24,7 @@ #include <sys/poll.h> #include <sys/utsname.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -677,6 +678,88 @@ static void kvp_process_ipconfig_file(char *cmd, pclose(file); } +static bool kvp_verify_ip_address(const void *address_string) +{ + char verify_buf[sizeof(struct in6_addr)]; + + if (inet_pton(AF_INET, address_string, verify_buf) == 1) + return true; + if (inet_pton(AF_INET6, address_string, verify_buf) == 1) + return true; + return false; +} + +static void kvp_extract_routes(const char *line, void **output, size_t *remaining) +{ + static const char needle[] = "via "; + const char *match, *haystack = line; + + while ((match = strstr(haystack, needle))) { + const char *address, *next_char; + + /* Address starts after needle. */ + address = match + strlen(needle); + + /* The char following address is a space or end of line. */ + next_char = strpbrk(address, " \t\\"); + if (!next_char) + next_char = address + strlen(address) + 1; + + /* Enough room for address and semicolon. */ + if (*remaining >= (next_char - address) + 1) { + memcpy(*output, address, next_char - address); + /* Terminate string for verification. */ + memcpy(*output + (next_char - address), "", 1); + if (kvp_verify_ip_address(*output)) { + /* Advance output buffer. */ + *output += next_char - address; + *remaining -= next_char - address; + + /* Each address needs a trailing semicolon. */ + memcpy(*output, ";", 1); + *output += 1; + *remaining -= 1; + } + } + haystack = next_char; + } +} + +static void kvp_get_gateway(void *buffer, size_t buffer_len) +{ + static const char needle[] = "default "; + FILE *f; + void *output = buffer; + char *line = NULL; + size_t alloc_size = 0, remaining = buffer_len - 1; + ssize_t num_chars; + + /* Show route information in a single line, for each address family */ + f = popen("ip --oneline -4 route show;ip --oneline -6 route show", "r"); + if (!f) { + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + return; + } + while ((num_chars = getline(&line, &alloc_size, f)) > 0) { + /* Skip short lines. */ + if (num_chars <= strlen(needle)) + continue; + /* Skip lines without default route. */ + if (memcmp(line, needle, strlen(needle))) + continue; + /* Remove trailing newline to simplify further parsing. */ + if (line[num_chars - 1] == '\n') + line[num_chars - 1] = '\0'; + /* Search routes after match. */ + kvp_extract_routes(line + strlen(needle), &output, &remaining); + } + /* Convert buffer into C-String. */ + memcpy(output, "", 1); + free(line); + pclose(f); +} + static void kvp_get_ipconfig_info(char *if_name, struct hv_kvp_ipaddr_value *buffer) { @@ -685,30 +768,7 @@ static void kvp_get_ipconfig_info(char *if_name, char *p; FILE *file; - /* - * Get the address of default gateway (ipv4). - */ - sprintf(cmd, "%s %s", "ip route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info. - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0); - - /* - * Get the address of default gateway (ipv6). - */ - sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name); - strcat(cmd, " | awk '/default/ {print $3 }'"); - - /* - * Execute the command to gather gateway info (ipv6). - */ - kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way, - (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1); - + kvp_get_gateway(buffer->gate_way, sizeof(buffer->gate_way)); /* * Gather the DNS state. diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h new file mode 100644 index 000000000000..6b8713675765 --- /dev/null +++ b/tools/include/linux/cfi_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include <linux/linkage.h> + +#ifdef CONFIG_CFI_CLANG +/* + * Use the __kcfi_typeid_<function> type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI_CLANG */ + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI_CLANG */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 1ea2c4c33b86..ef1c27fa3c57 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 88dc393c2bca..2892a45023af 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat) __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) #undef __NR_syscalls -#define __NR_syscalls 467 +#define __NR_syscalls 468 /* * 32 bit systems traditionally used different diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 28705ae67784..fd404729b115 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -4968,6 +4968,9 @@ union bpf_attr { * the netns switch takes place from ingress to ingress without * going through the CPU's backlog queue. * + * *skb*\ **->mark** and *skb*\ **->tstamp** are not cleared during + * the netns switch. + * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types at the * ingress hook and for veth and netkit target device types. The diff --git a/tools/include/uapi/linux/fanotify.h b/tools/include/uapi/linux/fanotify.h new file mode 100644 index 000000000000..e710967c7c26 --- /dev/null +++ b/tools/include/uapi/linux/fanotify.h @@ -0,0 +1,274 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_LINUX_FANOTIFY_H +#define _UAPI_LINUX_FANOTIFY_H + +#include <linux/types.h> + +/* the following events that user-space can register for */ +#define FAN_ACCESS 0x00000001 /* File was accessed */ +#define FAN_MODIFY 0x00000002 /* File was modified */ +#define FAN_ATTRIB 0x00000004 /* Metadata changed */ +#define FAN_CLOSE_WRITE 0x00000008 /* Writable file closed */ +#define FAN_CLOSE_NOWRITE 0x00000010 /* Unwritable file closed */ +#define FAN_OPEN 0x00000020 /* File was opened */ +#define FAN_MOVED_FROM 0x00000040 /* File was moved from X */ +#define FAN_MOVED_TO 0x00000080 /* File was moved to Y */ +#define FAN_CREATE 0x00000100 /* Subfile was created */ +#define FAN_DELETE 0x00000200 /* Subfile was deleted */ +#define FAN_DELETE_SELF 0x00000400 /* Self was deleted */ +#define FAN_MOVE_SELF 0x00000800 /* Self was moved */ +#define FAN_OPEN_EXEC 0x00001000 /* File was opened for exec */ + +#define FAN_Q_OVERFLOW 0x00004000 /* Event queued overflowed */ +#define FAN_FS_ERROR 0x00008000 /* Filesystem error */ + +#define FAN_OPEN_PERM 0x00010000 /* File open in perm check */ +#define FAN_ACCESS_PERM 0x00020000 /* File accessed in perm check */ +#define FAN_OPEN_EXEC_PERM 0x00040000 /* File open/exec in perm check */ +/* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ + +#define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */ +#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ +#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ + +#define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ + +#define FAN_RENAME 0x10000000 /* File was renamed */ + +#define FAN_ONDIR 0x40000000 /* Event occurred against dir */ + +/* helper events */ +#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE) /* close */ +#define FAN_MOVE (FAN_MOVED_FROM | FAN_MOVED_TO) /* moves */ + +/* flags used for fanotify_init() */ +#define FAN_CLOEXEC 0x00000001 +#define FAN_NONBLOCK 0x00000002 + +/* These are NOT bitwise flags. Both bits are used together. */ +#define FAN_CLASS_NOTIF 0x00000000 +#define FAN_CLASS_CONTENT 0x00000004 +#define FAN_CLASS_PRE_CONTENT 0x00000008 + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_CLASS_BITS (FAN_CLASS_NOTIF | FAN_CLASS_CONTENT | \ + FAN_CLASS_PRE_CONTENT) + +#define FAN_UNLIMITED_QUEUE 0x00000010 +#define FAN_UNLIMITED_MARKS 0x00000020 +#define FAN_ENABLE_AUDIT 0x00000040 + +/* Flags to determine fanotify event format */ +#define FAN_REPORT_PIDFD 0x00000080 /* Report pidfd for event->pid */ +#define FAN_REPORT_TID 0x00000100 /* event->pid is thread id */ +#define FAN_REPORT_FID 0x00000200 /* Report unique file id */ +#define FAN_REPORT_DIR_FID 0x00000400 /* Report unique directory id */ +#define FAN_REPORT_NAME 0x00000800 /* Report events with name */ +#define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ +#define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ +#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ + +/* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ +#define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) +/* Convenience macro - FAN_REPORT_TARGET_FID requires all other FID flags */ +#define FAN_REPORT_DFID_NAME_TARGET (FAN_REPORT_DFID_NAME | \ + FAN_REPORT_FID | FAN_REPORT_TARGET_FID) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK | \ + FAN_ALL_CLASS_BITS | FAN_UNLIMITED_QUEUE |\ + FAN_UNLIMITED_MARKS) + +/* flags used for fanotify_modify_mark() */ +#define FAN_MARK_ADD 0x00000001 +#define FAN_MARK_REMOVE 0x00000002 +#define FAN_MARK_DONT_FOLLOW 0x00000004 +#define FAN_MARK_ONLYDIR 0x00000008 +/* FAN_MARK_MOUNT is 0x00000010 */ +#define FAN_MARK_IGNORED_MASK 0x00000020 +#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040 +#define FAN_MARK_FLUSH 0x00000080 +/* FAN_MARK_FILESYSTEM is 0x00000100 */ +#define FAN_MARK_EVICTABLE 0x00000200 +/* This bit is mutually exclusive with FAN_MARK_IGNORED_MASK bit */ +#define FAN_MARK_IGNORE 0x00000400 + +/* These are NOT bitwise flags. Both bits can be used togther. */ +#define FAN_MARK_INODE 0x00000000 +#define FAN_MARK_MOUNT 0x00000010 +#define FAN_MARK_FILESYSTEM 0x00000100 +#define FAN_MARK_MNTNS 0x00000110 + +/* + * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY + * for non-inode mark types. + */ +#define FAN_MARK_IGNORE_SURV (FAN_MARK_IGNORE | FAN_MARK_IGNORED_SURV_MODIFY) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\ + FAN_MARK_REMOVE |\ + FAN_MARK_DONT_FOLLOW |\ + FAN_MARK_ONLYDIR |\ + FAN_MARK_MOUNT |\ + FAN_MARK_IGNORED_MASK |\ + FAN_MARK_IGNORED_SURV_MODIFY |\ + FAN_MARK_FLUSH) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_EVENTS (FAN_ACCESS |\ + FAN_MODIFY |\ + FAN_CLOSE |\ + FAN_OPEN) + +/* + * All events which require a permission response from userspace + */ +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\ + FAN_ACCESS_PERM) + +/* Deprecated - do not use this in programs and do not add new flags here! */ +#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\ + FAN_ALL_PERM_EVENTS |\ + FAN_Q_OVERFLOW) + +#define FANOTIFY_METADATA_VERSION 3 + +struct fanotify_event_metadata { + __u32 event_len; + __u8 vers; + __u8 reserved; + __u16 metadata_len; + __aligned_u64 mask; + __s32 fd; + __s32 pid; +}; + +#define FAN_EVENT_INFO_TYPE_FID 1 +#define FAN_EVENT_INFO_TYPE_DFID_NAME 2 +#define FAN_EVENT_INFO_TYPE_DFID 3 +#define FAN_EVENT_INFO_TYPE_PIDFD 4 +#define FAN_EVENT_INFO_TYPE_ERROR 5 +#define FAN_EVENT_INFO_TYPE_RANGE 6 +#define FAN_EVENT_INFO_TYPE_MNT 7 + +/* Special info types for FAN_RENAME */ +#define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 +/* Reserved for FAN_EVENT_INFO_TYPE_OLD_DFID 11 */ +#define FAN_EVENT_INFO_TYPE_NEW_DFID_NAME 12 +/* Reserved for FAN_EVENT_INFO_TYPE_NEW_DFID 13 */ + +/* Variable length info record following event metadata */ +struct fanotify_event_info_header { + __u8 info_type; + __u8 pad; + __u16 len; +}; + +/* + * Unique file identifier info record. + * This structure is used for records of types FAN_EVENT_INFO_TYPE_FID, + * FAN_EVENT_INFO_TYPE_DFID and FAN_EVENT_INFO_TYPE_DFID_NAME. + * For FAN_EVENT_INFO_TYPE_DFID_NAME there is additionally a null terminated + * name immediately after the file handle. + */ +struct fanotify_event_info_fid { + struct fanotify_event_info_header hdr; + __kernel_fsid_t fsid; + /* + * Following is an opaque struct file_handle that can be passed as + * an argument to open_by_handle_at(2). + */ + unsigned char handle[]; +}; + +/* + * This structure is used for info records of type FAN_EVENT_INFO_TYPE_PIDFD. + * It holds a pidfd for the pid that was responsible for generating an event. + */ +struct fanotify_event_info_pidfd { + struct fanotify_event_info_header hdr; + __s32 pidfd; +}; + +struct fanotify_event_info_error { + struct fanotify_event_info_header hdr; + __s32 error; + __u32 error_count; +}; + +struct fanotify_event_info_range { + struct fanotify_event_info_header hdr; + __u32 pad; + __u64 offset; + __u64 count; +}; + +struct fanotify_event_info_mnt { + struct fanotify_event_info_header hdr; + __u64 mnt_id; +}; + +/* + * User space may need to record additional information about its decision. + * The extra information type records what kind of information is included. + * The default is none. We also define an extra information buffer whose + * size is determined by the extra information type. + * + * If the information type is Audit Rule, then the information following + * is the rule number that triggered the user space decision that + * requires auditing. + */ + +#define FAN_RESPONSE_INFO_NONE 0 +#define FAN_RESPONSE_INFO_AUDIT_RULE 1 + +struct fanotify_response { + __s32 fd; + __u32 response; +}; + +struct fanotify_response_info_header { + __u8 type; + __u8 pad; + __u16 len; +}; + +struct fanotify_response_info_audit_rule { + struct fanotify_response_info_header hdr; + __u32 rule_number; + __u32 subj_trust; + __u32 obj_trust; +}; + +/* Legit userspace responses to a _PERM event */ +#define FAN_ALLOW 0x01 +#define FAN_DENY 0x02 +/* errno other than EPERM can specified in upper byte of deny response */ +#define FAN_ERRNO_BITS 8 +#define FAN_ERRNO_SHIFT (32 - FAN_ERRNO_BITS) +#define FAN_ERRNO_MASK ((1 << FAN_ERRNO_BITS) - 1) +#define FAN_DENY_ERRNO(err) \ + (FAN_DENY | ((((__u32)(err)) & FAN_ERRNO_MASK) << FAN_ERRNO_SHIFT)) + +#define FAN_AUDIT 0x10 /* Bitmask to create audit record for result */ +#define FAN_INFO 0x20 /* Bitmask to indicate additional information */ + +/* No fd set in event */ +#define FAN_NOFD -1 +#define FAN_NOPIDFD FAN_NOFD +#define FAN_EPIDFD -2 + +/* Helper functions to deal with fanotify_event_metadata buffers */ +#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata)) + +#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \ + (struct fanotify_event_metadata*)(((char *)(meta)) + \ + (meta)->event_len)) + +#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \ + (long)(meta)->event_len <= (long)(len)) + +#endif /* _UAPI_LINUX_FANOTIFY_H */ diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 5d32d53508d9..ced0fc3c3aa5 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 502ea63b5d2e..b6ae8ad8934b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -617,10 +617,6 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -933,6 +929,7 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1070,6 +1067,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL diff --git a/tools/include/uapi/linux/mount.h b/tools/include/uapi/linux/mount.h new file mode 100644 index 000000000000..7fa67c2031a5 --- /dev/null +++ b/tools/include/uapi/linux/mount.h @@ -0,0 +1,235 @@ +#ifndef _UAPI_LINUX_MOUNT_H +#define _UAPI_LINUX_MOUNT_H + +#include <linux/types.h> + +/* + * These are the fs-independent mount-flags: up to 32 flags are supported + * + * Usage of these is restricted within the kernel to core mount(2) code and + * callers of sys_mount() only. Filesystems should be using the SB_* + * equivalent instead. + */ +#define MS_RDONLY 1 /* Mount read-only */ +#define MS_NOSUID 2 /* Ignore suid and sgid bits */ +#define MS_NODEV 4 /* Disallow access to device special files */ +#define MS_NOEXEC 8 /* Disallow program execution */ +#define MS_SYNCHRONOUS 16 /* Writes are synced at once */ +#define MS_REMOUNT 32 /* Alter flags of a mounted FS */ +#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */ +#define MS_DIRSYNC 128 /* Directory modifications are synchronous */ +#define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */ +#define MS_NOATIME 1024 /* Do not update access times. */ +#define MS_NODIRATIME 2048 /* Do not update directory access times */ +#define MS_BIND 4096 +#define MS_MOVE 8192 +#define MS_REC 16384 +#define MS_VERBOSE 32768 /* War is peace. Verbosity is silence. + MS_VERBOSE is deprecated. */ +#define MS_SILENT 32768 +#define MS_POSIXACL (1<<16) /* VFS does not apply the umask */ +#define MS_UNBINDABLE (1<<17) /* change to unbindable */ +#define MS_PRIVATE (1<<18) /* change to private */ +#define MS_SLAVE (1<<19) /* change to slave */ +#define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ + +/* These sb flags are internal to the kernel */ +#define MS_SUBMOUNT (1<<26) +#define MS_NOREMOTELOCK (1<<27) +#define MS_NOSEC (1<<28) +#define MS_BORN (1<<29) +#define MS_ACTIVE (1<<30) +#define MS_NOUSER (1<<31) + +/* + * Superblock flags that can be altered by MS_REMOUNT + */ +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ + MS_LAZYTIME) + +/* + * Old magic mount flag and mask + */ +#define MS_MGC_VAL 0xC0ED0000 +#define MS_MGC_MSK 0xffff0000 + +/* + * open_tree() flags. + */ +#define OPEN_TREE_CLONE 1 /* Clone the target tree and attach the clone */ +#define OPEN_TREE_CLOEXEC O_CLOEXEC /* Close the file on execve() */ + +/* + * move_mount() flags. + */ +#define MOVE_MOUNT_F_SYMLINKS 0x00000001 /* Follow symlinks on from path */ +#define MOVE_MOUNT_F_AUTOMOUNTS 0x00000002 /* Follow automounts on from path */ +#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ +#define MOVE_MOUNT_T_SYMLINKS 0x00000010 /* Follow symlinks on to path */ +#define MOVE_MOUNT_T_AUTOMOUNTS 0x00000020 /* Follow automounts on to path */ +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#define MOVE_MOUNT_SET_GROUP 0x00000100 /* Set sharing group instead */ +#define MOVE_MOUNT_BENEATH 0x00000200 /* Mount beneath top mount */ +#define MOVE_MOUNT__MASK 0x00000377 + +/* + * fsopen() flags. + */ +#define FSOPEN_CLOEXEC 0x00000001 + +/* + * fspick() flags. + */ +#define FSPICK_CLOEXEC 0x00000001 +#define FSPICK_SYMLINK_NOFOLLOW 0x00000002 +#define FSPICK_NO_AUTOMOUNT 0x00000004 +#define FSPICK_EMPTY_PATH 0x00000008 + +/* + * The type of fsconfig() call made. + */ +enum fsconfig_command { + FSCONFIG_SET_FLAG = 0, /* Set parameter, supplying no value */ + FSCONFIG_SET_STRING = 1, /* Set parameter, supplying a string value */ + FSCONFIG_SET_BINARY = 2, /* Set parameter, supplying a binary blob value */ + FSCONFIG_SET_PATH = 3, /* Set parameter, supplying an object by path */ + FSCONFIG_SET_PATH_EMPTY = 4, /* Set parameter, supplying an object by (empty) path */ + FSCONFIG_SET_FD = 5, /* Set parameter, supplying an object by fd */ + FSCONFIG_CMD_CREATE = 6, /* Create new or reuse existing superblock */ + FSCONFIG_CMD_RECONFIGURE = 7, /* Invoke superblock reconfiguration */ + FSCONFIG_CMD_CREATE_EXCL = 8, /* Create new superblock, fail if reusing existing superblock */ +}; + +/* + * fsmount() flags. + */ +#define FSMOUNT_CLOEXEC 0x00000001 + +/* + * Mount attributes. + */ +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */ +#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */ + +/* + * mount_setattr() + */ +struct mount_attr { + __u64 attr_set; + __u64 attr_clr; + __u64 propagation; + __u64 userns_fd; +}; + +/* List of all mount_attr versions. */ +#define MOUNT_ATTR_SIZE_VER0 32 /* sizeof first published struct */ + + +/* + * Structure for getting mount/superblock/filesystem info with statmount(2). + * + * The interface is similar to statx(2): individual fields or groups can be + * selected with the @mask argument of statmount(). Kernel will set the @mask + * field according to the supported fields. + * + * If string fields are selected, then the caller needs to pass a buffer that + * has space after the fixed part of the structure. Nul terminated strings are + * copied there and offsets relative to @str are stored in the relevant fields. + * If the buffer is too small, then EOVERFLOW is returned. The actually used + * size is returned in @size. + */ +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... */ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 supported_mask; /* Mask flags that this kernel supports */ + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ + __u64 __spare2[43]; + char str[]; /* Variable size part containing strings */ +}; + +/* + * Structure for passing mount ID and miscellaneous parameters to statmount(2) + * and listmount(2). + * + * For statmount(2) @param represents the request mask. + * For listmount(2) @param represents the last listed mount id (or zero). + */ +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; + __u64 mnt_ns_id; +}; + +/* List of all mnt_id_req versions. */ +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ + +/* + * @mask bits for statmount(2) + */ +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */ +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ + +/* + * Special @mnt_id values that can be passed to listmount + */ +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */ + +#endif /* _UAPI_LINUX_MOUNT_H */ diff --git a/tools/include/uapi/linux/nsfs.h b/tools/include/uapi/linux/nsfs.h new file mode 100644 index 000000000000..34127653fd00 --- /dev/null +++ b/tools/include/uapi/linux/nsfs.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __LINUX_NSFS_H +#define __LINUX_NSFS_H + +#include <linux/ioctl.h> +#include <linux/types.h> + +#define NSIO 0xb7 + +/* Returns a file descriptor that refers to an owning user namespace */ +#define NS_GET_USERNS _IO(NSIO, 0x1) +/* Returns a file descriptor that refers to a parent namespace */ +#define NS_GET_PARENT _IO(NSIO, 0x2) +/* Returns the type of namespace (CLONE_NEW* value) referred to by + file descriptor */ +#define NS_GET_NSTYPE _IO(NSIO, 0x3) +/* Get owner UID (in the caller's user namespace) for a user namespace */ +#define NS_GET_OWNER_UID _IO(NSIO, 0x4) +/* Get the id for a mount namespace */ +#define NS_GET_MNTNS_ID _IOR(NSIO, 0x5, __u64) +/* Translate pid from target pid namespace into the caller's pid namespace. */ +#define NS_GET_PID_FROM_PIDNS _IOR(NSIO, 0x6, int) +/* Return thread-group leader id of pid in the callers pid namespace. */ +#define NS_GET_TGID_FROM_PIDNS _IOR(NSIO, 0x7, int) +/* Translate pid from caller's pid namespace into a target pid namespace. */ +#define NS_GET_PID_IN_PIDNS _IOR(NSIO, 0x8, int) +/* Return thread-group leader id of pid in the target pid namespace. */ +#define NS_GET_TGID_IN_PIDNS _IOR(NSIO, 0x9, int) + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ + +/* Get information about namespace. */ +#define NS_MNT_GET_INFO _IOR(NSIO, 10, struct mnt_ns_info) +/* Get next namespace. */ +#define NS_MNT_GET_NEXT _IOR(NSIO, 11, struct mnt_ns_info) +/* Get previous namespace. */ +#define NS_MNT_GET_PREV _IOR(NSIO, 12, struct mnt_ns_info) + +#endif /* __LINUX_NSFS_H */ diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 0524d541d4e3..78a362b80027 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -39,18 +39,21 @@ enum perf_type_id { /* * attr.config layout for type PERF_TYPE_HARDWARE and PERF_TYPE_HW_CACHE + * * PERF_TYPE_HARDWARE: 0xEEEEEEEE000000AA * AA: hardware event ID * EEEEEEEE: PMU type ID + * * PERF_TYPE_HW_CACHE: 0xEEEEEEEE00DDCCBB * BB: hardware cache ID * CC: hardware cache op ID * DD: hardware cache op result ID * EEEEEEEE: PMU type ID - * If the PMU type ID is 0, the PERF_TYPE_RAW will be applied. + * + * If the PMU type ID is 0, PERF_TYPE_RAW will be applied. */ -#define PERF_PMU_TYPE_SHIFT 32 -#define PERF_HW_EVENT_MASK 0xffffffff +#define PERF_PMU_TYPE_SHIFT 32 +#define PERF_HW_EVENT_MASK 0xffffffff /* * Generalized performance event event_id types, used by the @@ -112,7 +115,7 @@ enum perf_hw_cache_op_result_id { /* * Special "software" events provided by the kernel, even if the hardware * does not support performance events. These events measure various - * physical and sw events of the kernel (and allow the profiling of them as + * physical and SW events of the kernel (and allow the profiling of them as * well): */ enum perf_sw_ids { @@ -167,8 +170,9 @@ enum perf_event_sample_format { }; #define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT) + /* - * values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set + * Values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set. * * If the user does not pass priv level information via branch_sample_type, * the kernel uses the event's priv level. Branch and event priv levels do @@ -178,20 +182,20 @@ enum perf_event_sample_format { * of branches and therefore it supersedes all the other types. */ enum perf_branch_sample_type_shift { - PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ - PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ - PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ - - PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ - PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ - PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ - PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ - PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ - PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ - PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ + PERF_SAMPLE_BRANCH_USER_SHIFT = 0, /* user branches */ + PERF_SAMPLE_BRANCH_KERNEL_SHIFT = 1, /* kernel branches */ + PERF_SAMPLE_BRANCH_HV_SHIFT = 2, /* hypervisor branches */ + + PERF_SAMPLE_BRANCH_ANY_SHIFT = 3, /* any branch types */ + PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT = 4, /* any call branch */ + PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT = 5, /* any return branch */ + PERF_SAMPLE_BRANCH_IND_CALL_SHIFT = 6, /* indirect calls */ + PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT = 7, /* transaction aborts */ + PERF_SAMPLE_BRANCH_IN_TX_SHIFT = 8, /* in transaction */ + PERF_SAMPLE_BRANCH_NO_TX_SHIFT = 9, /* not in transaction */ PERF_SAMPLE_BRANCH_COND_SHIFT = 10, /* conditional branches */ - PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* call/ret stack */ + PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = 11, /* CALL/RET stack */ PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT = 12, /* indirect jumps */ PERF_SAMPLE_BRANCH_CALL_SHIFT = 13, /* direct call */ @@ -210,96 +214,95 @@ enum perf_branch_sample_type_shift { }; enum perf_branch_sample_type { - PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, - PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, - PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, + PERF_SAMPLE_BRANCH_USER = 1U << PERF_SAMPLE_BRANCH_USER_SHIFT, + PERF_SAMPLE_BRANCH_KERNEL = 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT, + PERF_SAMPLE_BRANCH_HV = 1U << PERF_SAMPLE_BRANCH_HV_SHIFT, - PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, - PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, - PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, - PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, - PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, - PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, - PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, + PERF_SAMPLE_BRANCH_ANY = 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT, + PERF_SAMPLE_BRANCH_ANY_CALL = 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ANY_RETURN = 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT, + PERF_SAMPLE_BRANCH_IND_CALL = 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT, + PERF_SAMPLE_BRANCH_ABORT_TX = 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT, + PERF_SAMPLE_BRANCH_IN_TX = 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT, + PERF_SAMPLE_BRANCH_NO_TX = 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT, + PERF_SAMPLE_BRANCH_COND = 1U << PERF_SAMPLE_BRANCH_COND_SHIFT, - PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, - PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, - PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, + PERF_SAMPLE_BRANCH_CALL_STACK = 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT, + PERF_SAMPLE_BRANCH_IND_JUMP = 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT, + PERF_SAMPLE_BRANCH_CALL = 1U << PERF_SAMPLE_BRANCH_CALL_SHIFT, - PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, - PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, + PERF_SAMPLE_BRANCH_NO_FLAGS = 1U << PERF_SAMPLE_BRANCH_NO_FLAGS_SHIFT, + PERF_SAMPLE_BRANCH_NO_CYCLES = 1U << PERF_SAMPLE_BRANCH_NO_CYCLES_SHIFT, - PERF_SAMPLE_BRANCH_TYPE_SAVE = - 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_TYPE_SAVE = 1U << PERF_SAMPLE_BRANCH_TYPE_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, + PERF_SAMPLE_BRANCH_HW_INDEX = 1U << PERF_SAMPLE_BRANCH_HW_INDEX_SHIFT, - PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, + PERF_SAMPLE_BRANCH_PRIV_SAVE = 1U << PERF_SAMPLE_BRANCH_PRIV_SAVE_SHIFT, - PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, + PERF_SAMPLE_BRANCH_COUNTERS = 1U << PERF_SAMPLE_BRANCH_COUNTERS_SHIFT, - PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, + PERF_SAMPLE_BRANCH_MAX = 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT, }; /* - * Common flow change classification + * Common control flow change classifications: */ enum { - PERF_BR_UNKNOWN = 0, /* unknown */ - PERF_BR_COND = 1, /* conditional */ - PERF_BR_UNCOND = 2, /* unconditional */ - PERF_BR_IND = 3, /* indirect */ - PERF_BR_CALL = 4, /* function call */ - PERF_BR_IND_CALL = 5, /* indirect function call */ - PERF_BR_RET = 6, /* function return */ - PERF_BR_SYSCALL = 7, /* syscall */ - PERF_BR_SYSRET = 8, /* syscall return */ - PERF_BR_COND_CALL = 9, /* conditional function call */ - PERF_BR_COND_RET = 10, /* conditional function return */ - PERF_BR_ERET = 11, /* exception return */ - PERF_BR_IRQ = 12, /* irq */ - PERF_BR_SERROR = 13, /* system error */ - PERF_BR_NO_TX = 14, /* not in transaction */ - PERF_BR_EXTEND_ABI = 15, /* extend ABI */ + PERF_BR_UNKNOWN = 0, /* Unknown */ + PERF_BR_COND = 1, /* Conditional */ + PERF_BR_UNCOND = 2, /* Unconditional */ + PERF_BR_IND = 3, /* Indirect */ + PERF_BR_CALL = 4, /* Function call */ + PERF_BR_IND_CALL = 5, /* Indirect function call */ + PERF_BR_RET = 6, /* Function return */ + PERF_BR_SYSCALL = 7, /* Syscall */ + PERF_BR_SYSRET = 8, /* Syscall return */ + PERF_BR_COND_CALL = 9, /* Conditional function call */ + PERF_BR_COND_RET = 10, /* Conditional function return */ + PERF_BR_ERET = 11, /* Exception return */ + PERF_BR_IRQ = 12, /* IRQ */ + PERF_BR_SERROR = 13, /* System error */ + PERF_BR_NO_TX = 14, /* Not in transaction */ + PERF_BR_EXTEND_ABI = 15, /* Extend ABI */ PERF_BR_MAX, }; /* - * Common branch speculation outcome classification + * Common branch speculation outcome classifications: */ enum { - PERF_BR_SPEC_NA = 0, /* Not available */ - PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ - PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ - PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ + PERF_BR_SPEC_NA = 0, /* Not available */ + PERF_BR_SPEC_WRONG_PATH = 1, /* Speculative but on wrong path */ + PERF_BR_NON_SPEC_CORRECT_PATH = 2, /* Non-speculative but on correct path */ + PERF_BR_SPEC_CORRECT_PATH = 3, /* Speculative and on correct path */ PERF_BR_SPEC_MAX, }; enum { - PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ - PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ - PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ - PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ - PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ - PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ - PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ - PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ + PERF_BR_NEW_FAULT_ALGN = 0, /* Alignment fault */ + PERF_BR_NEW_FAULT_DATA = 1, /* Data fault */ + PERF_BR_NEW_FAULT_INST = 2, /* Inst fault */ + PERF_BR_NEW_ARCH_1 = 3, /* Architecture specific */ + PERF_BR_NEW_ARCH_2 = 4, /* Architecture specific */ + PERF_BR_NEW_ARCH_3 = 5, /* Architecture specific */ + PERF_BR_NEW_ARCH_4 = 6, /* Architecture specific */ + PERF_BR_NEW_ARCH_5 = 7, /* Architecture specific */ PERF_BR_NEW_MAX, }; enum { - PERF_BR_PRIV_UNKNOWN = 0, - PERF_BR_PRIV_USER = 1, - PERF_BR_PRIV_KERNEL = 2, - PERF_BR_PRIV_HV = 3, + PERF_BR_PRIV_UNKNOWN = 0, + PERF_BR_PRIV_USER = 1, + PERF_BR_PRIV_KERNEL = 2, + PERF_BR_PRIV_HV = 3, }; -#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 -#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 -#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 -#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 -#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 +#define PERF_BR_ARM64_FIQ PERF_BR_NEW_ARCH_1 +#define PERF_BR_ARM64_DEBUG_HALT PERF_BR_NEW_ARCH_2 +#define PERF_BR_ARM64_DEBUG_EXIT PERF_BR_NEW_ARCH_3 +#define PERF_BR_ARM64_DEBUG_INST PERF_BR_NEW_ARCH_4 +#define PERF_BR_ARM64_DEBUG_DATA PERF_BR_NEW_ARCH_5 #define PERF_SAMPLE_BRANCH_PLM_ALL \ (PERF_SAMPLE_BRANCH_USER|\ @@ -310,9 +313,9 @@ enum { * Values to determine ABI of the registers dump. */ enum perf_sample_regs_abi { - PERF_SAMPLE_REGS_ABI_NONE = 0, - PERF_SAMPLE_REGS_ABI_32 = 1, - PERF_SAMPLE_REGS_ABI_64 = 2, + PERF_SAMPLE_REGS_ABI_NONE = 0, + PERF_SAMPLE_REGS_ABI_32 = 1, + PERF_SAMPLE_REGS_ABI_64 = 2, }; /* @@ -320,21 +323,21 @@ enum perf_sample_regs_abi { * abort events. Multiple bits can be set. */ enum { - PERF_TXN_ELISION = (1 << 0), /* From elision */ - PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ - PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ - PERF_TXN_ASYNC = (1 << 3), /* Instruction not related */ - PERF_TXN_RETRY = (1 << 4), /* Retry possible */ - PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ - PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ - PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ + PERF_TXN_ELISION = (1 << 0), /* From elision */ + PERF_TXN_TRANSACTION = (1 << 1), /* From transaction */ + PERF_TXN_SYNC = (1 << 2), /* Instruction is related */ + PERF_TXN_ASYNC = (1 << 3), /* Instruction is not related */ + PERF_TXN_RETRY = (1 << 4), /* Retry possible */ + PERF_TXN_CONFLICT = (1 << 5), /* Conflict abort */ + PERF_TXN_CAPACITY_WRITE = (1 << 6), /* Capacity write abort */ + PERF_TXN_CAPACITY_READ = (1 << 7), /* Capacity read abort */ - PERF_TXN_MAX = (1 << 8), /* non-ABI */ + PERF_TXN_MAX = (1 << 8), /* non-ABI */ - /* bits 32..63 are reserved for the abort code */ + /* Bits 32..63 are reserved for the abort code */ - PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), - PERF_TXN_ABORT_SHIFT = 32, + PERF_TXN_ABORT_MASK = (0xffffffffULL << 32), + PERF_TXN_ABORT_SHIFT = 32, }; /* @@ -369,22 +372,22 @@ enum perf_event_read_format { PERF_FORMAT_MAX = 1U << 5, /* non-ABI */ }; -#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ -#define PERF_ATTR_SIZE_VER1 72 /* add: config2 */ -#define PERF_ATTR_SIZE_VER2 80 /* add: branch_sample_type */ -#define PERF_ATTR_SIZE_VER3 96 /* add: sample_regs_user */ - /* add: sample_stack_user */ -#define PERF_ATTR_SIZE_VER4 104 /* add: sample_regs_intr */ -#define PERF_ATTR_SIZE_VER5 112 /* add: aux_watermark */ -#define PERF_ATTR_SIZE_VER6 120 /* add: aux_sample_size */ -#define PERF_ATTR_SIZE_VER7 128 /* add: sig_data */ -#define PERF_ATTR_SIZE_VER8 136 /* add: config3 */ +#define PERF_ATTR_SIZE_VER0 64 /* Size of first published 'struct perf_event_attr' */ +#define PERF_ATTR_SIZE_VER1 72 /* Add: config2 */ +#define PERF_ATTR_SIZE_VER2 80 /* Add: branch_sample_type */ +#define PERF_ATTR_SIZE_VER3 96 /* Add: sample_regs_user */ + /* Add: sample_stack_user */ +#define PERF_ATTR_SIZE_VER4 104 /* Add: sample_regs_intr */ +#define PERF_ATTR_SIZE_VER5 112 /* Add: aux_watermark */ +#define PERF_ATTR_SIZE_VER6 120 /* Add: aux_sample_size */ +#define PERF_ATTR_SIZE_VER7 128 /* Add: sig_data */ +#define PERF_ATTR_SIZE_VER8 136 /* Add: config3 */ /* - * Hardware event_id to monitor via a performance monitoring event: - * - * @sample_max_stack: Max number of frame pointers in a callchain, - * should be < /proc/sys/kernel/perf_event_max_stack + * 'struct perf_event_attr' contains various attributes that define + * a performance event - most of them hardware related configuration + * details, but also a lot of behavioral switches and values implemented + * by the kernel. */ struct perf_event_attr { @@ -394,7 +397,7 @@ struct perf_event_attr { __u32 type; /* - * Size of the attr structure, for fwd/bwd compat. + * Size of the attr structure, for forward/backwards compatibility. */ __u32 size; @@ -449,21 +452,21 @@ struct perf_event_attr { comm_exec : 1, /* flag comm events that are due to an exec */ use_clockid : 1, /* use @clockid for time fields */ context_switch : 1, /* context switch data */ - write_backward : 1, /* Write ring buffer from end to beginning */ + write_backward : 1, /* write ring buffer from end to beginning */ namespaces : 1, /* include namespaces data */ ksymbol : 1, /* include ksymbol events */ - bpf_event : 1, /* include bpf events */ + bpf_event : 1, /* include BPF events */ aux_output : 1, /* generate AUX records instead of events */ cgroup : 1, /* include cgroup events */ text_poke : 1, /* include text poke events */ - build_id : 1, /* use build id in mmap2 events */ + build_id : 1, /* use build ID in mmap2 events */ inherit_thread : 1, /* children only inherit if cloned with CLONE_THREAD */ remove_on_exec : 1, /* event is removed from task on exec */ sigtrap : 1, /* send synchronous SIGTRAP on event */ __reserved_1 : 26; union { - __u32 wakeup_events; /* wakeup every n events */ + __u32 wakeup_events; /* wake up every n events */ __u32 wakeup_watermark; /* bytes before wakeup */ }; @@ -472,13 +475,13 @@ struct perf_event_attr { __u64 bp_addr; __u64 kprobe_func; /* for perf_kprobe */ __u64 uprobe_path; /* for perf_uprobe */ - __u64 config1; /* extension of config */ + __u64 config1; /* extension of config */ }; union { __u64 bp_len; - __u64 kprobe_addr; /* when kprobe_func == NULL */ + __u64 kprobe_addr; /* when kprobe_func == NULL */ __u64 probe_offset; /* for perf_[k,u]probe */ - __u64 config2; /* extension of config1 */ + __u64 config2; /* extension of config1 */ }; __u64 branch_sample_type; /* enum perf_branch_sample_type */ @@ -508,7 +511,16 @@ struct perf_event_attr { * Wakeup watermark for AUX area */ __u32 aux_watermark; + + /* + * Max number of frame pointers in a callchain, should be + * lower than /proc/sys/kernel/perf_event_max_stack. + * + * Max number of entries of branch stack should be lower + * than the hardware limit. + */ __u16 sample_max_stack; + __u16 __reserved_2; __u32 aux_sample_size; @@ -535,7 +547,7 @@ struct perf_event_attr { /* * Structure used by below PERF_EVENT_IOC_QUERY_BPF command - * to query bpf programs attached to the same perf tracepoint + * to query BPF programs attached to the same perf tracepoint * as the given perf event. */ struct perf_event_query_bpf { @@ -557,21 +569,21 @@ struct perf_event_query_bpf { /* * Ioctls that can be done on a perf event fd: */ -#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) -#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) -#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) -#define PERF_EVENT_IOC_RESET _IO ('$', 3) -#define PERF_EVENT_IOC_PERIOD _IOW('$', 4, __u64) -#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) -#define PERF_EVENT_IOC_SET_FILTER _IOW('$', 6, char *) -#define PERF_EVENT_IOC_ID _IOR('$', 7, __u64 *) -#define PERF_EVENT_IOC_SET_BPF _IOW('$', 8, __u32) -#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW('$', 9, __u32) +#define PERF_EVENT_IOC_ENABLE _IO ('$', 0) +#define PERF_EVENT_IOC_DISABLE _IO ('$', 1) +#define PERF_EVENT_IOC_REFRESH _IO ('$', 2) +#define PERF_EVENT_IOC_RESET _IO ('$', 3) +#define PERF_EVENT_IOC_PERIOD _IOW ('$', 4, __u64) +#define PERF_EVENT_IOC_SET_OUTPUT _IO ('$', 5) +#define PERF_EVENT_IOC_SET_FILTER _IOW ('$', 6, char *) +#define PERF_EVENT_IOC_ID _IOR ('$', 7, __u64 *) +#define PERF_EVENT_IOC_SET_BPF _IOW ('$', 8, __u32) +#define PERF_EVENT_IOC_PAUSE_OUTPUT _IOW ('$', 9, __u32) #define PERF_EVENT_IOC_QUERY_BPF _IOWR('$', 10, struct perf_event_query_bpf *) -#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW('$', 11, struct perf_event_attr *) +#define PERF_EVENT_IOC_MODIFY_ATTRIBUTES _IOW ('$', 11, struct perf_event_attr *) enum perf_event_ioc_flags { - PERF_IOC_FLAG_GROUP = 1U << 0, + PERF_IOC_FLAG_GROUP = 1U << 0, }; /* @@ -582,7 +594,7 @@ struct perf_event_mmap_page { __u32 compat_version; /* lowest version this is compat with */ /* - * Bits needed to read the hw events in user-space. + * Bits needed to read the HW events in user-space. * * u32 seq, time_mult, time_shift, index, width; * u64 count, enabled, running; @@ -620,7 +632,7 @@ struct perf_event_mmap_page { __u32 index; /* hardware event identifier */ __s64 offset; /* add to hardware event value */ __u64 time_enabled; /* time event active */ - __u64 time_running; /* time event on cpu */ + __u64 time_running; /* time event on CPU */ union { __u64 capabilities; struct { @@ -648,7 +660,7 @@ struct perf_event_mmap_page { /* * If cap_usr_time the below fields can be used to compute the time - * delta since time_enabled (in ns) using rdtsc or similar. + * delta since time_enabled (in ns) using RDTSC or similar. * * u64 quot, rem; * u64 delta; @@ -721,7 +733,7 @@ struct perf_event_mmap_page { * after reading this value. * * When the mapping is PROT_WRITE the @data_tail value should be - * written by userspace to reflect the last read data, after issueing + * written by user-space to reflect the last read data, after issuing * an smp_mb() to separate the data read from the ->data_tail store. * In this case the kernel will not over-write unread data. * @@ -737,7 +749,7 @@ struct perf_event_mmap_page { /* * AUX area is defined by aux_{offset,size} fields that should be set - * by the userspace, so that + * by the user-space, so that * * aux_offset >= data_offset + data_size * @@ -811,7 +823,7 @@ struct perf_event_mmap_page { * Indicates that thread was preempted in TASK_RUNNING state. * * PERF_RECORD_MISC_MMAP_BUILD_ID: - * Indicates that mmap2 event carries build id data. + * Indicates that mmap2 event carries build ID data. */ #define PERF_RECORD_MISC_EXACT_IP (1 << 14) #define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14) @@ -822,26 +834,26 @@ struct perf_event_mmap_page { #define PERF_RECORD_MISC_EXT_RESERVED (1 << 15) struct perf_event_header { - __u32 type; - __u16 misc; - __u16 size; + __u32 type; + __u16 misc; + __u16 size; }; struct perf_ns_link_info { - __u64 dev; - __u64 ino; + __u64 dev; + __u64 ino; }; enum { - NET_NS_INDEX = 0, - UTS_NS_INDEX = 1, - IPC_NS_INDEX = 2, - PID_NS_INDEX = 3, - USER_NS_INDEX = 4, - MNT_NS_INDEX = 5, - CGROUP_NS_INDEX = 6, - - NR_NAMESPACES, /* number of available namespaces */ + NET_NS_INDEX = 0, + UTS_NS_INDEX = 1, + IPC_NS_INDEX = 2, + PID_NS_INDEX = 3, + USER_NS_INDEX = 4, + MNT_NS_INDEX = 5, + CGROUP_NS_INDEX = 6, + + NR_NAMESPACES, /* number of available namespaces */ }; enum perf_event_type { @@ -857,11 +869,11 @@ enum perf_event_type { * optional fields being ignored. * * struct sample_id { - * { u32 pid, tid; } && PERF_SAMPLE_TID - * { u64 time; } && PERF_SAMPLE_TIME - * { u64 id; } && PERF_SAMPLE_ID - * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID - * { u32 cpu, res; } && PERF_SAMPLE_CPU + * { u32 pid, tid; } && PERF_SAMPLE_TID + * { u64 time; } && PERF_SAMPLE_TIME + * { u64 id; } && PERF_SAMPLE_ID + * { u64 stream_id;} && PERF_SAMPLE_STREAM_ID + * { u32 cpu, res; } && PERF_SAMPLE_CPU * { u64 id; } && PERF_SAMPLE_IDENTIFIER * } && perf_event_attr::sample_id_all * @@ -872,7 +884,7 @@ enum perf_event_type { /* * The MMAP events record the PROT_EXEC mappings so that we can - * correlate userspace IPs to code. They have the following structure: + * correlate user-space IPs to code. They have the following structure: * * struct { * struct perf_event_header header; @@ -882,7 +894,7 @@ enum perf_event_type { * u64 len; * u64 pgoff; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP = 1, @@ -892,7 +904,7 @@ enum perf_event_type { * struct perf_event_header header; * u64 id; * u64 lost; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_LOST = 2, @@ -903,7 +915,7 @@ enum perf_event_type { * * u32 pid, tid; * char comm[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_COMM = 3, @@ -914,7 +926,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_EXIT = 4, @@ -925,7 +937,7 @@ enum perf_event_type { * u64 time; * u64 id; * u64 stream_id; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_THROTTLE = 5, @@ -937,7 +949,7 @@ enum perf_event_type { * u32 pid, ppid; * u32 tid, ptid; * u64 time; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_FORK = 7, @@ -948,7 +960,7 @@ enum perf_event_type { * u32 pid, tid; * * struct read_format values; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_READ = 8, @@ -1003,12 +1015,12 @@ enum perf_event_type { * { u64 counters; } cntr[nr] && PERF_SAMPLE_BRANCH_COUNTERS * } && PERF_SAMPLE_BRANCH_STACK * - * { u64 abi; # enum perf_sample_regs_abi - * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER + * { u64 abi; # enum perf_sample_regs_abi + * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_USER * - * { u64 size; - * char data[size]; - * u64 dyn_size; } && PERF_SAMPLE_STACK_USER + * { u64 size; + * char data[size]; + * u64 dyn_size; } && PERF_SAMPLE_STACK_USER * * { union perf_sample_weight * { @@ -1033,10 +1045,11 @@ enum perf_event_type { * { u64 abi; # enum perf_sample_regs_abi * u64 regs[weight(mask)]; } && PERF_SAMPLE_REGS_INTR * { u64 phys_addr;} && PERF_SAMPLE_PHYS_ADDR - * { u64 size; - * char data[size]; } && PERF_SAMPLE_AUX + * { u64 cgroup;} && PERF_SAMPLE_CGROUP * { u64 data_page_size;} && PERF_SAMPLE_DATA_PAGE_SIZE * { u64 code_page_size;} && PERF_SAMPLE_CODE_PAGE_SIZE + * { u64 size; + * char data[size]; } && PERF_SAMPLE_AUX * }; */ PERF_RECORD_SAMPLE = 9, @@ -1068,7 +1081,7 @@ enum perf_event_type { * }; * u32 prot, flags; * char filename[]; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_MMAP2 = 10, @@ -1077,12 +1090,12 @@ enum perf_event_type { * Records that new data landed in the AUX buffer part. * * struct { - * struct perf_event_header header; + * struct perf_event_header header; * - * u64 aux_offset; - * u64 aux_size; + * u64 aux_offset; + * u64 aux_size; * u64 flags; - * struct sample_id sample_id; + * struct sample_id sample_id; * }; */ PERF_RECORD_AUX = 11, @@ -1165,7 +1178,7 @@ enum perf_event_type { PERF_RECORD_KSYMBOL = 17, /* - * Record bpf events: + * Record BPF events: * enum perf_bpf_event_type { * PERF_BPF_EVENT_UNKNOWN = 0, * PERF_BPF_EVENT_PROG_LOAD = 1, @@ -1243,181 +1256,181 @@ enum perf_record_ksymbol_type { #define PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER (1 << 0) enum perf_bpf_event_type { - PERF_BPF_EVENT_UNKNOWN = 0, - PERF_BPF_EVENT_PROG_LOAD = 1, - PERF_BPF_EVENT_PROG_UNLOAD = 2, - PERF_BPF_EVENT_MAX, /* non-ABI */ + PERF_BPF_EVENT_UNKNOWN = 0, + PERF_BPF_EVENT_PROG_LOAD = 1, + PERF_BPF_EVENT_PROG_UNLOAD = 2, + PERF_BPF_EVENT_MAX, /* non-ABI */ }; -#define PERF_MAX_STACK_DEPTH 127 -#define PERF_MAX_CONTEXTS_PER_STACK 8 +#define PERF_MAX_STACK_DEPTH 127 +#define PERF_MAX_CONTEXTS_PER_STACK 8 enum perf_callchain_context { - PERF_CONTEXT_HV = (__u64)-32, - PERF_CONTEXT_KERNEL = (__u64)-128, - PERF_CONTEXT_USER = (__u64)-512, + PERF_CONTEXT_HV = (__u64)-32, + PERF_CONTEXT_KERNEL = (__u64)-128, + PERF_CONTEXT_USER = (__u64)-512, - PERF_CONTEXT_GUEST = (__u64)-2048, - PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, - PERF_CONTEXT_GUEST_USER = (__u64)-2560, + PERF_CONTEXT_GUEST = (__u64)-2048, + PERF_CONTEXT_GUEST_KERNEL = (__u64)-2176, + PERF_CONTEXT_GUEST_USER = (__u64)-2560, - PERF_CONTEXT_MAX = (__u64)-4095, + PERF_CONTEXT_MAX = (__u64)-4095, }; /** * PERF_RECORD_AUX::flags bits */ -#define PERF_AUX_FLAG_TRUNCATED 0x01 /* record was truncated to fit */ -#define PERF_AUX_FLAG_OVERWRITE 0x02 /* snapshot from overwrite mode */ -#define PERF_AUX_FLAG_PARTIAL 0x04 /* record contains gaps */ -#define PERF_AUX_FLAG_COLLISION 0x08 /* sample collided with another */ +#define PERF_AUX_FLAG_TRUNCATED 0x0001 /* Record was truncated to fit */ +#define PERF_AUX_FLAG_OVERWRITE 0x0002 /* Snapshot from overwrite mode */ +#define PERF_AUX_FLAG_PARTIAL 0x0004 /* Record contains gaps */ +#define PERF_AUX_FLAG_COLLISION 0x0008 /* Sample collided with another */ #define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK 0xff00 /* PMU specific trace format type */ /* CoreSight PMU AUX buffer formats */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ -#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT 0x0000 /* Default for backward compatibility */ +#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW 0x0100 /* Raw format of the source */ -#define PERF_FLAG_FD_NO_GROUP (1UL << 0) -#define PERF_FLAG_FD_OUTPUT (1UL << 1) -#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ -#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#define PERF_FLAG_FD_OUTPUT (1UL << 1) +#define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup ID, per-CPU mode only */ +#define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ #if defined(__LITTLE_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_op:5, /* type of opcode */ - mem_lvl:14, /* memory hierarchy level */ - mem_snoop:5, /* snoop mode */ - mem_lock:2, /* lock instr */ - mem_dtlb:7, /* tlb access */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_remote:1, /* remote */ - mem_snoopx:2, /* snoop mode, ext */ - mem_blk:3, /* access blocked */ - mem_hops:3, /* hop level */ - mem_rsvd:18; + __u64 mem_op : 5, /* Type of opcode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_snoop : 5, /* Snoop mode */ + mem_lock : 2, /* Lock instr */ + mem_dtlb : 7, /* TLB access */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_remote : 1, /* Remote */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_blk : 3, /* Access blocked */ + mem_hops : 3, /* Hop level */ + mem_rsvd : 18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:18, - mem_hops:3, /* hop level */ - mem_blk:3, /* access blocked */ - mem_snoopx:2, /* snoop mode, ext */ - mem_remote:1, /* remote */ - mem_lvl_num:4, /* memory hierarchy level number */ - mem_dtlb:7, /* tlb access */ - mem_lock:2, /* lock instr */ - mem_snoop:5, /* snoop mode */ - mem_lvl:14, /* memory hierarchy level */ - mem_op:5; /* type of opcode */ + __u64 mem_rsvd : 18, + mem_hops : 3, /* Hop level */ + mem_blk : 3, /* Access blocked */ + mem_snoopx : 2, /* Snoop mode, ext */ + mem_remote : 1, /* Remote */ + mem_lvl_num : 4, /* Memory hierarchy level number */ + mem_dtlb : 7, /* TLB access */ + mem_lock : 2, /* Lock instr */ + mem_snoop : 5, /* Snoop mode */ + mem_lvl : 14, /* Memory hierarchy level */ + mem_op : 5; /* Type of opcode */ }; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif -/* type of opcode (load/store/prefetch,code) */ -#define PERF_MEM_OP_NA 0x01 /* not available */ -#define PERF_MEM_OP_LOAD 0x02 /* load instruction */ -#define PERF_MEM_OP_STORE 0x04 /* store instruction */ -#define PERF_MEM_OP_PFETCH 0x08 /* prefetch */ -#define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ -#define PERF_MEM_OP_SHIFT 0 +/* Type of memory opcode: */ +#define PERF_MEM_OP_NA 0x0001 /* Not available */ +#define PERF_MEM_OP_LOAD 0x0002 /* Load instruction */ +#define PERF_MEM_OP_STORE 0x0004 /* Store instruction */ +#define PERF_MEM_OP_PFETCH 0x0008 /* Prefetch */ +#define PERF_MEM_OP_EXEC 0x0010 /* Code (execution) */ +#define PERF_MEM_OP_SHIFT 0 /* - * PERF_MEM_LVL_* namespace being depricated to some extent in the + * The PERF_MEM_LVL_* namespace is being deprecated to some extent in * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. - * Supporting this namespace inorder to not break defined ABIs. + * We support this namespace in order to not break defined ABIs. * - * memory hierarchy (memory level, hit or miss) + * Memory hierarchy (memory level, hit or miss) */ -#define PERF_MEM_LVL_NA 0x01 /* not available */ -#define PERF_MEM_LVL_HIT 0x02 /* hit level */ -#define PERF_MEM_LVL_MISS 0x04 /* miss level */ -#define PERF_MEM_LVL_L1 0x08 /* L1 */ -#define PERF_MEM_LVL_LFB 0x10 /* Line Fill Buffer */ -#define PERF_MEM_LVL_L2 0x20 /* L2 */ -#define PERF_MEM_LVL_L3 0x40 /* L3 */ -#define PERF_MEM_LVL_LOC_RAM 0x80 /* Local DRAM */ -#define PERF_MEM_LVL_REM_RAM1 0x100 /* Remote DRAM (1 hop) */ -#define PERF_MEM_LVL_REM_RAM2 0x200 /* Remote DRAM (2 hops) */ -#define PERF_MEM_LVL_REM_CCE1 0x400 /* Remote Cache (1 hop) */ -#define PERF_MEM_LVL_REM_CCE2 0x800 /* Remote Cache (2 hops) */ -#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ -#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ -#define PERF_MEM_LVL_SHIFT 5 - -#define PERF_MEM_REMOTE_REMOTE 0x01 /* Remote */ -#define PERF_MEM_REMOTE_SHIFT 37 - -#define PERF_MEM_LVLNUM_L1 0x01 /* L1 */ -#define PERF_MEM_LVLNUM_L2 0x02 /* L2 */ -#define PERF_MEM_LVLNUM_L3 0x03 /* L3 */ -#define PERF_MEM_LVLNUM_L4 0x04 /* L4 */ -#define PERF_MEM_LVLNUM_L2_MHB 0x05 /* L2 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_MSC 0x06 /* Memory-side Cache */ -/* 0x7 available */ -#define PERF_MEM_LVLNUM_UNC 0x08 /* Uncached */ -#define PERF_MEM_LVLNUM_CXL 0x09 /* CXL */ -#define PERF_MEM_LVLNUM_IO 0x0a /* I/O */ -#define PERF_MEM_LVLNUM_ANY_CACHE 0x0b /* Any cache */ -#define PERF_MEM_LVLNUM_LFB 0x0c /* LFB / L1 Miss Handling Buffer */ -#define PERF_MEM_LVLNUM_RAM 0x0d /* RAM */ -#define PERF_MEM_LVLNUM_PMEM 0x0e /* PMEM */ -#define PERF_MEM_LVLNUM_NA 0x0f /* N/A */ - -#define PERF_MEM_LVLNUM_SHIFT 33 - -/* snoop mode */ -#define PERF_MEM_SNOOP_NA 0x01 /* not available */ -#define PERF_MEM_SNOOP_NONE 0x02 /* no snoop */ -#define PERF_MEM_SNOOP_HIT 0x04 /* snoop hit */ -#define PERF_MEM_SNOOP_MISS 0x08 /* snoop miss */ -#define PERF_MEM_SNOOP_HITM 0x10 /* snoop hit modified */ -#define PERF_MEM_SNOOP_SHIFT 19 - -#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */ -#define PERF_MEM_SNOOPX_PEER 0x02 /* xfer from peer */ -#define PERF_MEM_SNOOPX_SHIFT 38 - -/* locked instruction */ -#define PERF_MEM_LOCK_NA 0x01 /* not available */ -#define PERF_MEM_LOCK_LOCKED 0x02 /* locked transaction */ -#define PERF_MEM_LOCK_SHIFT 24 +#define PERF_MEM_LVL_NA 0x0001 /* Not available */ +#define PERF_MEM_LVL_HIT 0x0002 /* Hit level */ +#define PERF_MEM_LVL_MISS 0x0004 /* Miss level */ +#define PERF_MEM_LVL_L1 0x0008 /* L1 */ +#define PERF_MEM_LVL_LFB 0x0010 /* Line Fill Buffer */ +#define PERF_MEM_LVL_L2 0x0020 /* L2 */ +#define PERF_MEM_LVL_L3 0x0040 /* L3 */ +#define PERF_MEM_LVL_LOC_RAM 0x0080 /* Local DRAM */ +#define PERF_MEM_LVL_REM_RAM1 0x0100 /* Remote DRAM (1 hop) */ +#define PERF_MEM_LVL_REM_RAM2 0x0200 /* Remote DRAM (2 hops) */ +#define PERF_MEM_LVL_REM_CCE1 0x0400 /* Remote Cache (1 hop) */ +#define PERF_MEM_LVL_REM_CCE2 0x0800 /* Remote Cache (2 hops) */ +#define PERF_MEM_LVL_IO 0x1000 /* I/O memory */ +#define PERF_MEM_LVL_UNC 0x2000 /* Uncached memory */ +#define PERF_MEM_LVL_SHIFT 5 + +#define PERF_MEM_REMOTE_REMOTE 0x0001 /* Remote */ +#define PERF_MEM_REMOTE_SHIFT 37 + +#define PERF_MEM_LVLNUM_L1 0x0001 /* L1 */ +#define PERF_MEM_LVLNUM_L2 0x0002 /* L2 */ +#define PERF_MEM_LVLNUM_L3 0x0003 /* L3 */ +#define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ +#define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ +/* 0x007 available */ +#define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ +#define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ +#define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ +#define PERF_MEM_LVLNUM_ANY_CACHE 0x000b /* Any cache */ +#define PERF_MEM_LVLNUM_LFB 0x000c /* LFB / L1 Miss Handling Buffer */ +#define PERF_MEM_LVLNUM_RAM 0x000d /* RAM */ +#define PERF_MEM_LVLNUM_PMEM 0x000e /* PMEM */ +#define PERF_MEM_LVLNUM_NA 0x000f /* N/A */ + +#define PERF_MEM_LVLNUM_SHIFT 33 + +/* Snoop mode */ +#define PERF_MEM_SNOOP_NA 0x0001 /* Not available */ +#define PERF_MEM_SNOOP_NONE 0x0002 /* No snoop */ +#define PERF_MEM_SNOOP_HIT 0x0004 /* Snoop hit */ +#define PERF_MEM_SNOOP_MISS 0x0008 /* Snoop miss */ +#define PERF_MEM_SNOOP_HITM 0x0010 /* Snoop hit modified */ +#define PERF_MEM_SNOOP_SHIFT 19 + +#define PERF_MEM_SNOOPX_FWD 0x0001 /* Forward */ +#define PERF_MEM_SNOOPX_PEER 0x0002 /* Transfer from peer */ +#define PERF_MEM_SNOOPX_SHIFT 38 + +/* Locked instruction */ +#define PERF_MEM_LOCK_NA 0x0001 /* Not available */ +#define PERF_MEM_LOCK_LOCKED 0x0002 /* Locked transaction */ +#define PERF_MEM_LOCK_SHIFT 24 /* TLB access */ -#define PERF_MEM_TLB_NA 0x01 /* not available */ -#define PERF_MEM_TLB_HIT 0x02 /* hit level */ -#define PERF_MEM_TLB_MISS 0x04 /* miss level */ -#define PERF_MEM_TLB_L1 0x08 /* L1 */ -#define PERF_MEM_TLB_L2 0x10 /* L2 */ -#define PERF_MEM_TLB_WK 0x20 /* Hardware Walker*/ -#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */ -#define PERF_MEM_TLB_SHIFT 26 +#define PERF_MEM_TLB_NA 0x0001 /* Not available */ +#define PERF_MEM_TLB_HIT 0x0002 /* Hit level */ +#define PERF_MEM_TLB_MISS 0x0004 /* Miss level */ +#define PERF_MEM_TLB_L1 0x0008 /* L1 */ +#define PERF_MEM_TLB_L2 0x0010 /* L2 */ +#define PERF_MEM_TLB_WK 0x0020 /* Hardware Walker*/ +#define PERF_MEM_TLB_OS 0x0040 /* OS fault handler */ +#define PERF_MEM_TLB_SHIFT 26 /* Access blocked */ -#define PERF_MEM_BLK_NA 0x01 /* not available */ -#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */ -#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ -#define PERF_MEM_BLK_SHIFT 40 - -/* hop level */ -#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ -#define PERF_MEM_HOPS_1 0x02 /* remote node, same socket */ -#define PERF_MEM_HOPS_2 0x03 /* remote socket, same board */ -#define PERF_MEM_HOPS_3 0x04 /* remote board */ +#define PERF_MEM_BLK_NA 0x0001 /* Not available */ +#define PERF_MEM_BLK_DATA 0x0002 /* Data could not be forwarded */ +#define PERF_MEM_BLK_ADDR 0x0004 /* Address conflict */ +#define PERF_MEM_BLK_SHIFT 40 + +/* Hop level */ +#define PERF_MEM_HOPS_0 0x0001 /* Remote core, same node */ +#define PERF_MEM_HOPS_1 0x0002 /* Remote node, same socket */ +#define PERF_MEM_HOPS_2 0x0003 /* Remote socket, same board */ +#define PERF_MEM_HOPS_3 0x0004 /* Remote board */ /* 5-7 available */ -#define PERF_MEM_HOPS_SHIFT 43 +#define PERF_MEM_HOPS_SHIFT 43 #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) /* - * single taken branch record layout: + * Layout of single taken branch records: * * from: source instruction (may not always be a branch insn) * to: branch target @@ -1436,37 +1449,37 @@ union perf_mem_data_src { struct perf_branch_entry { __u64 from; __u64 to; - __u64 mispred:1, /* target mispredicted */ - predicted:1,/* target predicted */ - in_tx:1, /* in transaction */ - abort:1, /* transaction abort */ - cycles:16, /* cycle count to last branch */ - type:4, /* branch type */ - spec:2, /* branch speculation info */ - new_type:4, /* additional branch type */ - priv:3, /* privilege level */ - reserved:31; + __u64 mispred : 1, /* target mispredicted */ + predicted : 1, /* target predicted */ + in_tx : 1, /* in transaction */ + abort : 1, /* transaction abort */ + cycles : 16, /* cycle count to last branch */ + type : 4, /* branch type */ + spec : 2, /* branch speculation info */ + new_type : 4, /* additional branch type */ + priv : 3, /* privilege level */ + reserved : 31; }; /* Size of used info bits in struct perf_branch_entry */ #define PERF_BRANCH_ENTRY_INFO_BITS_MAX 33 union perf_sample_weight { - __u64 full; + __u64 full; #if defined(__LITTLE_ENDIAN_BITFIELD) struct { - __u32 var1_dw; - __u16 var2_w; - __u16 var3_w; + __u32 var1_dw; + __u16 var2_w; + __u16 var3_w; }; #elif defined(__BIG_ENDIAN_BITFIELD) struct { - __u16 var3_w; - __u16 var2_w; - __u32 var1_dw; + __u16 var3_w; + __u16 var2_w; + __u32 var1_dw; }; #else -#error "Unknown endianness" +# error "Unknown endianness" #endif }; diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index 35791791a879..43dec6eed559 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -230,7 +230,7 @@ struct prctl_mm_map { # define PR_PAC_APDBKEY (1UL << 3) # define PR_PAC_APGAKEY (1UL << 4) -/* Tagged user address controls for arm64 */ +/* Tagged user address controls for arm64 and RISC-V */ #define PR_SET_TAGGED_ADDR_CTRL 55 #define PR_GET_TAGGED_ADDR_CTRL 56 # define PR_TAGGED_ADDR_ENABLE (1UL << 0) @@ -244,6 +244,9 @@ struct prctl_mm_map { # define PR_MTE_TAG_MASK (0xffffUL << PR_MTE_TAG_SHIFT) /* Unused; kept only for source compatibility */ # define PR_MTE_TCF_SHIFT 1 +/* RISC-V pointer masking tag length */ +# define PR_PMLEN_SHIFT 24 +# define PR_PMLEN_MASK (0x7fUL << PR_PMLEN_SHIFT) /* Control reclaim behavior when allocating memory */ #define PR_SET_IO_FLUSHER 57 @@ -328,4 +331,44 @@ struct prctl_mm_map { # define PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC 0x10 /* Clear the aspect on exec */ # define PR_PPC_DEXCR_CTRL_MASK 0x1f +/* + * Get the current shadow stack configuration for the current thread, + * this will be the value configured via PR_SET_SHADOW_STACK_STATUS. + */ +#define PR_GET_SHADOW_STACK_STATUS 74 + +/* + * Set the current shadow stack configuration. Enabling the shadow + * stack will cause a shadow stack to be allocated for the thread. + */ +#define PR_SET_SHADOW_STACK_STATUS 75 +# define PR_SHADOW_STACK_ENABLE (1UL << 0) +# define PR_SHADOW_STACK_WRITE (1UL << 1) +# define PR_SHADOW_STACK_PUSH (1UL << 2) + +/* + * Prevent further changes to the specified shadow stack + * configuration. All bits may be locked via this call, including + * undefined bits. + */ +#define PR_LOCK_SHADOW_STACK_STATUS 76 + +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + +/* FUTEX hash management */ +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 887a25286441..f78ee3670dd5 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -98,43 +98,93 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile index ffcfd777c451..7fbb50b74c00 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -42,6 +42,7 @@ libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) TEST_ARGS := $(if $(V),-v) INCLUDES = \ +-I$(OUTPUT)arch/$(SRCARCH)/include/generated/uapi \ -I$(srctree)/tools/lib/perf/include \ -I$(srctree)/tools/lib/ \ -I$(srctree)/tools/include \ @@ -99,7 +100,16 @@ $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -$(LIBPERF_IN): FORCE +uapi-asm := $(OUTPUT)arch/$(SRCARCH)/include/generated/uapi/asm +ifeq ($(SRCARCH),arm64) + syscall-y := $(uapi-asm)/unistd_64.h +endif +uapi-asm-generic: + $(if $(syscall-y),\ + $(Q)$(MAKE) -f $(srctree)/scripts/Makefile.asm-headers obj=$(uapi-asm) \ + generic=include/uapi/asm-generic $(syscall-y),) + +$(LIBPERF_IN): uapi-asm-generic FORCE $(Q)$(MAKE) $(build)=libperf $(LIBPERF_A): $(LIBPERF_IN) @@ -120,7 +130,7 @@ all: fixdep clean: $(LIBAPI)-clean $(call QUIET_CLEAN, libperf) $(RM) $(LIBPERF_A) \ *.o *~ *.a *.so *.so.$(VERSION) *.so.$(LIBPERF_VERSION) .*.d .*.cmd tests/*.o LIBPERF-CFLAGS $(LIBPERF_PC) \ - $(TESTS_STATIC) $(TESTS_SHARED) + $(TESTS_STATIC) $(TESTS_SHARED) $(syscall-y) TESTS_IN = tests-in.o diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c index ce32cb35007d..c4da34048ef8 100644 --- a/tools/net/ynl/lib/ynl.c +++ b/tools/net/ynl/lib/ynl.c @@ -364,7 +364,7 @@ int ynl_attr_validate(struct ynl_parse_arg *yarg, const struct nlattr *attr) "Invalid attribute (binary %s)", policy->name); return -1; case YNL_PT_NUL_STR: - if ((!policy->len || len <= policy->len) && !data[len - 1]) + if (len && (!policy->len || len <= policy->len) && !data[len - 1]) break; yerr(yarg->ys, YNL_ERROR_ATTR_INVALID, "Invalid attribute (string %s)", policy->name); diff --git a/tools/net/ynl/pyynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py index af7fddd7b085..cab6b576c876 100755 --- a/tools/net/ynl/pyynl/ethtool.py +++ b/tools/net/ynl/pyynl/ethtool.py @@ -338,16 +338,24 @@ def main(): print('Capabilities:') [print(f'\t{v}') for v in bits_to_dict(tsinfo['timestamping'])] - print(f'PTP Hardware Clock: {tsinfo["phc-index"]}') + print(f'PTP Hardware Clock: {tsinfo.get("phc-index", "none")}') - print('Hardware Transmit Timestamp Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] + if 'tx-types' in tsinfo: + print('Hardware Transmit Timestamp Modes:') + [print(f'\t{v}') for v in bits_to_dict(tsinfo['tx-types'])] + else: + print('Hardware Transmit Timestamp Modes: none') + + if 'rx-filters' in tsinfo: + print('Hardware Receive Filter Modes:') + [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] + else: + print('Hardware Receive Filter Modes: none') - print('Hardware Receive Filter Modes:') - [print(f'\t{v}') for v in bits_to_dict(tsinfo['rx-filters'])] + if 'stats' in tsinfo and tsinfo['stats']: + print('Statistics:') + [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] - print('Statistics:') - [print(f'\t{k}: {v}') for k, v in tsinfo['stats'].items()] return print(f'Settings for {args.device}:') diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index a1427c537030..a7f08edbc235 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -162,9 +162,15 @@ class Type(SpecAttr): def free_needs_iter(self): return False - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): if self.is_multi_val() or self.presence_type() == 'len': - ri.cw.p(f'free({var}->{ref}{self.c_name});') + return [f'free({var}->{ref}{self.c_name});'] + return [] + + def free(self, ri, var, ref): + lines = self._free_lines(ri, var, ref) + for line in lines: + ri.cw.p(line) def arg_member(self, ri): member = self._complex_member_type(ri) @@ -263,6 +269,10 @@ class Type(SpecAttr): var = "req" member = f"{var}->{'.'.join(ref)}" + local_vars = [] + if self.free_needs_iter(): + local_vars += ['unsigned int i;'] + code = [] presence = '' for i in range(0, len(ref)): @@ -272,6 +282,10 @@ class Type(SpecAttr): if i == len(ref) - 1 and self.presence_type() != 'bit': continue code.append(presence + ' = 1;') + ref_path = '.'.join(ref[:-1]) + if ref_path: + ref_path += '.' + code += self._free_lines(ri, var, ref_path) code += self._setter_lines(ri, member, presence) func_name = f"{op_prefix(ri, direction, deref=deref)}_set_{'_'.join(ref)}" @@ -279,7 +293,8 @@ class Type(SpecAttr): alloc = bool([x for x in code if 'alloc(' in x]) if free and not alloc: func_name = '__' + func_name - ri.cw.write_func('static inline void', func_name, body=code, + ri.cw.write_func('static inline void', func_name, local_vars=local_vars, + body=code, args=[f'{type_name(ri, direction, deref=deref)} *{var}'] + self.arg_member(ri)) @@ -482,8 +497,7 @@ class TypeString(Type): ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"free({member});", - f"{presence}_len = strlen({self.c_name});", + return [f"{presence}_len = strlen({self.c_name});", f"{member} = malloc({presence}_len + 1);", f'memcpy({member}, {self.c_name}, {presence}_len);', f'{member}[{presence}_len] = 0;'] @@ -536,8 +550,7 @@ class TypeBinary(Type): ['unsigned int len;'] def _setter_lines(self, ri, member, presence): - return [f"free({member});", - f"{presence}_len = len;", + return [f"{presence}_len = len;", f"{member} = malloc({presence}_len);", f'memcpy({member}, {self.c_name}, {presence}_len);'] @@ -574,12 +587,14 @@ class TypeNest(Type): def _complex_member_type(self, ri): return self.nested_struct_type - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): + lines = [] at = '&' if self.is_recursive_for_op(ri): at = '' - ri.cw.p(f'if ({var}->{ref}{self.c_name})') - ri.cw.p(f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});') + lines += [f'if ({var}->{ref}{self.c_name})'] + lines += [f'{self.nested_render_name}_free({at}{var}->{ref}{self.c_name});'] + return lines def _attr_typol(self): return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, ' @@ -632,15 +647,19 @@ class TypeMultiAttr(Type): def free_needs_iter(self): return 'type' not in self.attr or self.attr['type'] == 'nest' - def free(self, ri, var, ref): + def _free_lines(self, ri, var, ref): + lines = [] if self.attr['type'] in scalars: - ri.cw.p(f"free({var}->{ref}{self.c_name});") + lines += [f"free({var}->{ref}{self.c_name});"] elif 'type' not in self.attr or self.attr['type'] == 'nest': - ri.cw.p(f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)") - ri.cw.p(f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);') - ri.cw.p(f"free({var}->{ref}{self.c_name});") + lines += [ + f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)", + f'{self.nested_render_name}_free(&{var}->{ref}{self.c_name}[i]);', + f"free({var}->{ref}{self.c_name});", + ] else: raise Exception(f"Free of MultiAttr sub-type {self.attr['type']} not supported yet") + return lines def _attr_policy(self, policy): return self.base_type._attr_policy(policy) @@ -654,10 +673,10 @@ class TypeMultiAttr(Type): def attr_put(self, ri, var): if self.attr['type'] in scalars: put_type = self.type - ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);") elif 'type' not in self.attr or self.attr['type'] == 'nest': - ri.cw.p(f"for (unsigned int i = 0; i < {var}->n_{self.c_name}; i++)") + ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)") self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " + f"{self.enum_name}, &{var}->{self.c_name}[i])") else: @@ -666,8 +685,7 @@ class TypeMultiAttr(Type): def _setter_lines(self, ri, member, presence): # For multi-attr we have a count, not presence, hack up the presence presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name - return [f"free({member});", - f"{member} = {self.c_name};", + return [f"{member} = {self.c_name};", f"{presence} = n_{self.c_name};"] @@ -696,8 +714,11 @@ class TypeArrayNest(Type): def _attr_get(self, ri, var): local_vars = ['const struct nlattr *attr2;'] get_lines = [f'attr_{self.c_name} = attr;', - 'ynl_attr_for_each_nested(attr2, attr)', - f'\t{var}->n_{self.c_name}++;'] + 'ynl_attr_for_each_nested(attr2, attr) {', + '\tif (ynl_attr_validate(yarg, attr2))', + '\t\treturn YNL_PARSE_CB_ERROR;', + f'\t{var}->n_{self.c_name}++;', + '}'] return get_lines, None, local_vars @@ -755,6 +776,7 @@ class Struct: self.request = False self.reply = False self.recursive = False + self.in_multi_val = False # used by a MultiAttr or and legacy arrays self.attr_list = [] self.attrs = dict() @@ -1121,6 +1143,9 @@ class Family(SpecFamily): self.pure_nested_structs[nested].request = True if attr in rs_members['reply']: self.pure_nested_structs[nested].reply = True + if spec.is_multi_val(): + child = self.pure_nested_structs.get(nested) + child.in_multi_val = True self._sort_pure_types() @@ -1136,6 +1161,8 @@ class Family(SpecFamily): struct.child_nests.update(child.child_nests) child.request |= struct.request child.reply |= struct.reply + if spec.is_multi_val(): + child.in_multi_val = True if attr_set in struct.child_nests: struct.recursive = True @@ -1399,9 +1426,9 @@ class CodeWriter: def write_func(self, qual_ret, name, body, args=None, local_vars=None): self.write_func_prot(qual_ret=qual_ret, name=name, args=args) + self.block_start() self.write_func_lvar(local_vars=local_vars) - self.block_start() for line in body: self.p(line) self.block_end() @@ -1644,11 +1671,23 @@ def put_req_nested_prototype(ri, struct, suffix=';'): def put_req_nested(ri, struct): + local_vars = [] + init_lines = [] + + local_vars.append('struct nlattr *nest;') + init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);") + + for _, arg in struct.member_list(): + if arg.presence_type() == 'count': + local_vars.append('unsigned int i;') + break + put_req_nested_prototype(ri, struct, suffix='') ri.cw.block_start() - ri.cw.write_func_lvar('struct nlattr *nest;') + ri.cw.write_func_lvar(local_vars) - ri.cw.p("nest = ynl_attr_nest_start(nlh, attr_type);") + for line in init_lines: + ri.cw.p(line) for _, arg in struct.member_list(): arg.attr_put(ri, "obj") @@ -1850,6 +1889,11 @@ def print_req(ri): local_vars += ['size_t hdr_len;', 'void *hdr;'] + for _, attr in ri.struct["request"].member_list(): + if attr.presence_type() == 'count': + local_vars += ['unsigned int i;'] + break + print_prototype(ri, direction, terminate=False) ri.cw.block_start() ri.cw.write_func_lvar(local_vars) @@ -2941,6 +2985,9 @@ def main(): for attr_set, struct in parsed.pure_nested_structs.items(): ri = RenderInfo(cw, parsed, args.mode, "", "", attr_set) print_type_full(ri, struct) + if struct.request and struct.in_multi_val: + free_rsp_nested_prototype(ri) + cw.nl() for op_name, op in parsed.ops.items(): cw.p(f"/* ============== {op.enum_name} ============== */") diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 33d861c04ebd..98c4713c1b09 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -189,6 +189,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec op2 = ins.opcode.bytes[1]; op3 = ins.opcode.bytes[2]; + /* + * XXX hack, decoder is buggered and thinks 0xea is 7 bytes long. + */ + if (op1 == 0xea) { + insn->len = 1; + insn->type = INSN_BUG; + return 0; + } + if (ins.rex_prefix.nbytes) { rex = ins.rex_prefix.bytes[0]; rex_w = X86_REX_W(rex) >> 3; @@ -522,7 +531,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec case INAT_PFX_REPNE: if (modrm == 0xca) /* eretu/erets */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; break; default: if (modrm == 0xca) @@ -535,11 +544,15 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec insn->type = INSN_JUMP_CONDITIONAL; - } else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 || - op2 == 0x35) { + } else if (op2 == 0x05 || op2 == 0x34) { + + /* syscall, sysenter */ + insn->type = INSN_SYSCALL; + + } else if (op2 == 0x07 || op2 == 0x35) { - /* sysenter, sysret */ - insn->type = INSN_CONTEXT_SWITCH; + /* sysret, sysexit */ + insn->type = INSN_SYSRET; } else if (op2 == 0x0b || op2 == 0xb9) { @@ -676,7 +689,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec case 0xca: /* retf */ case 0xcb: /* retf */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; break; case 0xe0: /* loopne */ @@ -721,7 +734,7 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec } else if (modrm_reg == 5) { /* jmpf */ - insn->type = INSN_CONTEXT_SWITCH; + insn->type = INSN_SYSRET; } else if (modrm_reg == 6) { @@ -838,12 +851,14 @@ int arch_decode_hint_reg(u8 sp_reg, int *base) bool arch_is_retpoline(struct symbol *sym) { - return !strncmp(sym->name, "__x86_indirect_", 15); + return !strncmp(sym->name, "__x86_indirect_", 15) || + !strncmp(sym->name, "__pi___x86_indirect_", 20); } bool arch_is_rethunk(struct symbol *sym) { - return !strcmp(sym->name, "__x86_return_thunk"); + return !strcmp(sym->name, "__x86_return_thunk") || + !strcmp(sym->name, "__pi___x86_return_thunk"); } bool arch_is_embedded_insn(struct symbol *sym) diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c index 403e587676f1..06ca4a2659a4 100644 --- a/tools/objtool/arch/x86/special.c +++ b/tools/objtool/arch/x86/special.c @@ -126,7 +126,7 @@ struct reloc *arch_find_switch_table(struct objtool_file *file, * indicates a rare GCC quirk/bug which can leave dead * code behind. */ - if (reloc_type(text_reloc) == R_X86_64_PC32) { + if (!file->ignore_unreachables && reloc_type(text_reloc) == R_X86_64_PC32) { WARN_INSN(insn, "ignoring unreachables due to jump table quirk"); file->ignore_unreachables = true; } diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 4a1f6c3169b3..b21b12ec88d9 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -225,7 +225,9 @@ static bool is_rust_noreturn(const struct symbol *func) str_ends_with(func->name, "_4core9panicking14panic_nounwind") || str_ends_with(func->name, "_4core9panicking18panic_bounds_check") || str_ends_with(func->name, "_4core9panicking19assert_failed_inner") || + str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") || str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") || + str_ends_with(func->name, "_7___rustc17rust_begin_unwind") || strstr(func->name, "_4core9panicking13assert_failed") || strstr(func->name, "_4core9panicking11panic_const24panic_const_") || (strstr(func->name, "_4core5slice5index24slice_") && @@ -3505,6 +3507,34 @@ next_orig: return next_insn_same_sec(file, alt_group->orig_group->last_insn); } +static bool skip_alt_group(struct instruction *insn) +{ + struct instruction *alt_insn = insn->alts ? insn->alts->insn : NULL; + + /* ANNOTATE_IGNORE_ALTERNATIVE */ + if (insn->alt_group && insn->alt_group->ignore) + return true; + + /* + * For NOP patched with CLAC/STAC, only follow the latter to avoid + * impossible code paths combining patched CLAC with unpatched STAC + * or vice versa. + * + * ANNOTATE_IGNORE_ALTERNATIVE could have been used here, but Linus + * requested not to do that to avoid hurting .s file readability + * around CLAC/STAC alternative sites. + */ + + if (!alt_insn) + return false; + + /* Don't override ASM_{CLAC,STAC}_UNSAFE */ + if (alt_insn->alt_group && alt_insn->alt_group->ignore) + return false; + + return alt_insn->type == INSN_CLAC || alt_insn->type == INSN_STAC; +} + /* * Follow the branch starting at the given instruction, and recursively follow * any other branches (jumps). Meanwhile, track the frame pointer state at @@ -3625,7 +3655,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, } } - if (insn->alt_group && insn->alt_group->ignore) + if (skip_alt_group(insn)) return 0; if (handle_insn_ops(insn, next_insn, &state)) @@ -3684,14 +3714,20 @@ static int validate_branch(struct objtool_file *file, struct symbol *func, break; - case INSN_CONTEXT_SWITCH: - if (func) { - if (!next_insn || !next_insn->hint) { - WARN_INSN(insn, "unsupported instruction in callable function"); - return 1; - } - break; + case INSN_SYSCALL: + if (func && (!next_insn || !next_insn->hint)) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; + } + + break; + + case INSN_SYSRET: + if (func && (!next_insn || !next_insn->hint)) { + WARN_INSN(insn, "unsupported instruction in callable function"); + return 1; } + return 0; case INSN_STAC: @@ -3886,6 +3922,12 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) WARN_INSN(insn, "RET before UNTRAIN"); return 1; + case INSN_SYSCALL: + break; + + case INSN_SYSRET: + return 0; + case INSN_NOP: if (insn->retpoline_safe) return 0; @@ -3895,6 +3937,9 @@ static int validate_unret(struct objtool_file *file, struct instruction *insn) break; } + if (insn->dead_end) + return 0; + if (!next) { WARN_INSN(insn, "teh end!"); return 1; diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index 727a3a4fd9d7..ca5d77db692a 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -572,6 +572,34 @@ err: return -1; } +static int mark_group_syms(struct elf *elf) +{ + struct section *symtab, *sec; + struct symbol *sym; + + symtab = find_section_by_name(elf, ".symtab"); + if (!symtab) { + ERROR("no .symtab"); + return -1; + } + + list_for_each_entry(sec, &elf->sections, list) { + if (sec->sh.sh_type == SHT_GROUP && + sec->sh.sh_link == symtab->idx) { + sym = find_symbol_by_index(elf, sec->sh.sh_info); + if (!sym) { + ERROR("%s: can't find SHT_GROUP signature symbol", + sec->name); + return -1; + } + + sym->group_sec = sec; + } + } + + return 0; +} + /* * @sym's idx has changed. Update the relocs which reference it. */ @@ -745,7 +773,7 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym) /* * Move the first global symbol, as per sh_info, into a new, higher - * symbol index. This fees up a spot for a new local symbol. + * symbol index. This frees up a spot for a new local symbol. */ first_non_local = symtab->sh.sh_info; old = find_symbol_by_index(elf, first_non_local); @@ -763,6 +791,11 @@ __elf_create_symbol(struct elf *elf, struct symbol *sym) if (elf_update_sym_relocs(elf, old)) return NULL; + if (old->group_sec) { + old->group_sec->sh.sh_info = new_idx; + mark_sec_changed(elf, old->group_sec, true); + } + new_idx = first_non_local; } @@ -1035,6 +1068,9 @@ struct elf *elf_open_read(const char *name, int flags) if (read_symbols(elf)) goto err; + if (mark_group_syms(elf)) + goto err; + if (read_relocs(elf)) goto err; diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h index 089a1acc48a8..01ef6f415adf 100644 --- a/tools/objtool/include/objtool/arch.h +++ b/tools/objtool/include/objtool/arch.h @@ -19,7 +19,8 @@ enum insn_type { INSN_CALL, INSN_CALL_DYNAMIC, INSN_RETURN, - INSN_CONTEXT_SWITCH, + INSN_SYSCALL, + INSN_SYSRET, INSN_BUG, INSN_NOP, INSN_STAC, diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h index c7c4e87ebe88..0a2fa3ac0079 100644 --- a/tools/objtool/include/objtool/elf.h +++ b/tools/objtool/include/objtool/elf.h @@ -72,6 +72,7 @@ struct symbol { u8 ignore : 1; struct list_head pv_target; struct reloc *relocs; + struct section *group_sec; }; struct reloc { diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index eea95c6c0c71..b7769a22fe1a 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -29,6 +29,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) CFLAGS += -I$(OUTPUT)arch/$(SRCARCH)/include/generated +CFLAGS += -I$(OUTPUT)libperf/arch/$(SRCARCH)/include/generated/uapi # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index 49eeb2ad8dbd..27c1d5ebcd91 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl @@ -481,3 +481,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index c844cd5cda62..1e8c44c7b614 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -381,3 +381,4 @@ 464 n64 getxattrat sys_getxattrat 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat +467 n64 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index d8b4ab78bef0..9a084bdb8926 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -557,3 +557,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index e9115b4d8b63..a4569b96ef06 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index c8cad33bf250..52a7652fcff6 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl @@ -470,3 +470,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 727f99d333b3..83e45eb6c095 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl @@ -512,3 +512,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index 4d0fb2fba7e2..ac007ea00979 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -396,7 +396,7 @@ 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free 383 i386 statx sys_statx -384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl +384 i386 arch_prctl sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents 386 i386 rseq sys_rseq 393 i386 semget sys_semget @@ -472,3 +472,4 @@ 464 i386 getxattrat sys_getxattrat 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat +467 i386 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 5eb708bff1c7..cfb5ca41e30d 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -390,6 +390,7 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index 37effc1b134e..f657a77314f8 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build index 279ab2ab4abe..b558ab98719f 100644 --- a/tools/perf/bench/Build +++ b/tools/perf/bench/Build @@ -3,6 +3,7 @@ perf-bench-y += sched-pipe.o perf-bench-y += sched-seccomp-notify.o perf-bench-y += syscall.o perf-bench-y += mem-functions.o +perf-bench-y += futex.o perf-bench-y += futex-hash.o perf-bench-y += futex-wake.o perf-bench-y += futex-wake-parallel.o diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index b472eded521b..fdf133c9520f 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -18,9 +18,11 @@ #include <stdlib.h> #include <linux/compiler.h> #include <linux/kernel.h> +#include <linux/prctl.h> #include <linux/zalloc.h> #include <sys/time.h> #include <sys/mman.h> +#include <sys/prctl.h> #include <perf/cpumap.h> #include "../util/mutex.h" @@ -50,9 +52,12 @@ struct worker { static struct bench_futex_parameters params = { .nfutexes = 1024, .runtime = 10, + .nbuckets = -1, }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), + OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"), OPT_UINTEGER('f', "futexes", ¶ms.nfutexes, "Specify amount of futexes per threads"), @@ -118,6 +123,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), (int)bench__runtime.tv_sec); + futex_print_nbuckets(¶ms); } int bench_futex_hash(int argc, const char **argv) @@ -161,6 +167,7 @@ int bench_futex_hash(int argc, const char **argv) if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; + futex_set_nbuckets_param(¶ms); printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", getpid(), params.nthreads, params.nfutexes, params.fshared ? "shared":"private", params.runtime); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 0416120c091b..5144a158512c 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -41,10 +41,13 @@ static struct stats throughput_stats; static struct cond thread_parent, thread_worker; static struct bench_futex_parameters params = { + .nbuckets = -1, .runtime = 10, }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), + OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('r', "runtime", ¶ms.runtime, "Specify runtime (in seconds)"), OPT_BOOLEAN( 'M', "multi", ¶ms.multi, "Use multiple futexes"), @@ -67,6 +70,7 @@ static void print_summary(void) printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", !params.silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), (int)bench__runtime.tv_sec); + futex_print_nbuckets(¶ms); } static void toggle_done(int sig __maybe_unused, @@ -203,6 +207,7 @@ int bench_futex_lock_pi(int argc, const char **argv) mutex_init(&thread_lock); cond_init(&thread_parent); cond_init(&thread_worker); + futex_set_nbuckets_param(¶ms); threads_starting = params.nthreads; gettimeofday(&bench__start, NULL); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index aad5bfc4fe18..a2f91ee1950b 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -42,6 +42,7 @@ static unsigned int threads_starting; static int futex_flag = 0; static struct bench_futex_parameters params = { + .nbuckets = -1, /* * How many tasks to requeue at a time. * Default to 1 in order to make the kernel work more. @@ -50,6 +51,8 @@ static struct bench_futex_parameters params = { }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), + OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('q', "nrequeue", ¶ms.nrequeue, "Specify amount of threads to requeue at once"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), @@ -77,6 +80,7 @@ static void print_summary(void) params.nthreads, requeuetime_avg / USEC_PER_MSEC, rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); + futex_print_nbuckets(¶ms); } static void *workerfn(void *arg __maybe_unused) @@ -204,6 +208,8 @@ int bench_futex_requeue(int argc, const char **argv) if (params.broadcast) params.nrequeue = params.nthreads; + futex_set_nbuckets_param(¶ms); + printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %s%p), " "%d at a time.\n\n", getpid(), params.nthreads, params.fshared ? "shared":"private", &futex1, diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 4352e318631e..ee66482c29fd 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -57,9 +57,13 @@ static struct stats waketime_stats, wakeup_stats; static unsigned int threads_starting; static int futex_flag = 0; -static struct bench_futex_parameters params; +static struct bench_futex_parameters params = { + .nbuckets = -1, +}; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), + OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakers", ¶ms.nwakes, "Specify amount of waking threads"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), @@ -218,6 +222,7 @@ static void print_summary(void) params.nthreads, waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); + futex_print_nbuckets(¶ms); } @@ -291,6 +296,8 @@ int bench_futex_wake_parallel(int argc, const char **argv) if (!params.fshared) futex_flag = FUTEX_PRIVATE_FLAG; + futex_set_nbuckets_param(¶ms); + printf("Run summary [PID %d]: blocking on %d threads (at [%s] " "futex %p), %d threads waking up %d at a time.\n\n", getpid(), params.nthreads, params.fshared ? "shared":"private", diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 49b3c89b0b35..8d6107f7cd94 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -42,6 +42,7 @@ static unsigned int threads_starting; static int futex_flag = 0; static struct bench_futex_parameters params = { + .nbuckets = -1, /* * How many wakeups to do at a time. * Default to 1 in order to make the kernel work more. @@ -50,6 +51,8 @@ static struct bench_futex_parameters params = { }; static const struct option options[] = { + OPT_INTEGER( 'b', "buckets", ¶ms.nbuckets, "Specify amount of hash buckets"), + OPT_BOOLEAN( 'I', "immutable", ¶ms.buckets_immutable, "Make the hash buckets immutable"), OPT_UINTEGER('t', "threads", ¶ms.nthreads, "Specify amount of threads"), OPT_UINTEGER('w', "nwakes", ¶ms.nwakes, "Specify amount of threads to wake at once"), OPT_BOOLEAN( 's', "silent", ¶ms.silent, "Silent mode: do not display data/details"), @@ -93,6 +96,7 @@ static void print_summary(void) params.nthreads, waketime_avg / USEC_PER_MSEC, rel_stddev_stats(waketime_stddev, waketime_avg)); + futex_print_nbuckets(¶ms); } static void block_threads(pthread_t *w, struct perf_cpu_map *cpu) diff --git a/tools/perf/bench/futex.c b/tools/perf/bench/futex.c new file mode 100644 index 000000000000..26382e4d8d4c --- /dev/null +++ b/tools/perf/bench/futex.c @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <linux/prctl.h> +#include <sys/prctl.h> + +#include "futex.h" + +void futex_set_nbuckets_param(struct bench_futex_parameters *params) +{ + unsigned long flags; + int ret; + + if (params->nbuckets < 0) + return; + + flags = params->buckets_immutable ? FH_FLAG_IMMUTABLE : 0; + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, params->nbuckets, flags); + if (ret) { + printf("Requesting %d hash buckets failed: %d/%m\n", + params->nbuckets, ret); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } +} + +void futex_print_nbuckets(struct bench_futex_parameters *params) +{ + char *futex_hash_mode; + int ret; + + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); + if (params->nbuckets >= 0) { + if (ret != params->nbuckets) { + if (ret < 0) { + printf("Can't query number of buckets: %m\n"); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } + printf("Requested number of hash buckets does not currently used.\n"); + printf("Requested: %d in usage: %d\n", params->nbuckets, ret); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } + if (params->nbuckets == 0) { + ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); + } else { + ret = prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE); + if (ret < 0) { + printf("Can't check if the hash is immutable: %m\n"); + err(EXIT_FAILURE, "prctl(PR_FUTEX_HASH)"); + } + ret = asprintf(&futex_hash_mode, "Futex hashing: %d hash buckets %s", + params->nbuckets, + ret == 1 ? "(immutable)" : ""); + } + } else { + if (ret <= 0) { + ret = asprintf(&futex_hash_mode, "Futex hashing: global hash"); + } else { + ret = asprintf(&futex_hash_mode, "Futex hashing: auto resized to %d buckets", + ret); + } + } + if (ret < 0) + err(EXIT_FAILURE, "ENOMEM, futex_hash_mode"); + printf("%s\n", futex_hash_mode); + free(futex_hash_mode); +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h index ebdc2b032afc..9c9a73f9d865 100644 --- a/tools/perf/bench/futex.h +++ b/tools/perf/bench/futex.h @@ -25,6 +25,8 @@ struct bench_futex_parameters { unsigned int nfutexes; unsigned int nwakes; unsigned int nrequeue; + int nbuckets; + bool buckets_immutable; }; /** @@ -143,4 +145,7 @@ futex_cmp_requeue_pi(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, val, opflags); } +void futex_set_nbuckets_param(struct bench_futex_parameters *params); +void futex_print_nbuckets(struct bench_futex_parameters *params); + #endif /* _FUTEX_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index a4499e5a6f9c..e9fab20e9330 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -20,6 +20,7 @@ FILES=( "include/uapi/linux/stat.h" "include/linux/bits.h" "include/vdso/bits.h" + "include/linux/cfi_types.h" "include/linux/const.h" "include/vdso/const.h" "include/vdso/unaligned.h" @@ -185,7 +186,7 @@ done # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include <linux/cfi_types.h>"' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' -check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' +check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/linux/unaligned.h '-I "^#include <linux/unaligned/packed_struct.h>" -I "^#include <asm/byteorder.h>" -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index d18cc47e89bd..c3322eb3d686 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -392,6 +392,8 @@ struct ucred { extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +extern int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len, + void *data); struct timespec64; struct __kernel_timespec; diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index 6e6907e63bfc..a15ac2fa4b20 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -155,4 +155,8 @@ #define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ #define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */ +/* Flags for execveat2(2). */ +#define AT_EXECVE_CHECK 0x10000 /* Only perform a check if execution + would be allowed. */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 753971770733..e762e1af650c 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -40,6 +40,15 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) +/* flags for integrity meta */ +#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */ +#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */ +#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */ + +#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \ + IO_INTEGRITY_CHK_REFTAG | \ + IO_INTEGRITY_CHK_APPTAG) + #define SEEK_SET 0 /* seek relative to beginning of file */ #define SEEK_CUR 1 /* seek relative to current file position */ #define SEEK_END 2 /* seek relative to end of file */ @@ -203,10 +212,8 @@ struct fsxattr { #define BLKROTATIONAL _IO(0x12,126) #define BLKZEROOUT _IO(0x12,127) #define BLKGETDISKSEQ _IOR(0x12,128,__u64) -/* - * A jump here: 130-136 are reserved for zoned block devices - * (see uapi/linux/blkzoned.h) - */ +/* 130-136 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */ +/* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */ #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ @@ -332,9 +339,13 @@ typedef int __bitwise __kernel_rwf_t; /* Atomic Write */ #define RWF_ATOMIC ((__force __kernel_rwf_t)0x00000040) +/* buffered IO that drops the cache after reading or writing data */ +#define RWF_DONTCACHE ((__force __kernel_rwf_t)0x00000080) + /* mask of flags supported by the kernel */ #define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ - RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC) + RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ + RWF_DONTCACHE) #define PROCFS_IOCTL_MAGIC 'f' diff --git a/tools/perf/trace/beauty/include/uapi/linux/mount.h b/tools/perf/trace/beauty/include/uapi/linux/mount.h index c07008816aca..7fa67c2031a5 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/mount.h +++ b/tools/perf/trace/beauty/include/uapi/linux/mount.h @@ -179,7 +179,12 @@ struct statmount { __u32 opt_array; /* [str] Array of nul terminated fs options */ __u32 opt_sec_num; /* Number of security options */ __u32 opt_sec_array; /* [str] Array of nul terminated security options */ - __u64 __spare2[46]; + __u64 supported_mask; /* Mask flags that this kernel supports */ + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ + __u64 __spare2[43]; char str[]; /* Variable size part containing strings */ }; @@ -217,6 +222,9 @@ struct mnt_id_req { #define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ #define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ #define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */ +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ /* * Special @mnt_id values that can be passed to listmount diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 5c6080680cb2..15c18ef4eb11 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -353,4 +353,15 @@ struct prctl_mm_map { */ #define PR_LOCK_SHADOW_STACK_STATUS 76 +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + #endif /* _LINUX_PRCTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/stat.h b/tools/perf/trace/beauty/include/uapi/linux/stat.h index 887a25286441..f78ee3670dd5 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/stat.h +++ b/tools/perf/trace/beauty/include/uapi/linux/stat.h @@ -98,43 +98,93 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 4cd513215bcd..5a049eeaecce 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -716,7 +716,7 @@ enum { * Raw MIDI section - /dev/snd/midi?? */ -#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4) +#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 5) enum { SNDRV_RAWMIDI_STREAM_OUTPUT = 0, @@ -728,6 +728,9 @@ enum { #define SNDRV_RAWMIDI_INFO_INPUT 0x00000002 #define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004 #define SNDRV_RAWMIDI_INFO_UMP 0x00000008 +#define SNDRV_RAWMIDI_INFO_STREAM_INACTIVE 0x00000010 + +#define SNDRV_RAWMIDI_DEVICE_UNKNOWN 0 struct snd_rawmidi_info { unsigned int device; /* RO/WR (control): device number */ @@ -740,7 +743,8 @@ struct snd_rawmidi_info { unsigned char subname[32]; /* name of active or selected subdevice */ unsigned int subdevices_count; unsigned int subdevices_avail; - unsigned char reserved[64]; /* reserved for future use */ + int tied_device; /* R: tied rawmidi device (UMP/legacy) */ + unsigned char reserved[60]; /* reserved for future use */ }; #define SNDRV_RAWMIDI_MODE_FRAMING_MASK (7<<0) diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index 9d0ce88e90e4..456ce64ad822 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -9,7 +9,7 @@ #include <inttypes.h> #include <linux/string.h> -#include "../../arch/x86/include/asm/amd-ibs.h" +#include "../../arch/x86/include/asm/amd/ibs.h" #include "debug.h" #include "session.h" diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1974395492d7..3c030da2e477 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2566,25 +2566,6 @@ check: return false; } -static bool evsel__handle_error_quirks(struct evsel *evsel, int error) -{ - /* - * AMD core PMU tries to forward events with precise_ip to IBS PMU - * implicitly. But IBS PMU has more restrictions so it can fail with - * supported event attributes. Let's forward it back to the core PMU - * by clearing precise_ip only if it's from precise_max (:P). - */ - if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() && - evsel->core.attr.precise_ip && evsel->precise_max) { - evsel->core.attr.precise_ip = 0; - pr_debug2_peo("removing precise_ip on AMD\n"); - display_attr(&evsel->core.attr); - return true; - } - - return false; -} - static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads, int start_cpu_map_idx, int end_cpu_map_idx) @@ -2730,9 +2711,6 @@ try_fallback: if (evsel__precise_ip_fallback(evsel)) goto retry_open; - if (evsel__handle_error_quirks(evsel, err)) - goto retry_open; - out_close: if (err) threads->err_thread = thread; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 9fb2c1343c7f..0b037e7389a0 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -371,7 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, * has to be pointed by symsrc_filename */ if (ofs == 0) { - if (dso__data_get_fd(dso, machine, &fd) { + if (dso__data_get_fd(dso, machine, &fd)) { ofs = elf_section_offset(fd, ".debug_frame"); dso__data_put_fd(dso); } diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c index 2c720e3ecad5..fdc7170639e6 100644 --- a/tools/sched_ext/scx_flatcg.bpf.c +++ b/tools/sched_ext/scx_flatcg.bpf.c @@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops, .cgroup_move = (void *)fcg_cgroup_move, .init = (void *)fcg_init, .exit = (void *)fcg_exit, - .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING, + .flags = SCX_OPS_ENQ_EXITING, .name = "flatcg"); diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl index ebbdb3c42e9f..580b4e246aec 100644 --- a/tools/scripts/syscall.tbl +++ b/tools/scripts/syscall.tbl @@ -407,3 +407,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/testing/crypto/chacha20-s390/test-cipher.c b/tools/testing/crypto/chacha20-s390/test-cipher.c index 35ea65c54ffa..827507844e8f 100644 --- a/tools/testing/crypto/chacha20-s390/test-cipher.c +++ b/tools/testing/crypto/chacha20-s390/test-cipher.c @@ -50,7 +50,7 @@ struct skcipher_def { /* Perform cipher operations with the chacha lib */ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) { - u32 chacha_state[CHACHA_STATE_WORDS]; + struct chacha_state chacha_state; u8 iv[16], key[32]; u64 start, end; @@ -66,10 +66,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) } /* Encrypt */ - chacha_init(chacha_state, (u32 *)key, iv); + chacha_init(&chacha_state, (u32 *)key, iv); start = ktime_get_ns(); - chacha_crypt_arch(chacha_state, cipher, plain, data_size, 20); + chacha_crypt_arch(&chacha_state, cipher, plain, data_size, 20); end = ktime_get_ns(); @@ -81,10 +81,10 @@ static int test_lib_chacha(u8 *revert, u8 *cipher, u8 *plain) pr_info("lib encryption took: %lld nsec", end - start); /* Decrypt */ - chacha_init(chacha_state, (u32 *)key, iv); + chacha_init(&chacha_state, (u32 *)key, iv); start = ktime_get_ns(); - chacha_crypt_arch(chacha_state, revert, cipher, data_size, 20); + chacha_crypt_arch(&chacha_state, revert, cipher, data_size, 20); end = ktime_get_ns(); if (debug) diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c index f2957a3e36fe..bf9caa908f89 100644 --- a/tools/testing/cxl/test/mem.c +++ b/tools/testing/cxl/test/mem.c @@ -1780,7 +1780,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev) if (rc) return rc; - rc = devm_cxl_setup_fwctl(cxlmd); + rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd); if (rc) dev_dbg(dev, "No CXL FWCTL setup\n"); diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config index cdd9782f9646..e70c502a16df 100644 --- a/tools/testing/kunit/configs/all_tests.config +++ b/tools/testing/kunit/configs/all_tests.config @@ -10,6 +10,7 @@ CONFIG_KUNIT_EXAMPLE_TEST=y CONFIG_KUNIT_ALL_TESTS=y CONFIG_FORTIFY_SOURCE=y +CONFIG_INIT_STACK_ALL_PATTERN=y CONFIG_IIO=y @@ -20,6 +21,7 @@ CONFIG_VFAT_FS=y CONFIG_PCI=y CONFIG_USB4=y +CONFIG_I2C=y CONFIG_NET=y CONFIG_MCTP=y @@ -43,6 +45,8 @@ CONFIG_REGMAP_BUILD=y CONFIG_AUDIT=y +CONFIG_PRIME_NUMBERS=y + CONFIG_SECURITY=y CONFIG_SECURITY_APPARMOR=y CONFIG_SECURITY_LANDLOCK=y @@ -51,3 +55,4 @@ CONFIG_SOUND=y CONFIG_SND=y CONFIG_SND_SOC=y CONFIG_SND_SOC_TOPOLOGY_BUILD=y +CONFIG_SND_SOC_CS35L56_I2C=y diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py index 10ff65689dd8..80fa4e354a17 100644 --- a/tools/testing/kunit/kunit_json.py +++ b/tools/testing/kunit/kunit_json.py @@ -39,10 +39,20 @@ def _get_group_json(test: Test, common_fields: JsonObj) -> JsonObj: status = _status_map.get(subtest.status, "FAIL") test_cases.append({"name": subtest.name, "status": status}) + test_counts = test.counts + counts_json = { + "tests": test_counts.total(), + "passed": test_counts.passed, + "failed": test_counts.failed, + "crashed": test_counts.crashed, + "skipped": test_counts.skipped, + "errors": test_counts.errors, + } test_group = { "name": test.name, "sub_groups": sub_groups, "test_cases": test_cases, + "misc": counts_json } test_group.update(common_fields) return test_group diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index d3f39bc1ceec..260d8d9aa1db 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -14,6 +14,7 @@ import os import shlex import shutil import signal +import sys import threading from typing import Iterator, List, Optional, Tuple from types import FrameType @@ -201,6 +202,13 @@ def _default_qemu_config_path(arch: str) -> str: return config_path options = [f[:-3] for f in os.listdir(QEMU_CONFIGS_DIR) if f.endswith('.py')] + + if arch == 'help': + print('um') + for option in options: + print(option) + sys.exit() + raise ConfigError(arch + ' is not a valid arch, options are ' + str(sorted(options))) def _get_qemu_ops(config_path: str, diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index da53a709773a..c176487356e6 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -809,6 +809,10 @@ def parse_test(lines: LineStream, expected_num: int, log: List[str], is_subtest: test.log.extend(parse_diagnostic(lines)) if test.name != "" and not peek_test_name_match(lines, test): test.add_error(printer, 'missing subtest result line!') + elif not lines: + print_log(test.log, printer) + test.status = TestStatus.NO_TESTS + test.add_error(printer, 'No more test results!') else: parse_test_result(lines, test, expected_num, printer) diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 5ff4f6ffd873..bbba921e0eac 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -371,8 +371,8 @@ class KUnitParserTest(unittest.TestCase): """ result = kunit_parser.parse_run_tests(output.splitlines(), stdout) # Missing test results after test plan should alert a suspected test crash. - self.assertEqual(kunit_parser.TestStatus.TEST_CRASHED, result.status) - self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, crashed=1, errors=1)) + self.assertEqual(kunit_parser.TestStatus.SUCCESS, result.status) + self.assertEqual(result.counts, kunit_parser.TestCounts(passed=1, errors=2)) def line_stream_from_strs(strs: Iterable[str]) -> kunit_parser.LineStream: return kunit_parser.LineStream(enumerate(strs, start=1)) diff --git a/tools/testing/kunit/qemu_configs/powerpc.py b/tools/testing/kunit/qemu_configs/powerpc.py index 7ec38d4131f7..5b4c895d5d5a 100644 --- a/tools/testing/kunit/qemu_configs/powerpc.py +++ b/tools/testing/kunit/qemu_configs/powerpc.py @@ -3,6 +3,7 @@ from ..qemu_config import QemuArchParams QEMU_ARCH = QemuArchParams(linux_arch='powerpc', kconfig=''' CONFIG_PPC64=y +CONFIG_CPU_BIG_ENDIAN=y CONFIG_SERIAL_8250=y CONFIG_SERIAL_8250_CONSOLE=y CONFIG_HVC_CONSOLE=y''', diff --git a/tools/testing/kunit/qemu_configs/powerpc32.py b/tools/testing/kunit/qemu_configs/powerpc32.py new file mode 100644 index 000000000000..88bd60dbb948 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/powerpc32.py @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='powerpc', + kconfig=''' +CONFIG_PPC32=y +CONFIG_CPU_BIG_ENDIAN=y +CONFIG_ADB_CUDA=y +CONFIG_SERIAL_PMACZILOG=y +CONFIG_SERIAL_PMACZILOG_TTYS=y +CONFIG_SERIAL_PMACZILOG_CONSOLE=y +''', + qemu_arch='ppc', + kernel_path='vmlinux', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'g3beige', '-cpu', 'max']) diff --git a/tools/testing/kunit/qemu_configs/powerpcle.py b/tools/testing/kunit/qemu_configs/powerpcle.py new file mode 100644 index 000000000000..7ddee8af4bd7 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/powerpcle.py @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='powerpc', + kconfig=''' +CONFIG_PPC64=y +CONFIG_CPU_LITTLE_ENDIAN=y +CONFIG_HVC_CONSOLE=y +''', + qemu_arch='ppc64', + kernel_path='vmlinux', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-M', 'pseries', '-cpu', 'power8']) diff --git a/tools/testing/kunit/qemu_configs/riscv32.py b/tools/testing/kunit/qemu_configs/riscv32.py new file mode 100644 index 000000000000..b79ba0ae30f8 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/riscv32.py @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='riscv', + kconfig=''' +CONFIG_NONPORTABLE=y +CONFIG_ARCH_RV32I=y +CONFIG_ARCH_VIRT=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_OF_PLATFORM=y +''', + qemu_arch='riscv32', + kernel_path='arch/riscv/boot/Image', + kernel_command_line='console=ttyS0', + extra_qemu_params=['-machine', 'virt']) diff --git a/tools/testing/kunit/qemu_configs/sh.py b/tools/testing/kunit/qemu_configs/sh.py index 78a474a5b95f..f00cb89fdef6 100644 --- a/tools/testing/kunit/qemu_configs/sh.py +++ b/tools/testing/kunit/qemu_configs/sh.py @@ -7,7 +7,9 @@ CONFIG_CPU_SUBTYPE_SH7751R=y CONFIG_MEMORY_START=0x0c000000 CONFIG_SH_RTS7751R2D=y CONFIG_RTS7751R2D_PLUS=y -CONFIG_SERIAL_SH_SCI=y''', +CONFIG_SERIAL_SH_SCI=y +CONFIG_CMDLINE_EXTEND=y +''', qemu_arch='sh4', kernel_path='arch/sh/boot/zImage', kernel_command_line='console=ttySC1', diff --git a/tools/testing/kunit/qemu_configs/sparc.py b/tools/testing/kunit/qemu_configs/sparc.py index 256d9573b446..2019550a1b69 100644 --- a/tools/testing/kunit/qemu_configs/sparc.py +++ b/tools/testing/kunit/qemu_configs/sparc.py @@ -2,6 +2,8 @@ from ..qemu_config import QemuArchParams QEMU_ARCH = QemuArchParams(linux_arch='sparc', kconfig=''' +CONFIG_KUNIT_FAULT_TEST=n +CONFIG_SPARC32=y CONFIG_SERIAL_SUNZILOG=y CONFIG_SERIAL_SUNZILOG_CONSOLE=y ''', diff --git a/tools/testing/kunit/qemu_configs/sparc64.py b/tools/testing/kunit/qemu_configs/sparc64.py new file mode 100644 index 000000000000..53d4e5a8c972 --- /dev/null +++ b/tools/testing/kunit/qemu_configs/sparc64.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 + +from ..qemu_config import QemuArchParams + +QEMU_ARCH = QemuArchParams(linux_arch='sparc', + kconfig=''' +CONFIG_64BIT=y +CONFIG_SPARC64=y +CONFIG_PCI=y +CONFIG_SERIAL_SUNSU=y +CONFIG_SERIAL_SUNSU_CONSOLE=y +''', + qemu_arch='sparc64', + kernel_path='arch/sparc/boot/image', + kernel_command_line='console=ttyS0 kunit_shutdown=poweroff', + extra_qemu_params=[]) diff --git a/tools/testing/memblock/internal.h b/tools/testing/memblock/internal.h index 1cf82acb2a3e..0ab4b53bb4f3 100644 --- a/tools/testing/memblock/internal.h +++ b/tools/testing/memblock/internal.h @@ -24,4 +24,10 @@ static inline void accept_memory(phys_addr_t start, unsigned long size) { } +static inline unsigned long free_reserved_area(void *start, void *end, + int poison, const char *s) +{ + return 0; +} + #endif diff --git a/tools/testing/memblock/linux/mutex.h b/tools/testing/memblock/linux/mutex.h new file mode 100644 index 000000000000..ae3f497165d6 --- /dev/null +++ b/tools/testing/memblock/linux/mutex.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _MUTEX_H +#define _MUTEX_H + +#define DEFINE_MUTEX(name) int name + +static inline void dummy_mutex_guard(int *name) +{ +} + +#define guard(mutex) \ + dummy_##mutex##_guard + +#endif /* _MUTEX_H */
\ No newline at end of file diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c index 67503089e6a0..01e836fba488 100644 --- a/tools/testing/memblock/tests/basic_api.c +++ b/tools/testing/memblock/tests/basic_api.c @@ -2434,6 +2434,107 @@ static int memblock_overlaps_region_checks(void) return 0; } +#ifdef CONFIG_NUMA +static int memblock_set_node_check(void) +{ + unsigned long i, max_reserved; + struct memblock_region *rgn; + void *orig_region; + + PREFIX_PUSH(); + + reset_memblock_regions(); + memblock_allow_resize(); + + dummy_physical_memory_init(); + memblock_add(dummy_physical_memory_base(), MEM_SIZE); + orig_region = memblock.reserved.regions; + + /* Equally Split range to node 0 and 1*/ + memblock_set_node(memblock_start_of_DRAM(), + memblock_phys_mem_size() / 2, &memblock.memory, 0); + memblock_set_node(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2, + memblock_phys_mem_size() / 2, &memblock.memory, 1); + + ASSERT_EQ(memblock.memory.cnt, 2); + rgn = &memblock.memory.regions[0]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM()); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 0); + rgn = &memblock.memory.regions[1]; + ASSERT_EQ(rgn->base, memblock_start_of_DRAM() + memblock_phys_mem_size() / 2); + ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2); + ASSERT_EQ(memblock_get_region_node(rgn), 1); + + /* Reserve 126 regions with the last one across node boundary */ + for (i = 0; i < 125; i++) + memblock_reserve(memblock_start_of_DRAM() + SZ_16 * i, SZ_8); + + memblock_reserve(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2 - SZ_8, + SZ_16); + + /* + * Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()") + * do following process to set nid to each memblock.reserved region. + * But it may miss some region if memblock_set_node() double the + * array. + * + * By checking 'max', we make sure all region nid is set properly. + */ +repeat: + max_reserved = memblock.reserved.max; + for_each_mem_region(rgn) { + int nid = memblock_get_region_node(rgn); + + memblock_set_node(rgn->base, rgn->size, &memblock.reserved, nid); + } + if (max_reserved != memblock.reserved.max) + goto repeat; + + /* Confirm each region has valid node set */ + for_each_reserved_mem_region(rgn) { + ASSERT_TRUE(numa_valid_node(memblock_get_region_node(rgn))); + if (rgn == (memblock.reserved.regions + memblock.reserved.cnt - 1)) + ASSERT_EQ(1, memblock_get_region_node(rgn)); + else + ASSERT_EQ(0, memblock_get_region_node(rgn)); + } + + dummy_physical_memory_cleanup(); + + /* + * The current reserved.regions is occupying a range of memory that + * allocated from dummy_physical_memory_init(). After free the memory, + * we must not use it. So restore the origin memory region to make sure + * the tests can run as normal and not affected by the double array. + */ + memblock.reserved.regions = orig_region; + memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS; + + test_pass_pop(); + + return 0; +} + +static int memblock_set_node_checks(void) +{ + prefix_reset(); + prefix_push("memblock_set_node"); + test_print("Running memblock_set_node tests...\n"); + + memblock_set_node_check(); + + prefix_pop(); + + return 0; +} +#else +static int memblock_set_node_checks(void) +{ + return 0; +} +#endif + int memblock_basic_checks(void) { memblock_initialization_check(); @@ -2444,6 +2545,7 @@ int memblock_basic_checks(void) memblock_bottom_up_checks(); memblock_trim_memory_checks(); memblock_overlaps_region_checks(); + memblock_set_node_checks(); return 0; } diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore index cb24124ac5b9..674aaa02e396 100644 --- a/tools/testing/selftests/.gitignore +++ b/tools/testing/selftests/.gitignore @@ -4,7 +4,6 @@ gpiogpio-hammer gpioinclude/ gpiolsgpio kselftest_install/ -tpm2/SpaceTest.log # Python bytecode and cache __pycache__/ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index c77c8c8e3d9b..80fb84fa3cfc 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -121,6 +121,7 @@ TARGETS += user_events TARGETS += vDSO TARGETS += mm TARGETS += x86 +TARGETS += x86/bugs TARGETS += zram #Please keep the TARGETS list alphabetically sorted # Run "make quicktest=1 run_tests" or diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 1bd403a5ef7b..0fd8c9b0d38f 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -526,6 +526,12 @@ extern const struct bench bench_trig_uprobe_multi_push; extern const struct bench bench_trig_uretprobe_multi_push; extern const struct bench bench_trig_uprobe_multi_ret; extern const struct bench bench_trig_uretprobe_multi_ret; +#ifdef __x86_64__ +extern const struct bench bench_trig_uprobe_nop5; +extern const struct bench bench_trig_uretprobe_nop5; +extern const struct bench bench_trig_uprobe_multi_nop5; +extern const struct bench bench_trig_uretprobe_multi_nop5; +#endif extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; @@ -586,6 +592,12 @@ static const struct bench *benchs[] = { &bench_trig_uretprobe_multi_push, &bench_trig_uprobe_multi_ret, &bench_trig_uretprobe_multi_ret, +#ifdef __x86_64__ + &bench_trig_uprobe_nop5, + &bench_trig_uretprobe_nop5, + &bench_trig_uprobe_multi_nop5, + &bench_trig_uretprobe_multi_nop5, +#endif /* ringbuf/perfbuf benchmarks */ &bench_rb_libbpf, &bench_rb_custom, diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 32e9f194d449..82327657846e 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -333,6 +333,20 @@ static void *uprobe_producer_ret(void *input) return NULL; } +#ifdef __x86_64__ +__nocf_check __weak void uprobe_target_nop5(void) +{ + asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00"); +} + +static void *uprobe_producer_nop5(void *input) +{ + while (true) + uprobe_target_nop5(); + return NULL; +} +#endif + static void usetup(bool use_retprobe, bool use_multi, void *target_addr) { size_t uprobe_offset; @@ -448,6 +462,28 @@ static void uretprobe_multi_ret_setup(void) usetup(true, true /* use_multi */, &uprobe_target_ret); } +#ifdef __x86_64__ +static void uprobe_nop5_setup(void) +{ + usetup(false, false /* !use_multi */, &uprobe_target_nop5); +} + +static void uretprobe_nop5_setup(void) +{ + usetup(true, false /* !use_multi */, &uprobe_target_nop5); +} + +static void uprobe_multi_nop5_setup(void) +{ + usetup(false, true /* use_multi */, &uprobe_target_nop5); +} + +static void uretprobe_multi_nop5_setup(void) +{ + usetup(true, true /* use_multi */, &uprobe_target_nop5); +} +#endif + const struct bench bench_trig_syscall_count = { .name = "trig-syscall-count", .validate = trigger_validate, @@ -506,3 +542,9 @@ BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret"); BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop"); BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push"); BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret"); +#ifdef __x86_64__ +BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5"); +BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5"); +BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5"); +BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5"); +#endif diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh index af169f831f2f..03f55405484b 100755 --- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh +++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh @@ -2,7 +2,7 @@ set -eufo pipefail -for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret} +for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5} do summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-) printf "%-15s: %s\n" $i "$summary" diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64 index 3720b7611523..e1495a4bbc99 100644 --- a/tools/testing/selftests/bpf/config.aarch64 +++ b/tools/testing/selftests/bpf/config.aarch64 @@ -158,7 +158,6 @@ CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TUN=y CONFIG_UNIX=y CONFIG_UPROBES=y -CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y CONFIG_VLAN_8021Q=y diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x index 706931a8c2c6..26c3bc2ce11d 100644 --- a/tools/testing/selftests/bpf/config.s390x +++ b/tools/testing/selftests/bpf/config.s390x @@ -128,7 +128,6 @@ CONFIG_TRANSPARENT_HUGEPAGE=y CONFIG_TUN=y CONFIG_UNIX=y CONFIG_UPROBES=y -CONFIG_USELIB=y CONFIG_USER_NS=y CONFIG_VETH=y CONFIG_VLAN_8021Q=y diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 5680befae8c6..5e713ef7caa3 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c index 09f6487f58b9..5fea3209566e 100644 --- a/tools/testing/selftests/bpf/prog_tests/for_each.c +++ b/tools/testing/selftests/bpf/prog_tests/for_each.c @@ -6,6 +6,7 @@ #include "for_each_array_map_elem.skel.h" #include "for_each_map_elem_write_key.skel.h" #include "for_each_multi_maps.skel.h" +#include "for_each_hash_modify.skel.h" static unsigned int duration; @@ -203,6 +204,40 @@ out: for_each_multi_maps__destroy(skel); } +static void test_hash_modify(void) +{ + struct for_each_hash_modify *skel; + int max_entries, i, err; + __u64 key, val; + + LIBBPF_OPTS(bpf_test_run_opts, topts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1 + ); + + skel = for_each_hash_modify__open_and_load(); + if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load")) + return; + + max_entries = bpf_map__max_entries(skel->maps.hashmap); + for (i = 0; i < max_entries; i++) { + key = i; + val = i; + err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key), + &val, sizeof(val), BPF_ANY); + if (!ASSERT_OK(err, "map_update")) + goto out; + } + + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + ASSERT_OK(topts.retval, "retval"); + +out: + for_each_hash_modify__destroy(skel); +} + void test_for_each(void) { if (test__start_subtest("hash_map")) @@ -213,4 +248,6 @@ void test_for_each(void) test_write_map_key(); if (test__start_subtest("multi_maps")) test_multi_maps(); + if (test__start_subtest("hash_modify")) + test_hash_modify(); } diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c index 115287ba441b..0703e987df89 100644 --- a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c +++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c @@ -25,8 +25,11 @@ static void *spin_lock_thread(void *arg) while (!READ_ONCE(skip)) { err = bpf_prog_test_run_opts(prog_fd, &topts); - ASSERT_OK(err, "test_run"); - ASSERT_OK(topts.retval, "test_run retval"); + if (err || topts.retval) { + ASSERT_OK(err, "test_run"); + ASSERT_OK(topts.retval, "test_run retval"); + break; + } } pthread_exit(arg); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c index 2d0796314862..0a99fd404f6d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c @@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map) goto close_cli; err = disconnect(cli); - ASSERT_OK(err, "disconnect"); close_cli: close(cli); diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 13a2e22f5465..863df7c0fdd0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -221,7 +221,7 @@ #define CAN_USE_GOTOL #endif -#if _clang_major__ >= 18 +#if __clang_major__ >= 18 #define CAN_USE_BPF_ST #endif diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c new file mode 100644 index 000000000000..82307166f789 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Intel Corporation */ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 128); + __type(key, __u64); + __type(value, __u64); +} hashmap SEC(".maps"); + +static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg) +{ + bpf_map_delete_elem(map, key); + bpf_map_update_elem(map, key, val, 0); + return 0; +} + +SEC("tc") +int test_pkt_access(struct __sk_buff *skb) +{ + (void)skb; + + bpf_for_each_map_elem(&hashmap, cb, NULL, 0); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock.c b/tools/testing/selftests/bpf/progs/res_spin_lock.c index b33385dfbd35..22c4fb8b9266 100644 --- a/tools/testing/selftests/bpf/progs/res_spin_lock.c +++ b/tools/testing/selftests/bpf/progs/res_spin_lock.c @@ -38,13 +38,14 @@ int res_spin_lock_test(struct __sk_buff *ctx) r = bpf_res_spin_lock(&elem1->lock); if (r) return r; - if (!bpf_res_spin_lock(&elem2->lock)) { + r = bpf_res_spin_lock(&elem2->lock); + if (!r) { bpf_res_spin_unlock(&elem2->lock); bpf_res_spin_unlock(&elem1->lock); return -1; } bpf_res_spin_unlock(&elem1->lock); - return 0; + return r != -EDEADLK; } SEC("tc") @@ -124,12 +125,15 @@ int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx) /* Trigger AA, after exhausting entries in the held lock table. This * time, only the timeout can save us, as AA detection won't succeed. */ - if (!bpf_res_spin_lock(locks[34])) { + ret = bpf_res_spin_lock(locks[34]); + if (!ret) { bpf_res_spin_unlock(locks[34]); ret = 1; goto end; } + ret = ret != -ETIMEDOUT ? 2 : 0; + end: for (i = i - 1; i >= 0; i--) bpf_res_spin_unlock(locks[i]); diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh index 400a696a0d21..a17256d9f88a 100755 --- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh +++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh @@ -88,22 +88,32 @@ echo "" > test/cpuset.cpus # If isolated CPUs have been reserved at boot time (as shown in # cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8 # that will be used by this script for testing purpose. If not, some of -# the tests may fail incorrectly. These pre-isolated CPUs should stay in -# an isolated state throughout the testing process for now. +# the tests may fail incorrectly. Wait a bit and retry again just in case +# these isolated CPUs are leftover from previous run and have just been +# cleaned up earlier in this script. +# +# These pre-isolated CPUs should stay in an isolated state throughout the +# testing process for now. # BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +[[ -n "$BOOT_ISOLCPUS" ]] && { + sleep 0.5 + BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated) +} if [[ -n "$BOOT_ISOLCPUS" ]] then [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] && skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested" echo "Pre-isolated CPUs: $BOOT_ISOLCPUS" fi + cleanup() { online_cpus cd $CGROUP2 - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - rmdir test > /dev/null 2>&1 + rmdir A1/A2/A3 A1/A2 A1 B1 test/A1 test/B1 test > /dev/null 2>&1 + rmdir rtest/p1/c11 rtest/p1/c12 rtest/p2/c21 \ + rtest/p2/c22 rtest/p1 rtest/p2 rtest > /dev/null 2>&1 [[ -n "$SCHED_DEBUG" ]] && echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose } @@ -173,14 +183,22 @@ test_add_proc() # # Cgroup test hierarchy # -# root -- A1 -- A2 -- A3 -# +- B1 +# root +# | +# +------+------+ +# | | +# A1 B1 +# | +# A2 +# | +# A3 # # P<v> = set cpus.partition (0:member, 1:root, 2:isolated) # C<l> = add cpu-list to cpuset.cpus # X<l> = add cpu-list to cpuset.cpus.exclusive # S<p> = use prefix in subtree_control # T = put a task into cgroup +# CX<l> = add cpu-list to both cpuset.cpus and cpuset.cpus.exclusive # O<c>=<v> = Write <v> to CPU online file of <c> # # ECPUs - effective CPUs of cpusets @@ -207,130 +225,129 @@ TEST_MATRIX=( " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5" " C0-1 . . C2-3:P1 . . . C2 0 " " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5" - "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0" - "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2" - "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1" - "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1|A2:2-3|XA2:2-3" + "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1|A2:2-3|XA2:2-3" + "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:|A2:3|XA2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3|A2:3 A1:P1|A2:P0" + "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4|A2:2" + "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:|B1:0-2 A1:P1|A2:P1" + "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # CPU offlining cases: - " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3" - "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3" - "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1,A2:2-3" - "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2" - "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1" - "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1" - "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1" - "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1" - "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" + " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1|B1:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1|A2:3" + "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0|A2:2-3" + "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1|A2:2-3" + "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2|A2:3 A1:P1|A2:P2" + "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:|A2:3 A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2|A2: A1:P1|A2:P1" + "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3|A2:3 A1:P1|A2:P-1" + "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2|A2:2 A1:P1|A2:P-1" + "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1|A2:2|A3: A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1|A2:3|A3:3 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1|A2:2|A3:2 A1:P1|A2:P1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1|A2:|A3:3 A1:P1|A2:P1|A3:P1" + "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # # Remote partition and cpuset.cpus.exclusive tests # - " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1|A2:2-3|A3:2-3 A1:P0|A2:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2|A2:3|A3:3 A1:P0|A2:P2 3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3|A2:1-3|A3:2-3|B1:2-3 A1:P0|A3:P0|B1:P-2" " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4" - " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5" - " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3" - " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4,A2:4,A3:2-3 A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3|B1:4 A3:P2|B1:P2 2-4" + " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1|A3:2-3 A2:P2|A3:P2 1-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3|B1:4-5 A3:P2|B1:P2 2-5" + " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4|A2:1-3|A3:1-3 A2:P2 1-3" + " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4|A2:4|A3:2-3 A3:P2 2-3" # Nested remote/local partition tests - " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3" - " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \ - A1:P0,A2:P1,A3:P0,B1:P1" - " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \ - A1:P0,A2:P1,A3:P2,B1:P1 2-4,3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \ - A1:P0,A2:P2,A3:P1 2-4,2-3" - " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \ - A1:P0,A2:P2,A3:P1 2" + " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4-5 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|2-3" + " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1|A2:2-3|A3:2-3|B1:4 \ + A1:P0|A2:P1|A3:P0|B1:P1" + " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1|A2:2|A3:3|B1:4 \ + A1:P0|A2:P1|A3:P2|B1:P1 2-4|3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1|A2:2-3|A3:4 \ + A1:P0|A2:P2|A3:P1 2-4|2-3" + " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1|A2:2|A3:3-4 \ + A1:P0|A2:P2|A3:P1 2" " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \ - A1:P0,A2:P-2,A3:P-1" + . . X5 . . 0 A1:0-4|A2:1-4|A3:2-4 \ + A1:P0|A2:P-2|A3:P-1 ." " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \ - . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \ - A1:P0,A2:P2,A3:P-1 2-4" + . . . X1 . 0 A1:0-1|A2:2-4|A3:2-4 \ + A1:P0|A2:P2|A3:P-1 2-4" # Remote partition offline tests - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3" - " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3," + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1|A2:1|A3:3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1|A2:1|A3:2-3 A1:P0|A3:P2 2-3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2|A2:1-2|A3: A1:P0|A3:P2 3" + " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2|A2:1-2|A3:1-2 A1:P0|A3:P-2 3|" # An invalidated remote partition cannot self-recover from hotplug - " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2" + " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3|A2:1-3|A3:2 A1:P0|A3:P-2 ." # cpus.exclusive.effective clearing test - " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:" + " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3|A2:1-3|A3:2|XA1:" # Invalid to valid remote partition transition test - " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2" + " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3|A2:1-3|XA2: A2:P-2 ." " C0-3:S+ C1-3:X3:P2 - . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3" + . . X2-3 P2 . . 0 A1:0-2|A2:3|XA2:3 A2:P2 3" # Invalid to valid local partition direct transition tests - " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3" - " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3" - " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0" - " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3" - " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3" + " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3|XA1:1-3|A2:1-3:XA2: A1:P2|A2:P-2 1-3" + " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2|XA1:1-3|A2:3:XA2:3 A1:P2|A2:P2 1-3" + " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4|B1:4-6 A1:P-2|B1:P0" + " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3|B1:4-6 A1:P2|B1:P0 0-3" # Local partition invalidation tests " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3" + . . . . . 0 A1:1|A2:2|A3:3 A1:P2|A2:P2|A3:P2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . X4 . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \ - . . C4:X . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3" + . . C4:X . . 0 A1:1-3|A2:1-3|A3:2-3|XA2:|XA3: A1:P2|A2:P-2|A3:P-2 1-3" # Local partition CPU change tests - " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2" - " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3" + " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2|A2:3-5 A1:P2|A2:P1 0-2" + " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3|A2:4-5 A1:P2|A2:P1 1-3" # cpus_allowed/exclusive_cpus update tests " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \ - A1:P0,A3:P-2" + . X:C4 . P2 . 0 A1:4|A2:4|XA2:|XA3:|A3:4 \ + A1:P0|A3:P-2 ." " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \ - A1:P0,A3:P-2" + . X1 . P2 . 0 A1:0-3|A2:1-3|XA1:1|XA2:|XA3:|A3:2-3 \ + A1:P0|A3:P-2 ." " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \ - . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \ - A1:P0,A3:P2 3" + . . X3 P2 . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3 \ + A1:P0|A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \ - A1:P0,A3:P-2" + . . X3 . . 0 A1:0-2|A2:1-2|XA2:3|XA3:3|A3:3|XA3:3 \ + A1:P0|A3:P2 3" " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \ - . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \ - A1:P0,A3:P-2" + . X4 . . . 0 A1:0-3|A2:1-3|A3:2-3|XA1:4|XA2:|XA3 \ + A1:P0|A3:P-2" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- @@ -339,68 +356,127 @@ TEST_MATRIX=( # # Adding CPUs to partition root that are not in parent's # cpuset.cpus is allowed, but those extra CPUs are ignored. - "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:|A2:2-3 A1:P1|A2:P1" # Taking away all CPUs from parent or itself if there are tasks # will make the partition invalid. - "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1" - " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1" - "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1" - "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1" + "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3|A2:2-3 A1:P1|A2:P-1" + " C3:P1:S+ C3 . . T P1 . . 0 A1:3|A2:3 A1:P1|A2:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P-1|A3:P-1" + "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1|A2:2|A3:3 A1:P1|A2:P1|A3:P1" # Changing a partition root to member makes child partitions invalid - "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1" - "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1" + "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3|A2:3 A1:P0|A2:P-1" + "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3|A2:2-3|A3:3 A1:P1|A2:P0|A3:P-1" # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long # as they overlap. - "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2|A2:3 A1:P1|A2:P1" # Deletion of CPUs distributed to child cgroup is allowed. - "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5" + "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5|A2:4-5" # To become a valid partition root, cpuset.cpus must overlap parent's # cpuset.cpus. - " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1" + " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1|A2:0-1 A1:P1|A2:P-1" # Enabling partition with child cpusets is allowed - " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1" + " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1|A2:1 A1:P1" - # A partition root with non-partition root parent is invalid, but it + # A partition root with non-partition root parent is invalid| but it # can be made valid if its parent becomes a partition root too. - " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2" - " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2" + " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1|A2:1 A1:P0|A2:P-2" + " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0|A2:1 A1:P1|A2:P2 0-1|1" # A non-exclusive cpuset.cpus change will invalidate partition and its siblings - " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0" - " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1" - " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1" + " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P0" + " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P-1|B1:P-1" + " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2|B1:2-3 A1:P0|B1:P-1" # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it - " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X5 . . . 0 A1:0-3|B1:4-5" # Child partition root that try to take all CPUs from parent partition # with tasks will remain invalid. - " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" - " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1,A2:1-4 A1:P1,A2:P1" - " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4,A2:1-4 A1:P1,A2:P-1" + " C1-4:P1:S+ P1 . . . . . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" + " C1-4:P1:S+ P1 . . . C1-4 . . 0 A1|A2:1-4 A1:P1|A2:P1" + " C1-4:P1:S+ P1 . . T C1-4 . . 0 A1:1-4|A2:1-4 A1:P1|A2:P-1" # Clearing of cpuset.cpus with a preset cpuset.cpus.exclusive shouldn't # affect cpuset.cpus.exclusive.effective. - " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4,XA2:3" + " C1-4:X3:S+ C1:X3 . . . C . . 0 A2:1-4|XA2:3" + + # cpuset.cpus can contain CPUs that overlap a sibling cpuset with cpus.exclusive + # but creating a local partition out of it is not allowed. Similarly and change + # in cpuset.cpus of a local partition that overlaps sibling exclusive CPUs will + # invalidate it. + " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1 0 A1:1|A2:2-4|B1:5-6|XB1:5-6 \ + A1:P0|A2:P2:B1:P1 2-4" + " CX1-4:S+ CX2-4:P2 . C3-6 . . . P1 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" + " CX1-4:S+ CX2-4:P2 . C5-6 . . . P1:C3-6 0 A1:1|A2:2-4|B1:5-6 \ + A1:P0|A2:P2:B1:P-1 2-4" # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ -------- # Failure cases: # A task cannot be added to a partition with no cpu - "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1" + "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:|A2:3 A1:P1|A2:P1" # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected - " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3|B1:4-5" # cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive - " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5" + " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3|B1:4-5" +) + +# +# Cpuset controller remote partition test matrix. +# +# Cgroup test hierarchy +# +# root +# | +# rtest (cpuset.cpus.exclusive=1-7) +# | +# +------+------+ +# | | +# p1 p2 +# +--+--+ +--+--+ +# | | | | +# c11 c12 c21 c22 +# +# REMOTE_TEST_MATRIX uses the same notational convention as TEST_MATRIX. +# Only CPUs 1-7 should be used. +# +REMOTE_TEST_MATRIX=( + # old-p1 old-p2 old-c11 old-c12 old-c21 old-c22 + # new-p1 new-p2 new-c11 new-c12 new-c21 new-c22 ECPUs Pstate ISOLCPUS + # ------ ------ ------- ------- ------- ------- ----- ------ -------- + " X1-3:S+ X4-6:S+ X1-2 X3 X4-5 X6 \ + . . P2 P2 P2 P2 c11:1-2|c12:3|c21:4-5|c22:6 \ + c11:P2|c12:P2|c21:P2|c22:P2 1-6" + " CX1-4:S+ . X1-2:P2 C3 . . \ + . . . C3-4 . . p1:3-4|c11:1-2|c12:3-4 \ + p1:P0|c11:P2|c12:P0 1-2" + " CX1-4:S+ . X1-2:P2 . . . \ + X2-4 . . . . . p1:1,3-4|c11:2 \ + p1:P0|c11:P2 2" + " CX1-5:S+ . X1-2:P2 X3-5:P1 . . \ + X2-4 . . . . . p1:1,5|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + . . X2 . . . p1:1|c11:2|c12:3-4 \ + p1:P0|c11:P2|c12:P1 2" + # p1 as member, will get its effective CPUs from its parent rtest + " CX1-4:S+ . X1-2:P2 X3-4:P1 . . \ + . . X1 CX2-4 . . p1:5-7|c11:1|c12:2-4 \ + p1:P0|c11:P2|c12:P1 1" + " CX1-4:S+ X5-6:P1:S+ . . . . \ + . . X1-2:P2 X4-5:P1 . X1-7:P2 p1:3|c11:1-2|c12:4:c22:5-6 \ + p1:P0|p2:P1|c11:P2|c12:P1|c22:P2 \ + 1-2,4-6|1-2,5-6" ) # @@ -453,25 +529,26 @@ set_ctrl_state() PFILE=$CGRP/cpuset.cpus.partition CFILE=$CGRP/cpuset.cpus XFILE=$CGRP/cpuset.cpus.exclusive - S=$(expr substr $CMD 1 1) - if [[ $S = S ]] - then - PREFIX=${CMD#?} + case $CMD in + S*) PREFIX=${CMD#?} COMM="echo ${PREFIX}${CTRL} > $SFILE" eval $COMM $REDIRECT - elif [[ $S = X ]] - then + ;; + X*) CPUS=${CMD#?} COMM="echo $CPUS > $XFILE" eval $COMM $REDIRECT - elif [[ $S = C ]] - then - CPUS=${CMD#?} + ;; + CX*) + CPUS=${CMD#??} + COMM="echo $CPUS > $CFILE; echo $CPUS > $XFILE" + eval $COMM $REDIRECT + ;; + C*) CPUS=${CMD#?} COMM="echo $CPUS > $CFILE" eval $COMM $REDIRECT - elif [[ $S = P ]] - then - VAL=${CMD#?} + ;; + P*) VAL=${CMD#?} case $VAL in 0) VAL=member ;; @@ -486,15 +563,17 @@ set_ctrl_state() esac COMM="echo $VAL > $PFILE" eval $COMM $REDIRECT - elif [[ $S = O ]] - then - VAL=${CMD#?} + ;; + O*) VAL=${CMD#?} write_cpu_online $VAL - elif [[ $S = T ]] - then - COMM="echo 0 > $TFILE" + ;; + T*) COMM="echo 0 > $TFILE" eval $COMM $REDIRECT - fi + ;; + *) echo "Unknown command: $CMD" + exit 1 + ;; + esac RET=$? [[ $RET -ne 0 ]] && { [[ -n "$SHOWERR" ]] && { @@ -532,21 +611,18 @@ online_cpus() } # -# Return 1 if the list of effective cpus isn't the same as the initial list. +# Remove all the test cgroup directories # reset_cgroup_states() { echo 0 > $CGROUP2/cgroup.procs online_cpus - rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1 - pause 0.02 - set_ctrl_state . R- - pause 0.01 + rmdir $RESET_LIST > /dev/null 2>&1 } dump_states() { - for DIR in . A1 A1/A2 A1/A2/A3 B1 + for DIR in $CGROUP_LIST do CPUS=$DIR/cpuset.cpus ECPUS=$DIR/cpuset.cpus.effective @@ -566,17 +642,33 @@ dump_states() } # +# Set the actual cgroup directory into $CGRP_DIR +# $1 - cgroup name +# +set_cgroup_dir() +{ + CGRP_DIR=$1 + [[ $CGRP_DIR = A2 ]] && CGRP_DIR=A1/A2 + [[ $CGRP_DIR = A3 ]] && CGRP_DIR=A1/A2/A3 + [[ $CGRP_DIR = c11 ]] && CGRP_DIR=p1/c11 + [[ $CGRP_DIR = c12 ]] && CGRP_DIR=p1/c12 + [[ $CGRP_DIR = c21 ]] && CGRP_DIR=p2/c21 + [[ $CGRP_DIR = c22 ]] && CGRP_DIR=p2/c22 +} + +# # Check effective cpus -# $1 - check string, format: <cgroup>:<cpu-list>[,<cgroup>:<cpu-list>]* +# $1 - check string, format: <cgroup>:<cpu-list>[|<cgroup>:<cpu-list>]* # check_effective_cpus() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CPUS=$2 + EXPECTED_CPUS=$2 + ACTUAL_CPUS= if [[ $CGRP = X* ]] then CGRP=${CGRP#X} @@ -584,41 +676,39 @@ check_effective_cpus() else FILE=cpuset.cpus.effective fi - [[ $CGRP = A2 ]] && CGRP=A1/A2 - [[ $CGRP = A3 ]] && CGRP=A1/A2/A3 - [[ -e $CGRP/$FILE ]] || return 1 - [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1 + set_cgroup_dir $CGRP + [[ -e $CGRP_DIR/$FILE ]] || return 1 + ACTUAL_CPUS=$(cat $CGRP_DIR/$FILE) + [[ $EXPECTED_CPUS = $ACTUAL_CPUS ]] || return 1 done } # # Check cgroup states -# $1 - check string, format: <cgroup>:<state>[,<cgroup>:<state>]* +# $1 - check string, format: <cgroup>:<state>[|<cgroup>:<state>]* # check_cgroup_states() { CHK_STR=$1 - for CHK in $(echo $CHK_STR | sed -e "s/,/ /g") + for CHK in $(echo $CHK_STR | sed -e "s/|/ /g") do set -- $(echo $CHK | sed -e "s/:/ /g") CGRP=$1 - CGRP_DIR=$CGRP - STATE=$2 + EXPECTED_STATE=$2 FILE= - EVAL=$(expr substr $STATE 2 2) - [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2 - [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3 + EVAL=$(expr substr $EXPECTED_STATE 2 2) - case $STATE in + set_cgroup_dir $CGRP + case $EXPECTED_STATE in P*) FILE=$CGRP_DIR/cpuset.cpus.partition ;; - *) echo "Unknown state: $STATE!" + *) echo "Unknown state: $EXPECTED_STATE!" exit 1 ;; esac - VAL=$(cat $FILE) + ACTUAL_STATE=$(cat $FILE) - case "$VAL" in + case "$ACTUAL_STATE" in member) VAL=0 ;; root) VAL=1 @@ -642,7 +732,7 @@ check_cgroup_states() [[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && { DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective) [[ -n "$DOMS" ]] && - echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE + echo " [$CGRP_DIR] sched-domain: $DOMS" > $CONSOLE } done return 0 @@ -665,22 +755,22 @@ check_cgroup_states() # check_isolcpus() { - EXPECT_VAL=$1 - ISOLCPUS= + EXPECTED_ISOLCPUS=$1 + ISCPUS=${CGROUP2}/cpuset.cpus.isolated + ISOLCPUS=$(cat $ISCPUS) LASTISOLCPU= SCHED_DOMAINS=/sys/kernel/debug/sched/domains - ISCPUS=${CGROUP2}/cpuset.cpus.isolated - if [[ $EXPECT_VAL = . ]] + if [[ $EXPECTED_ISOLCPUS = . ]] then - EXPECT_VAL= - EXPECT_VAL2= - elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]] + EXPECTED_ISOLCPUS= + EXPECTED_SDOMAIN= + elif [[ $(expr $EXPECTED_ISOLCPUS : ".*|.*") > 0 ]] then - set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g") - EXPECT_VAL=$1 - EXPECT_VAL2=$2 + set -- $(echo $EXPECTED_ISOLCPUS | sed -e "s/|/ /g") + EXPECTED_ISOLCPUS=$2 + EXPECTED_SDOMAIN=$1 else - EXPECT_VAL2=$EXPECT_VAL + EXPECTED_SDOMAIN=$EXPECTED_ISOLCPUS fi # @@ -689,20 +779,21 @@ check_isolcpus() # to make appending those CPUs easier. # [[ -n "$BOOT_ISOLCPUS" ]] && { - EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS} - EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS} + EXPECTED_ISOLCPUS=${EXPECTED_ISOLCPUS:+${EXPECTED_ISOLCPUS},}${BOOT_ISOLCPUS} + EXPECTED_SDOMAIN=${EXPECTED_SDOMAIN:+${EXPECTED_SDOMAIN},}${BOOT_ISOLCPUS} } # # Check cpuset.cpus.isolated cpumask # - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && { + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && { # Take a 50ms pause and try again pause 0.05 ISOLCPUS=$(cat $ISCPUS) } - [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1 + [[ "$EXPECTED_ISOLCPUS" != "$ISOLCPUS" ]] && return 1 ISOLCPUS= + EXPECTED_ISOLCPUS=$EXPECTED_SDOMAIN # # Use the sched domain in debugfs to check isolated CPUs, if available @@ -736,7 +827,7 @@ check_isolcpus() done [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU - [[ "$EXPECT_VAL" = "$ISOLCPUS" ]] + [[ "$EXPECTED_SDOMAIN" = "$ISOLCPUS" ]] } test_fail() @@ -774,6 +865,63 @@ null_isolcpus_check() } # +# Check state transition test result +# $1 - Test number +# $2 - Expected effective CPU values +# $3 - Expected partition states +# $4 - Expected isolated CPUs +# +check_test_results() +{ + _NR=$1 + _ECPUS="$2" + _PSTATES="$3" + _ISOLCPUS="$4" + + [[ -n "$_ECPUS" && "$_ECPUS" != . ]] && { + check_effective_cpus $_ECPUS + [[ $? -ne 0 ]] && test_fail $_NR "effective CPU" \ + "Cgroup $CGRP: expected $EXPECTED_CPUS, got $ACTUAL_CPUS" + } + + [[ -n "$_PSTATES" && "$_PSTATES" != . ]] && { + check_cgroup_states $_PSTATES + [[ $? -ne 0 ]] && test_fail $_NR states \ + "Cgroup $CGRP: expected $EXPECTED_STATE, got $ACTUAL_STATE" + } + + # Compare the expected isolated CPUs with the actual ones, + # if available + [[ -n "$_ISOLCPUS" ]] && { + check_isolcpus $_ISOLCPUS + [[ $? -ne 0 ]] && { + [[ -n "$BOOT_ISOLCPUS" ]] && _ISOLCPUS=${_ISOLCPUS},${BOOT_ISOLCPUS} + test_fail $_NR "isolated CPU" \ + "Expect $_ISOLCPUS, get $ISOLCPUS instead" + } + } + reset_cgroup_states + # + # Check to see if effective cpu list changes + # + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + RETRY=0 + while [[ $_NEWLIST != $CPULIST && $RETRY -lt 8 ]] + do + # Wait a bit longer & recheck a few times + pause 0.02 + ((RETRY++)) + _NEWLIST=$(cat $CGROUP2/cpuset.cpus.effective) + done + [[ $_NEWLIST != $CPULIST ]] && { + echo "Effective cpus changed to $_NEWLIST after test $_NR!" + exit 1 + } + null_isolcpus_check + [[ $VERBOSE -gt 0 ]] && echo "Test $I done." +} + +# # Run cpuset state transition test # $1 - test matrix name # @@ -785,6 +933,8 @@ run_state_test() { TEST=$1 CONTROLLER=cpuset + CGROUP_LIST=". A1 A1/A2 A1/A2/A3 B1" + RESET_LIST="A1/A2/A3 A1/A2 A1 B1" I=0 eval CNT="\${#$TEST[@]}" @@ -812,10 +962,11 @@ run_state_test() STATES=${11} ICPUS=${12} - set_ctrl_state_noerr B1 $OLD_B1 set_ctrl_state_noerr A1 $OLD_A1 set_ctrl_state_noerr A1/A2 $OLD_A2 set_ctrl_state_noerr A1/A2/A3 $OLD_A3 + set_ctrl_state_noerr B1 $OLD_B1 + RETVAL=0 set_ctrl_state A1 $NEW_A1; ((RETVAL += $?)) set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?)) @@ -824,47 +975,79 @@ run_state_test() [[ $RETVAL -ne $RESULT ]] && test_fail $I result - [[ -n "$ECPUS" && "$ECPUS" != . ]] && { - check_effective_cpus $ECPUS - [[ $? -ne 0 ]] && test_fail $I "effective CPU" - } + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" + ((I++)) + done + echo "All $I tests of $TEST PASSED." +} - [[ -n "$STATES" && "$STATES" != . ]] && { - check_cgroup_states $STATES - [[ $? -ne 0 ]] && test_fail $I states - } +# +# Run cpuset remote partition state transition test +# $1 - test matrix name +# +run_remote_state_test() +{ + TEST=$1 + CONTROLLER=cpuset + [[ -d rtest ]] || mkdir rtest + cd rtest + echo +cpuset > cgroup.subtree_control + echo "1-7" > cpuset.cpus + echo "1-7" > cpuset.cpus.exclusive + CGROUP_LIST=".. . p1 p2 p1/c11 p1/c12 p2/c21 p2/c22" + RESET_LIST="p1/c11 p1/c12 p2/c21 p2/c22 p1 p2" + I=0 + eval CNT="\${#$TEST[@]}" - # Compare the expected isolated CPUs with the actual ones, - # if available - [[ -n "$ICPUS" ]] && { - check_isolcpus $ICPUS - [[ $? -ne 0 ]] && { - [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS} - test_fail $I "isolated CPU" \ - "Expect $ICPUS, get $ISOLCPUS instead" - } - } - reset_cgroup_states - # - # Check to see if effective cpu list changes - # - NEWLIST=$(cat cpuset.cpus.effective) - RETRY=0 - while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]] - do - # Wait a bit longer & recheck a few times - pause 0.02 - ((RETRY++)) - NEWLIST=$(cat cpuset.cpus.effective) - done - [[ $NEWLIST != $CPULIST ]] && { - echo "Effective cpus changed to $NEWLIST after test $I!" - exit 1 + reset_cgroup_states + console_msg "Running remote partition state transition test ..." + + while [[ $I -lt $CNT ]] + do + echo "Running test $I ..." > $CONSOLE + [[ $VERBOSE -gt 1 ]] && { + echo "" + eval echo \${$TEST[$I]} } - null_isolcpus_check - [[ $VERBOSE -gt 0 ]] && echo "Test $I done." + eval set -- "\${$TEST[$I]}" + OLD_p1=$1 + OLD_p2=$2 + OLD_c11=$3 + OLD_c12=$4 + OLD_c21=$5 + OLD_c22=$6 + NEW_p1=$7 + NEW_p2=$8 + NEW_c11=$9 + NEW_c12=${10} + NEW_c21=${11} + NEW_c22=${12} + ECPUS=${13} + STATES=${14} + ICPUS=${15} + + set_ctrl_state_noerr p1 $OLD_p1 + set_ctrl_state_noerr p2 $OLD_p2 + set_ctrl_state_noerr p1/c11 $OLD_c11 + set_ctrl_state_noerr p1/c12 $OLD_c12 + set_ctrl_state_noerr p2/c21 $OLD_c21 + set_ctrl_state_noerr p2/c22 $OLD_c22 + + RETVAL=0 + set_ctrl_state p1 $NEW_p1 ; ((RETVAL += $?)) + set_ctrl_state p2 $NEW_p2 ; ((RETVAL += $?)) + set_ctrl_state p1/c11 $NEW_c11; ((RETVAL += $?)) + set_ctrl_state p1/c12 $NEW_c12; ((RETVAL += $?)) + set_ctrl_state p2/c21 $NEW_c21; ((RETVAL += $?)) + set_ctrl_state p2/c22 $NEW_c22; ((RETVAL += $?)) + + [[ $RETVAL -ne 0 ]] && test_fail $I result + + check_test_results $I "$ECPUS" "$STATES" "$ICPUS" ((I++)) done + cd .. + rmdir rtest echo "All $I tests of $TEST PASSED." } @@ -932,6 +1115,7 @@ test_isolated() echo $$ > $CGROUP2/cgroup.procs [[ -d A1 ]] && rmdir A1 null_isolcpus_check + pause 0.05 } # @@ -997,10 +1181,13 @@ test_inotify() else echo "Inotify test PASSED" fi + echo member > cpuset.cpus.partition + echo "" > cpuset.cpus } trap cleanup 0 2 3 6 run_state_test TEST_MATRIX +run_remote_state_test REMOTE_TEST_MATRIX test_isolated test_inotify echo "All tests PASSED." diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 137b2364a082..9984413be9f0 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -1,14 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 #include <fcntl.h> +#include <inttypes.h> #include <libgen.h> #include <linux/limits.h> #include <pthread.h> #include <string.h> +#include <sys/mount.h> #include <sys/resource.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> #include <unistd.h> #include "../kselftest_harness.h" +#include "../pidfd/pidfd.h" #define STACKDUMP_FILE "stack_values" #define STACKDUMP_SCRIPT "stackdump" @@ -35,6 +41,7 @@ static void crashing_child(void) FIXTURE(coredump) { char original_core_pattern[256]; + pid_t pid_coredump_server; }; FIXTURE_SETUP(coredump) @@ -44,6 +51,7 @@ FIXTURE_SETUP(coredump) char *dir; int ret; + self->pid_coredump_server = -ESRCH; file = fopen("/proc/sys/kernel/core_pattern", "r"); ASSERT_NE(NULL, file); @@ -61,10 +69,17 @@ FIXTURE_TEARDOWN(coredump) { const char *reason; FILE *file; - int ret; + int ret, status; unlink(STACKDUMP_FILE); + if (self->pid_coredump_server > 0) { + kill(self->pid_coredump_server, SIGTERM); + waitpid(self->pid_coredump_server, &status, 0); + } + unlink("/tmp/coredump.file"); + unlink("/tmp/coredump.socket"); + file = fopen("/proc/sys/kernel/core_pattern", "w"); if (!file) { reason = "Unable to open core_pattern"; @@ -89,14 +104,14 @@ fail: fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason); } -TEST_F(coredump, stackdump) +TEST_F_TIMEOUT(coredump, stackdump, 120) { struct sigaction action = {}; unsigned long long stack; char *test_dir, *line; size_t line_length; char buf[PATH_MAX]; - int ret, i; + int ret, i, status; FILE *file; pid_t pid; @@ -129,6 +144,10 @@ TEST_F(coredump, stackdump) /* * Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file */ + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + for (i = 0; i < 10; ++i) { file = fopen(STACKDUMP_FILE, "r"); if (file) @@ -138,14 +157,466 @@ TEST_F(coredump, stackdump) ASSERT_NE(file, NULL); /* Step 4: Make sure all stack pointer values are non-zero */ + line = NULL; for (i = 0; -1 != getline(&line, &line_length, file); ++i) { stack = strtoull(line, NULL, 10); ASSERT_NE(stack, 0); } + free(line); ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN); fclose(file); } +TEST_F(coredump, socket) +{ + int fd, pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + struct stat st; + char core_file[PATH_MAX]; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + ret = listen(fd_server, 1); + if (ret < 0) { + fprintf(stderr, "Failed to listen on coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + fprintf(stderr, "Failed to accept coredump socket connection\n"); + close(fd_server); + _exit(EXIT_FAILURE); + } + + fd_peer_pidfd_len = sizeof(fd_peer_pidfd); + ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, + &fd_peer_pidfd, &fd_peer_pidfd_len); + if (ret < 0) { + fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + _exit(EXIT_FAILURE); + } + + memset(&info, 0, sizeof(info)); + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); + if (ret < 0) { + fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.mask & PIDFD_INFO_COREDUMP)) { + fprintf(stderr, "Missing coredump information from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.coredump_mask & PIDFD_COREDUMPED)) { + fprintf(stderr, "Received connection from non-coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + fd_core_file = creat("/tmp/coredump.file", 0644); + if (fd_core_file < 0) { + fprintf(stderr, "Failed to create coredump file\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + for (;;) { + char buffer[4096]; + ssize_t bytes_read, bytes_write; + + bytes_read = read(fd_coredump, buffer, sizeof(buffer)); + if (bytes_read < 0) { + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_FAILURE); + } + + if (bytes_read == 0) + break; + + bytes_write = write(fd_core_file, buffer, bytes_read); + if (bytes_read != bytes_write) { + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_FAILURE); + } + } + + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_TRUE(WCOREDUMP(status)); + + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_GT((info.coredump_mask & PIDFD_COREDUMPED), 0); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + ASSERT_EQ(stat("/tmp/coredump.file", &st), 0); + ASSERT_GT(st.st_size, 0); + /* + * We should somehow validate the produced core file. + * For now just allow for visual inspection + */ + system("file /tmp/coredump.file"); +} + +TEST_F(coredump, socket_detect_userspace_client) +{ + int fd, pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + struct stat st; + char core_file[PATH_MAX]; + struct pidfd_info info = {}; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server, fd_coredump, fd_peer_pidfd, fd_core_file; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + ret = listen(fd_server, 1); + if (ret < 0) { + fprintf(stderr, "Failed to listen on coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(ipc_sockets[1]); + + fd_coredump = accept4(fd_server, NULL, NULL, SOCK_CLOEXEC); + if (fd_coredump < 0) { + fprintf(stderr, "Failed to accept coredump socket connection\n"); + close(fd_server); + _exit(EXIT_FAILURE); + } + + fd_peer_pidfd_len = sizeof(fd_peer_pidfd); + ret = getsockopt(fd_coredump, SOL_SOCKET, SO_PEERPIDFD, + &fd_peer_pidfd, &fd_peer_pidfd_len); + if (ret < 0) { + fprintf(stderr, "%m - Failed to retrieve peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + _exit(EXIT_FAILURE); + } + + memset(&info, 0, sizeof(info)); + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ret = ioctl(fd_peer_pidfd, PIDFD_GET_INFO, &info); + if (ret < 0) { + fprintf(stderr, "Failed to retrieve pidfd info from peer pidfd for coredump socket connection\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (!(info.mask & PIDFD_INFO_COREDUMP)) { + fprintf(stderr, "Missing coredump information from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + if (info.coredump_mask & PIDFD_COREDUMPED) { + fprintf(stderr, "Received unexpected connection from coredumping task\n"); + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + _exit(EXIT_FAILURE); + } + + close(fd_coredump); + close(fd_server); + close(fd_peer_pidfd); + close(fd_core_file); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) { + int fd_socket; + ssize_t ret; + + fd_socket = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd_socket < 0) + _exit(EXIT_FAILURE); + + + ret = connect(fd_socket, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) + _exit(EXIT_FAILURE); + + (void *)write(fd_socket, &(char){ 0 }, 1); + close(fd_socket); + _exit(EXIT_SUCCESS); + } + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + info.mask = PIDFD_INFO_EXIT | PIDFD_INFO_COREDUMP; + ASSERT_EQ(ioctl(pidfd, PIDFD_GET_INFO, &info), 0); + ASSERT_GT((info.mask & PIDFD_INFO_COREDUMP), 0); + ASSERT_EQ((info.coredump_mask & PIDFD_COREDUMPED), 0); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); + + ASSERT_NE(stat("/tmp/coredump.file", &st), 0); + ASSERT_EQ(errno, ENOENT); +} + +TEST_F(coredump, socket_enoent) +{ + int pidfd, ret, status; + FILE *file; + pid_t pid; + char core_file[PATH_MAX]; + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); +} + +TEST_F(coredump, socket_no_listener) +{ + int pidfd, ret, status; + FILE *file; + pid_t pid, pid_coredump_server; + int ipc_sockets[2]; + char c; + const struct sockaddr_un coredump_sk = { + .sun_family = AF_UNIX, + .sun_path = "/tmp/coredump.socket", + }; + size_t coredump_sk_len = offsetof(struct sockaddr_un, sun_path) + + sizeof("/tmp/coredump.socket"); + + ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets); + ASSERT_EQ(ret, 0); + + file = fopen("/proc/sys/kernel/core_pattern", "w"); + ASSERT_NE(file, NULL); + + ret = fprintf(file, "@/tmp/coredump.socket"); + ASSERT_EQ(ret, strlen("@/tmp/coredump.socket")); + ASSERT_EQ(fclose(file), 0); + + pid_coredump_server = fork(); + ASSERT_GE(pid_coredump_server, 0); + if (pid_coredump_server == 0) { + int fd_server; + socklen_t fd_peer_pidfd_len; + + close(ipc_sockets[0]); + + fd_server = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (fd_server < 0) + _exit(EXIT_FAILURE); + + ret = bind(fd_server, (const struct sockaddr *)&coredump_sk, coredump_sk_len); + if (ret < 0) { + fprintf(stderr, "Failed to bind coredump socket\n"); + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + if (write_nointr(ipc_sockets[1], "1", 1) < 0) { + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_FAILURE); + } + + close(fd_server); + close(ipc_sockets[1]); + _exit(EXIT_SUCCESS); + } + self->pid_coredump_server = pid_coredump_server; + + EXPECT_EQ(close(ipc_sockets[1]), 0); + ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1); + EXPECT_EQ(close(ipc_sockets[0]), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + if (pid == 0) + crashing_child(); + + pidfd = sys_pidfd_open(pid, 0); + ASSERT_GE(pidfd, 0); + + waitpid(pid, &status, 0); + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_FALSE(WCOREDUMP(status)); + + waitpid(pid_coredump_server, &status, 0); + self->pid_coredump_server = -ESRCH; + ASSERT_TRUE(WIFEXITED(status)); + ASSERT_EQ(WEXITSTATUS(status), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh index e350c521b467..9927b654fb8f 100755 --- a/tools/testing/selftests/cpufreq/cpufreq.sh +++ b/tools/testing/selftests/cpufreq/cpufreq.sh @@ -52,7 +52,14 @@ read_cpufreq_files_in_dir() for file in $files; do if [ -f $1/$file ]; then printf "$file:" - cat $1/$file + #file is readable ? + local rfile=$(ls -l $1/$file | awk '$1 ~ /^.*r.*/ { print $NF; }') + + if [ ! -z $rfile ]; then + cat $1/$file + else + printf "$file is not readable\n" + fi else printf "\n" read_cpufreq_files_in_dir "$1/$file" @@ -83,10 +90,10 @@ update_cpufreq_files_in_dir() for file in $files; do if [ -f $1/$file ]; then - # file is writable ? - local wfile=$(ls -l $1/$file | awk '$1 ~ /^.*w.*/ { print $NF; }') + # file is readable and writable ? + local rwfile=$(ls -l $1/$file | awk '$1 ~ /^.*rw.*/ { print $NF; }') - if [ ! -z $wfile ]; then + if [ ! -z $rwfile ]; then # scaling_setspeed is a special file and we # should skip updating it if [ $file != "scaling_setspeed" ]; then @@ -244,9 +251,10 @@ do_suspend() printf "Failed to suspend using RTC wake alarm\n" return 1 fi + else + echo $filename > $SYSFS/power/state fi - echo $filename > $SYSFS/power/state printf "Came out of $1\n" printf "Do basic tests after finishing $1 to verify cpufreq state\n\n" diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh new file mode 120000 index 000000000000..d16a65e7595d --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh @@ -0,0 +1 @@ +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py index 8b7f6acad15f..7c90a040ce45 100755 --- a/tools/testing/selftests/drivers/net/hds.py +++ b/tools/testing/selftests/drivers/net/hds.py @@ -6,7 +6,7 @@ import os from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx from lib.py import CmdExitFailure, EthtoolFamily, NlError from lib.py import NetDrvEnv -from lib.py import defer, ethtool, ip +from lib.py import defer, ethtool, ip, random def _get_hds_mode(cfg, netnl) -> str: @@ -109,6 +109,36 @@ def set_hds_thresh_zero(cfg, netnl) -> None: ksft_eq(0, rings['hds-thresh']) +def set_hds_thresh_random(cfg, netnl) -> None: + try: + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + except NlError as e: + raise KsftSkipEx('ring-get not supported by device') + if 'hds-thresh' not in rings: + raise KsftSkipEx('hds-thresh not supported by device') + if 'hds-thresh-max' not in rings: + raise KsftSkipEx('hds-thresh-max not defined by device') + + if rings['hds-thresh-max'] < 2: + raise KsftSkipEx('hds-thresh-max is too small') + elif rings['hds-thresh-max'] == 2: + hds_thresh = 1 + else: + while True: + hds_thresh = random.randint(1, rings['hds-thresh-max'] - 1) + if hds_thresh != rings['hds-thresh']: + break + + try: + netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_thresh}) + except NlError as e: + if e.error == errno.EINVAL: + raise KsftSkipEx("hds-thresh-set not supported by the device") + elif e.error == errno.EOPNOTSUPP: + raise KsftSkipEx("ring-set not supported by the device") + rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + ksft_eq(hds_thresh, rings['hds-thresh']) + def set_hds_thresh_max(cfg, netnl) -> None: try: rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}}) @@ -243,6 +273,7 @@ def main() -> None: get_hds_thresh, set_hds_disable, set_hds_enable, + set_hds_thresh_random, set_hds_thresh_zero, set_hds_thresh_max, set_hds_thresh_gt, diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index 9f271ab6ec04..6a0378e06cab 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -35,6 +35,7 @@ def test_zcrx(cfg) -> None: rx_ring = _get_rx_ring_entries(cfg) try: + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) @@ -48,6 +49,7 @@ def test_zcrx(cfg) -> None: ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) ethtool(f"-X {cfg.ifname} default", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) def test_zcrx_oneshot(cfg) -> None: @@ -59,6 +61,7 @@ def test_zcrx_oneshot(cfg) -> None: rx_ring = _get_rx_ring_entries(cfg) try: + ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote) ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote) flow_rule_id = _set_flow_rule(cfg, combined_chans - 1) @@ -72,6 +75,7 @@ def test_zcrx_oneshot(cfg) -> None: ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote) ethtool(f"-X {cfg.ifname} default", host=cfg.remote) ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote) + ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote) def main() -> None: diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 2bf14ac2b8c6..9d48004ff1a1 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -431,6 +431,22 @@ static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6) return 0; } +static struct netdev_queue_id *create_queues(void) +{ + struct netdev_queue_id *queues; + size_t i = 0; + + queues = calloc(num_queues, sizeof(*queues)); + for (i = 0; i < num_queues; i++) { + queues[i]._present.type = 1; + queues[i]._present.id = 1; + queues[i].type = NETDEV_QUEUE_TYPE_RX; + queues[i].id = start_queue + i; + } + + return queues; +} + int do_server(struct memory_buffer *mem) { char ctrl_data[sizeof(int) * 20000]; @@ -448,7 +464,6 @@ int do_server(struct memory_buffer *mem) char buffer[256]; int socket_fd; int client_fd; - size_t i = 0; int ret; ret = parse_address(server_ip, atoi(port), &server_sin); @@ -471,16 +486,7 @@ int do_server(struct memory_buffer *mem) sleep(1); - queues = malloc(sizeof(*queues) * num_queues); - - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); tmp_mem = malloc(mem->size); @@ -545,7 +551,6 @@ int do_server(struct memory_buffer *mem) goto cleanup; } - i++; for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) { if (cm->cmsg_level != SOL_SOCKET || (cm->cmsg_type != SCM_DEVMEM_DMABUF && @@ -630,10 +635,8 @@ cleanup: void run_devmem_tests(void) { - struct netdev_queue_id *queues; struct memory_buffer *mem; struct ynl_sock *ys; - size_t i = 0; mem = provider->alloc(getpagesize() * NUM_PAGES); @@ -641,38 +644,24 @@ void run_devmem_tests(void) if (configure_rss()) error(1, 0, "rss error\n"); - queues = calloc(num_queues, sizeof(*queues)); - if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, + calloc(num_queues, sizeof(struct netdev_queue_id)), + num_queues, &ys)) error(1, 0, "Binding empty queues array should have failed\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - if (configure_headersplit(0)) error(1, 0, "Failed to configure header split\n"); - if (!bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (!bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Configure dmabuf with header split off should have failed\n"); if (configure_headersplit(1)) error(1, 0, "Failed to configure header split\n"); - for (i = 0; i < num_queues; i++) { - queues[i]._present.type = 1; - queues[i]._present.id = 1; - queues[i].type = NETDEV_QUEUE_TYPE_RX; - queues[i].id = start_queue + i; - } - - if (bind_rx_queue(ifindex, mem->fd, queues, num_queues, &ys)) + if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) error(1, 0, "Failed to bind\n"); /* Deactivating a bound queue should not be legal */ diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh index bed748dde4b0..8972f42dfe03 100755 --- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh +++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh @@ -266,18 +266,14 @@ run_test() "${base_time}" \ "${CYCLE_TIME_NS}" \ "${SHIFT_TIME_NS}" \ + "${GATE_DURATION_NS}" \ "${NUM_PKTS}" \ "${STREAM_VID}" \ "${STREAM_PRIO}" \ "" \ "${isochron_dat}" - # Count all received packets by looking at the non-zero RX timestamps - received=$(isochron report \ - --input-file "${isochron_dat}" \ - --printf-format "%u\n" --printf-args "R" | \ - grep -w -v '0' | wc -l) - + received=$(isochron_report_num_received "${isochron_dat}") if [ "${received}" = "${expected}" ]; then RET=0 else diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py index 4b6822866066..af8df2313a3b 100755 --- a/tools/testing/selftests/drivers/net/ping.py +++ b/tools/testing/selftests/drivers/net/ping.py @@ -9,11 +9,11 @@ from lib.py import EthtoolFamily, NetDrvEpEnv from lib.py import bkg, cmd, wait_port_listen, rand_port from lib.py import defer, ethtool, ip -remote_ifname="" no_sleep=False def _test_v4(cfg) -> None: - cfg.require_ipver("4") + if not cfg.addr_v["4"]: + return cmd("ping -c 1 -W0.5 " + cfg.remote_addr_v["4"]) cmd("ping -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) @@ -21,7 +21,8 @@ def _test_v4(cfg) -> None: cmd("ping -s 65000 -c 1 -W0.5 " + cfg.addr_v["4"], host=cfg.remote) def _test_v6(cfg) -> None: - cfg.require_ipver("6") + if not cfg.addr_v["6"]: + return cmd("ping -c 1 -W5 " + cfg.remote_addr_v["6"]) cmd("ping -c 1 -W5 " + cfg.addr_v["6"], host=cfg.remote) @@ -57,7 +58,7 @@ def _set_offload_checksum(cfg, netnl, on) -> None: def _set_xdp_generic_sb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip link set dev {cfg.ifname} mtu 1500 xdpgeneric obj {prog} sec xdp", shell=True) defer(cmd, f"ip link set dev {cfg.ifname} xdpgeneric off") @@ -66,8 +67,8 @@ def _set_xdp_generic_sb_on(cfg) -> None: def _set_xdp_generic_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) ip("link set dev %s mtu 9000 xdpgeneric obj %s sec xdp.frags" % (cfg.ifname, prog)) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdpgeneric off") @@ -76,7 +77,7 @@ def _set_xdp_generic_mb_on(cfg) -> None: def _set_xdp_native_sb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) cmd(f"ip -j link set dev {cfg.ifname} mtu 1500 xdp obj {prog} sec xdp", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") xdp_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] @@ -93,8 +94,8 @@ def _set_xdp_native_sb_on(cfg) -> None: def _set_xdp_native_mb_on(cfg) -> None: prog = cfg.net_lib_dir / "xdp_dummy.bpf.o" - cmd(f"ip link set dev {remote_ifname} mtu 9000", shell=True, host=cfg.remote) - defer(ip, f"link set dev {remote_ifname} mtu 1500", host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 9000", shell=True, host=cfg.remote) + defer(ip, f"link set dev {cfg.remote_ifname} mtu 1500", host=cfg.remote) try: cmd(f"ip link set dev {cfg.ifname} mtu 9000 xdp obj {prog} sec xdp.frags", shell=True) defer(ip, f"link set dev {cfg.ifname} mtu 1500 xdp off") @@ -112,18 +113,15 @@ def _set_xdp_offload_on(cfg) -> None: except Exception as e: raise KsftSkipEx('device does not support offloaded XDP') defer(ip, f"link set dev {cfg.ifname} xdpoffload off") - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) if no_sleep != True: time.sleep(10) def get_interface_info(cfg) -> None: - global remote_ifname global no_sleep - remote_info = cmd(f"ip -4 -o addr show to {cfg.remote_addr_v['4']} | awk '{{print $2}}'", shell=True, host=cfg.remote).stdout - remote_ifname = remote_info.rstrip('\n') - if remote_ifname == "": + if cfg.remote_ifname == "": raise KsftFailEx('Can not get remote interface') local_info = ip("-d link show %s" % (cfg.ifname), json=True)[0] if 'parentbus' in local_info and local_info['parentbus'] == "netdevsim": @@ -136,15 +134,25 @@ def set_interface_init(cfg) -> None: cmd(f"ip link set dev {cfg.ifname} xdp off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpgeneric off ", shell=True) cmd(f"ip link set dev {cfg.ifname} xdpoffload off", shell=True) - cmd(f"ip link set dev {remote_ifname} mtu 1500", shell=True, host=cfg.remote) + cmd(f"ip link set dev {cfg.remote_ifname} mtu 1500", shell=True, host=cfg.remote) + +def test_default_v4(cfg, netnl) -> None: + cfg.require_ipver("4") -def test_default(cfg, netnl) -> None: _set_offload_checksum(cfg, netnl, "off") _test_v4(cfg) - _test_v6(cfg) _test_tcp(cfg) _set_offload_checksum(cfg, netnl, "on") _test_v4(cfg) + _test_tcp(cfg) + +def test_default_v6(cfg, netnl) -> None: + cfg.require_ipver("6") + + _set_offload_checksum(cfg, netnl, "off") + _test_v6(cfg) + _test_tcp(cfg) + _set_offload_checksum(cfg, netnl, "on") _test_v6(cfg) _test_tcp(cfg) @@ -202,7 +210,8 @@ def main() -> None: with NetDrvEpEnv(__file__) as cfg: get_interface_info(cfg) set_interface_init(cfg) - ksft_run([test_default, + ksft_run([test_default_v4, + test_default_v6, test_xdp_generic_sb, test_xdp_generic_mb, test_xdp_native_sb, diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index 828b66a10c63..7afa58e2bb20 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -2,3 +2,4 @@ dnotify_test devpts_pts file_stressor +anon_inode_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 66305fc34c60..b02326193fee 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts file_stressor +TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/anon_inode_test.c b/tools/testing/selftests/filesystems/anon_inode_test.c new file mode 100644 index 000000000000..e8e0ef1460d2 --- /dev/null +++ b/tools/testing/selftests/filesystems/anon_inode_test.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ + +#include <fcntl.h> +#include <stdio.h> +#include <sys/stat.h> + +#include "../kselftest_harness.h" +#include "overlayfs/wrappers.h" + +TEST(anon_inode_no_chown) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(fchown(fd_context, 1234, 5678), 0); + ASSERT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_chmod) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(fchmod(fd_context, 0777), 0); + ASSERT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_exec) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_LT(execveat(fd_context, "", NULL, NULL, AT_EMPTY_PATH), 0); + ASSERT_EQ(errno, EACCES); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST(anon_inode_no_open) +{ + int fd_context; + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_GE(dup2(fd_context, 500), 0); + ASSERT_EQ(close(fd_context), 0); + fd_context = 500; + + ASSERT_LT(open("/proc/self/fd/500", 0), 0); + ASSERT_EQ(errno, ENXIO); + + EXPECT_EQ(close(fd_context), 0); +} + +TEST_HARNESS_MAIN + diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore index 82a4846cbc4b..124339ea7845 100644 --- a/tools/testing/selftests/filesystems/mount-notify/.gitignore +++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only /*_test +/*_test_ns diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile index 10be0227b5ae..836a4eb7be06 100644 --- a/tools/testing/selftests/filesystems/mount-notify/Makefile +++ b/tools/testing/selftests/filesystems/mount-notify/Makefile @@ -1,6 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) -TEST_GEN_PROGS := mount-notify_test +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS += -lcap + +TEST_GEN_PROGS := mount-notify_test mount-notify_test_ns include ../../lib.mk + +$(OUTPUT)/mount-notify_test: ../utils.c +$(OUTPUT)/mount-notify_test_ns: ../utils.c diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c index 4a2d5c454fd1..63ce708d93ed 100644 --- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -8,48 +8,34 @@ #include <string.h> #include <sys/stat.h> #include <sys/mount.h> -#include <linux/fanotify.h> #include <unistd.h> -#include <sys/fanotify.h> #include <sys/syscall.h> #include "../../kselftest_harness.h" #include "../statmount/statmount.h" +#include "../utils.h" -#ifndef FAN_MNT_ATTACH -struct fanotify_event_info_mnt { - struct fanotify_event_info_header hdr; - __u64 mnt_id; -}; -#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ -#endif - -#ifndef FAN_MNT_DETACH -#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ -#endif - -#ifndef FAN_REPORT_MNT -#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ +// Needed for linux/fanotify.h +#ifndef __kernel_fsid_t +typedef struct { + int val[2]; +} __kernel_fsid_t; #endif -#ifndef FAN_MARK_MNTNS -#define FAN_MARK_MNTNS 0x00000110 -#endif +#include <sys/fanotify.h> -static uint64_t get_mnt_id(struct __test_metadata *const _metadata, - const char *path) -{ - struct statx sx; +static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; - ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0); - ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE)); - return sx.stx_mnt_id; -} +static const int mark_cmds[] = { + FAN_MARK_ADD, + FAN_MARK_REMOVE, + FAN_MARK_FLUSH +}; -static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; +#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) FIXTURE(fanotify) { - int fan_fd; + int fan_fd[NUM_FAN_FDS]; char buf[256]; unsigned int rem; void *next; @@ -61,7 +47,7 @@ FIXTURE(fanotify) { FIXTURE_SETUP(fanotify) { - int ret; + int i, ret; ASSERT_EQ(unshare(CLONE_NEWNS), 0); @@ -86,23 +72,37 @@ FIXTURE_SETUP(fanotify) ASSERT_EQ(mkdir("b", 0700), 0); - self->root_id = get_mnt_id(_metadata, "/"); + self->root_id = get_unique_mnt_id("/"); ASSERT_NE(self->root_id, 0); - self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0); - ASSERT_GE(self->fan_fd, 0); - - ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS, - FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL); - ASSERT_EQ(ret, 0); + for (i = 0; i < NUM_FAN_FDS; i++) { + self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, + 0); + ASSERT_GE(self->fan_fd[i], 0); + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + // On fd[0] we do an extra ADD that changes nothing. + // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. + ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + } self->rem = 0; } FIXTURE_TEARDOWN(fanotify) { + int i; + ASSERT_EQ(self->rem, 0); - close(self->fan_fd); + for (i = 0; i < NUM_FAN_FDS; i++) + close(self->fan_fd[i]); ASSERT_EQ(fchdir(self->orig_root), 0); @@ -123,8 +123,21 @@ static uint64_t expect_notify(struct __test_metadata *const _metadata, unsigned int thislen; if (!self->rem) { - ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf)); - ASSERT_GT(len, 0); + ssize_t len; + int i; + + for (i = NUM_FAN_FDS - 1; i >= 0; i--) { + len = read(self->fan_fd[i], self->buf, + sizeof(self->buf)); + if (i > 0) { + // Groups 1,2 should get EAGAIN + ASSERT_EQ(len, -1); + ASSERT_EQ(errno, EAGAIN); + } else { + // Group 0 should get events + ASSERT_GT(len, 0); + } + } self->rem = len; self->next = (void *) self->buf; diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c new file mode 100644 index 000000000000..090a5ca65004 --- /dev/null +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test_ns.c @@ -0,0 +1,557 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <unistd.h> +#include <sys/syscall.h> + +#include "../../kselftest_harness.h" +#include "../../pidfd/pidfd.h" +#include "../statmount/statmount.h" +#include "../utils.h" + +// Needed for linux/fanotify.h +#ifndef __kernel_fsid_t +typedef struct { + int val[2]; +} __kernel_fsid_t; +#endif + +#include <sys/fanotify.h> + +static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; + +static const int mark_types[] = { + FAN_MARK_FILESYSTEM, + FAN_MARK_MOUNT, + FAN_MARK_INODE +}; + +static const int mark_cmds[] = { + FAN_MARK_ADD, + FAN_MARK_REMOVE, + FAN_MARK_FLUSH +}; + +#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds) + +FIXTURE(fanotify) { + int fan_fd[NUM_FAN_FDS]; + char buf[256]; + unsigned int rem; + void *next; + char root_mntpoint[sizeof(root_mntpoint_templ)]; + int orig_root; + int orig_ns_fd; + int ns_fd; + uint64_t root_id; +}; + +FIXTURE_SETUP(fanotify) +{ + int i, ret; + + self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(self->orig_ns_fd, 0); + + ret = setup_userns(); + ASSERT_EQ(ret, 0); + + self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(self->ns_fd, 0); + + strcpy(self->root_mntpoint, root_mntpoint_templ); + ASSERT_NE(mkdtemp(self->root_mntpoint), NULL); + + self->orig_root = open("/", O_PATH | O_CLOEXEC); + ASSERT_GE(self->orig_root, 0); + + ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0); + + ASSERT_EQ(chroot(self->root_mntpoint), 0); + + ASSERT_EQ(chdir("/"), 0); + + ASSERT_EQ(mkdir("a", 0700), 0); + + ASSERT_EQ(mkdir("b", 0700), 0); + + self->root_id = get_unique_mnt_id("/"); + ASSERT_NE(self->root_id, 0); + + for (i = 0; i < NUM_FAN_FDS; i++) { + int fan_fd = fanotify_init(FAN_REPORT_FID, 0); + // Verify that watching tmpfs mounted inside userns is allowed + ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i], + FAN_OPEN, AT_FDCWD, "/"); + ASSERT_EQ(ret, 0); + // ...but watching entire orig root filesystem is not allowed + ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM, + FAN_OPEN, self->orig_root, "."); + ASSERT_NE(ret, 0); + close(fan_fd); + + self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK, + 0); + ASSERT_GE(self->fan_fd[i], 0); + // Verify that watching mntns where group was created is allowed + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + // ...but watching orig mntns is not allowed + ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->orig_ns_fd, NULL); + ASSERT_NE(ret, 0); + // On fd[0] we do an extra ADD that changes nothing. + // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark. + ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] | + FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, + self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + } + + self->rem = 0; +} + +FIXTURE_TEARDOWN(fanotify) +{ + int i; + + ASSERT_EQ(self->rem, 0); + for (i = 0; i < NUM_FAN_FDS; i++) + close(self->fan_fd[i]); + + ASSERT_EQ(fchdir(self->orig_root), 0); + + ASSERT_EQ(chroot("."), 0); + + EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0); + EXPECT_EQ(chdir(self->root_mntpoint), 0); + EXPECT_EQ(chdir("/"), 0); + EXPECT_EQ(rmdir(self->root_mntpoint), 0); +} + +static uint64_t expect_notify(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t *mask) +{ + struct fanotify_event_metadata *meta; + struct fanotify_event_info_mnt *mnt; + unsigned int thislen; + + if (!self->rem) { + ssize_t len; + int i; + + for (i = NUM_FAN_FDS - 1; i >= 0; i--) { + len = read(self->fan_fd[i], self->buf, + sizeof(self->buf)); + if (i > 0) { + // Groups 1,2 should get EAGAIN + ASSERT_EQ(len, -1); + ASSERT_EQ(errno, EAGAIN); + } else { + // Group 0 should get events + ASSERT_GT(len, 0); + } + } + + self->rem = len; + self->next = (void *) self->buf; + } + + meta = self->next; + ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem)); + + thislen = meta->event_len; + self->rem -= thislen; + self->next += thislen; + + *mask = meta->mask; + thislen -= sizeof(*meta); + + mnt = ((void *) meta) + meta->event_len - thislen; + + ASSERT_EQ(thislen, sizeof(*mnt)); + + return mnt->mnt_id; +} + +static void expect_notify_n(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + unsigned int n, uint64_t mask[], uint64_t mnts[]) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mnts[i] = expect_notify(_metadata, self, &mask[i]); +} + +static uint64_t expect_notify_mask(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t expect_mask) +{ + uint64_t mntid, mask; + + mntid = expect_notify(_metadata, self, &mask); + ASSERT_EQ(expect_mask, mask); + + return mntid; +} + + +static void expect_notify_mask_n(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t mask, unsigned int n, uint64_t mnts[]) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mnts[i] = expect_notify_mask(_metadata, self, mask); +} + +static void verify_mount_ids(struct __test_metadata *const _metadata, + const uint64_t list1[], const uint64_t list2[], + size_t num) +{ + unsigned int i, j; + + // Check that neither list has any duplicates + for (i = 0; i < num; i++) { + for (j = 0; j < num; j++) { + if (i != j) { + ASSERT_NE(list1[i], list1[j]); + ASSERT_NE(list2[i], list2[j]); + } + } + } + // Check that all list1 memebers can be found in list2. Together with + // the above it means that the list1 and list2 represent the same sets. + for (i = 0; i < num; i++) { + for (j = 0; j < num; j++) { + if (list1[i] == list2[j]) + break; + } + ASSERT_NE(j, num); + } +} + +static void check_mounted(struct __test_metadata *const _metadata, + const uint64_t mnts[], size_t num) +{ + ssize_t ret; + uint64_t *list; + + list = malloc((num + 1) * sizeof(list[0])); + ASSERT_NE(list, NULL); + + ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0); + ASSERT_EQ(ret, num); + + verify_mount_ids(_metadata, mnts, list, num); + + free(list); +} + +static void setup_mount_tree(struct __test_metadata *const _metadata, + int log2_num) +{ + int ret, i; + + ret = mount("", "/", NULL, MS_SHARED, NULL); + ASSERT_EQ(ret, 0); + + for (i = 0; i < log2_num; i++) { + ret = mount("/", "/", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + } +} + +TEST_F(fanotify, bind) +{ + int ret; + uint64_t mnts[2] = { self->root_id }; + + ret = mount("/", "/", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + // Cleanup + uint64_t detach_id; + ret = umount("/"); + ASSERT_EQ(ret, 0); + + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); + ASSERT_EQ(detach_id, mnts[1]); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, move) +{ + int ret; + uint64_t mnts[2] = { self->root_id }; + uint64_t move_id; + + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0); + ASSERT_EQ(ret, 0); + + move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH); + ASSERT_EQ(move_id, mnts[1]); + + // Cleanup + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, propagate) +{ + const unsigned int log2_num = 4; + const unsigned int num = (1 << log2_num); + uint64_t mnts[num]; + + setup_mount_tree(_metadata, log2_num); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1); + + mnts[0] = self->root_id; + check_mounted(_metadata, mnts, num); + + // Cleanup + int ret; + uint64_t mnts2[num]; + ret = umount2("/", MNT_DETACH); + ASSERT_EQ(ret, 0); + + ret = mount("", "/", NULL, MS_PRIVATE, NULL); + ASSERT_EQ(ret, 0); + + mnts2[0] = self->root_id; + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1); + verify_mount_ids(_metadata, mnts, mnts2, num); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, fsmount) +{ + int ret, fs, mnt; + uint64_t mnts[2] = { self->root_id }; + + fs = fsopen("tmpfs", 0); + ASSERT_GE(fs, 0); + + ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0); + ASSERT_EQ(ret, 0); + + mnt = fsmount(fs, 0, 0); + ASSERT_GE(mnt, 0); + + close(fs); + + ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH); + ASSERT_EQ(ret, 0); + + close(mnt); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + // Cleanup + uint64_t detach_id; + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); + ASSERT_EQ(detach_id, mnts[1]); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, reparent) +{ + uint64_t mnts[6] = { self->root_id }; + uint64_t dmnts[3]; + uint64_t masks[3]; + unsigned int i; + int ret; + + // Create setup with a[1] -> b[2] propagation + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("", "/a", NULL, MS_SHARED, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("/a", "/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("", "/b", NULL, MS_SLAVE, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); + + check_mounted(_metadata, mnts, 3); + + // Mount on a[3], which is propagated to b[4] + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3); + + check_mounted(_metadata, mnts, 5); + + // Mount on b[5], not propagated + ret = mount("/", "/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + + check_mounted(_metadata, mnts, 6); + + // Umount a[3], which is propagated to b[4], but not b[5] + // This will result in b[5] "falling" on b[2] + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + expect_notify_n(_metadata, self, 3, masks, dmnts); + verify_mount_ids(_metadata, mnts + 3, dmnts, 3); + + for (i = 0; i < 3; i++) { + if (dmnts[i] == mnts[5]) { + ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH); + } else { + ASSERT_EQ(masks[i], FAN_MNT_DETACH); + } + } + + mnts[3] = mnts[5]; + check_mounted(_metadata, mnts, 4); + + // Cleanup + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts); + verify_mount_ids(_metadata, mnts + 1, dmnts, 3); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, rmdir) +{ + uint64_t mnts[3] = { self->root_id }; + int ret; + + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("/", "/a/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); + + check_mounted(_metadata, mnts, 3); + + ret = chdir("/a"); + ASSERT_EQ(ret, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret == 0) { + chdir("/"); + unshare(CLONE_NEWNS); + mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + umount2("/a", MNT_DETACH); + // This triggers a detach in the other namespace + rmdir("/a"); + exit(0); + } + wait(NULL); + + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1); + check_mounted(_metadata, mnts, 1); + + // Cleanup + ret = chdir("/"); + ASSERT_EQ(ret, 0); +} + +TEST_F(fanotify, pivot_root) +{ + uint64_t mnts[3] = { self->root_id }; + uint64_t mnts2[3]; + int ret; + + ret = mount("tmpfs", "/a", "tmpfs", 0, NULL); + ASSERT_EQ(ret, 0); + + mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + + ret = mkdir("/a/new", 0700); + ASSERT_EQ(ret, 0); + + ret = mkdir("/a/old", 0700); + ASSERT_EQ(ret, 0); + + ret = mount("/a", "/a/new", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + check_mounted(_metadata, mnts, 3); + + ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old"); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2); + verify_mount_ids(_metadata, mnts, mnts2, 2); + check_mounted(_metadata, mnts, 3); + + // Cleanup + ret = syscall(SYS_pivot_root, "/old", "/old/a/new"); + ASSERT_EQ(ret, 0); + + ret = umount("/a/new"); + ASSERT_EQ(ret, 0); + + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + check_mounted(_metadata, mnts, 1); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile index 6c661232b3b5..d3ad4a77db9b 100644 --- a/tools/testing/selftests/filesystems/overlayfs/Makefile +++ b/tools/testing/selftests/filesystems/overlayfs/Makefile @@ -4,7 +4,7 @@ CFLAGS += -Wall CFLAGS += $(KHDR_INCLUDES) LDLIBS += -lcap -LOCAL_HDRS += wrappers.h log.h +LOCAL_HDRS += ../wrappers.h log.h TEST_GEN_PROGS := dev_in_maps TEST_GEN_PROGS += set_layers_via_fds diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c index 3b796264223f..31db54b00e64 100644 --- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c +++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c @@ -17,7 +17,7 @@ #include "../../kselftest.h" #include "log.h" -#include "wrappers.h" +#include "../wrappers.h" static long get_file_dev_and_inode(void *addr, struct statx *stx) { diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c index 5074e64e74a8..dc0449fa628f 100644 --- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c +++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c @@ -16,7 +16,7 @@ #include "../../pidfd/pidfd.h" #include "log.h" #include "../utils.h" -#include "wrappers.h" +#include "../wrappers.h" FIXTURE(set_layers_via_fds) { int pidfd; diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile index 14ee91a41650..8e354fe99b44 100644 --- a/tools/testing/selftests/filesystems/statmount/Makefile +++ b/tools/testing/selftests/filesystems/statmount/Makefile @@ -1,6 +1,10 @@ # SPDX-License-Identifier: GPL-2.0-or-later -CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES) +LDLIBS += -lcap + TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test include ../../lib.mk + +$(OUTPUT)/statmount_test_ns: ../utils.c diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h index a7a5289ddae9..99e5ad082fb1 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount.h +++ b/tools/testing/selftests/filesystems/statmount/statmount.h @@ -7,6 +7,42 @@ #include <linux/mount.h> #include <asm/unistd.h> +#ifndef __NR_statmount + #if defined __alpha__ + #define __NR_statmount 567 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_statmount 4457 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_statmount 6457 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_statmount 5457 + #endif + #else + #define __NR_statmount 457 + #endif +#endif + +#ifndef __NR_listmount + #if defined __alpha__ + #define __NR_listmount 568 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_listmount 4458 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_listmount 6458 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_listmount 5458 + #endif + #else + #define __NR_listmount 458 + #endif +#endif + static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask, struct statmount *buf, size_t bufsize, unsigned int flags) diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c index 70cb0c8b21cf..605a3fa16bf7 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c +++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c @@ -14,6 +14,7 @@ #include <linux/stat.h> #include "statmount.h" +#include "../utils.h" #include "../../kselftest.h" #define NSID_PASS 0 @@ -78,87 +79,10 @@ static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id) return NSID_PASS; } -static int get_mnt_id(const char *path, uint64_t *mnt_id) -{ - struct statx sx; - int ret; - - ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx); - if (ret == -1) { - ksft_print_msg("retrieving unique mount ID for %s: %s\n", path, - strerror(errno)); - return NSID_ERROR; - } - - if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) { - ksft_print_msg("no unique mount ID available for %s\n", path); - return NSID_ERROR; - } - - *mnt_id = sx.stx_mnt_id; - return NSID_PASS; -} - -static int write_file(const char *path, const char *val) -{ - int fd = open(path, O_WRONLY); - size_t len = strlen(val); - int ret; - - if (fd == -1) { - ksft_print_msg("opening %s for write: %s\n", path, strerror(errno)); - return NSID_ERROR; - } - - ret = write(fd, val, len); - if (ret == -1) { - ksft_print_msg("writing to %s: %s\n", path, strerror(errno)); - return NSID_ERROR; - } - if (ret != len) { - ksft_print_msg("short write to %s\n", path); - return NSID_ERROR; - } - - ret = close(fd); - if (ret == -1) { - ksft_print_msg("closing %s\n", path); - return NSID_ERROR; - } - - return NSID_PASS; -} - static int setup_namespace(void) { - int ret; - char buf[32]; - uid_t uid = getuid(); - gid_t gid = getgid(); - - ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); - if (ret == -1) - ksft_exit_fail_msg("unsharing mountns and userns: %s\n", - strerror(errno)); - - sprintf(buf, "0 %d 1", uid); - ret = write_file("/proc/self/uid_map", buf); - if (ret != NSID_PASS) - return ret; - ret = write_file("/proc/self/setgroups", "deny"); - if (ret != NSID_PASS) - return ret; - sprintf(buf, "0 %d 1", gid); - ret = write_file("/proc/self/gid_map", buf); - if (ret != NSID_PASS) - return ret; - - ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); - if (ret == -1) { - ksft_print_msg("making mount tree private: %s\n", - strerror(errno)); + if (setup_userns() != 0) return NSID_ERROR; - } return NSID_PASS; } @@ -174,9 +98,9 @@ static int _test_statmount_mnt_ns_id(void) if (ret != NSID_PASS) return ret; - ret = get_mnt_id("/", &root_id); - if (ret != NSID_PASS) - return ret; + root_id = get_unique_mnt_id("/"); + if (!root_id) + return NSID_ERROR; ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0); if (ret == -1) { diff --git a/tools/testing/selftests/filesystems/utils.c b/tools/testing/selftests/filesystems/utils.c index e553c89c5b19..c43a69dffd83 100644 --- a/tools/testing/selftests/filesystems/utils.c +++ b/tools/testing/selftests/filesystems/utils.c @@ -18,7 +18,10 @@ #include <sys/types.h> #include <sys/wait.h> #include <sys/xattr.h> +#include <sys/mount.h> +#include "../kselftest.h" +#include "wrappers.h" #include "utils.h" #define MAX_USERNS_LEVEL 32 @@ -447,6 +450,71 @@ out_close: return fret; } +static int write_file(const char *path, const char *val) +{ + int fd = open(path, O_WRONLY); + size_t len = strlen(val); + int ret; + + if (fd == -1) { + ksft_print_msg("opening %s for write: %s\n", path, strerror(errno)); + return -1; + } + + ret = write(fd, val, len); + if (ret == -1) { + ksft_print_msg("writing to %s: %s\n", path, strerror(errno)); + return -1; + } + if (ret != len) { + ksft_print_msg("short write to %s\n", path); + return -1; + } + + ret = close(fd); + if (ret == -1) { + ksft_print_msg("closing %s\n", path); + return -1; + } + + return 0; +} + +int setup_userns(void) +{ + int ret; + char buf[32]; + uid_t uid = getuid(); + gid_t gid = getgid(); + + ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID); + if (ret) { + ksft_exit_fail_msg("unsharing mountns and userns: %s\n", + strerror(errno)); + return ret; + } + + sprintf(buf, "0 %d 1", uid); + ret = write_file("/proc/self/uid_map", buf); + if (ret) + return ret; + ret = write_file("/proc/self/setgroups", "deny"); + if (ret) + return ret; + sprintf(buf, "0 %d 1", gid); + ret = write_file("/proc/self/gid_map", buf); + if (ret) + return ret; + + ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + if (ret) { + ksft_print_msg("making mount tree private: %s\n", strerror(errno)); + return ret; + } + + return 0; +} + /* caps_down - lower all effective caps */ int caps_down(void) { @@ -499,3 +567,23 @@ out: cap_free(caps); return fret; } + +uint64_t get_unique_mnt_id(const char *path) +{ + struct statx sx; + int ret; + + ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx); + if (ret == -1) { + ksft_print_msg("retrieving unique mount ID for %s: %s\n", path, + strerror(errno)); + return 0; + } + + if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) { + ksft_print_msg("no unique mount ID available for %s\n", path); + return 0; + } + + return sx.stx_mnt_id; +} diff --git a/tools/testing/selftests/filesystems/utils.h b/tools/testing/selftests/filesystems/utils.h index 7f1df2a3e94c..70f7ccc607f4 100644 --- a/tools/testing/selftests/filesystems/utils.h +++ b/tools/testing/selftests/filesystems/utils.h @@ -27,6 +27,7 @@ extern int caps_down(void); extern int cap_down(cap_value_t down); extern bool switch_ids(uid_t uid, gid_t gid); +extern int setup_userns(void); static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps) { @@ -42,4 +43,6 @@ static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps) return true; } +extern uint64_t get_unique_mnt_id(const char *path); + #endif /* __IDMAP_UTILS_H */ diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/wrappers.h index c38bc48e0cfa..420ae4f908cf 100644 --- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h +++ b/tools/testing/selftests/filesystems/wrappers.h @@ -9,6 +9,10 @@ #include <linux/mount.h> #include <sys/syscall.h> +#ifndef STATX_MNT_ID_UNIQUE +#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ +#endif + static inline int sys_fsopen(const char *fsname, unsigned int flags) { return syscall(__NR_fsopen, fsname, flags); @@ -36,6 +40,28 @@ static inline int sys_mount(const char *src, const char *tgt, const char *fst, #define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ #endif +#ifndef MOVE_MOUNT_T_EMPTY_PATH +#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ +#endif + +#ifndef __NR_move_mount + #if defined __alpha__ + #define __NR_move_mount 539 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_move_mount 4429 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_move_mount 6429 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_move_mount 5429 + #endif + #else + #define __NR_move_mount 429 + #endif +#endif + static inline int sys_move_mount(int from_dfd, const char *from_pathname, int to_dfd, const char *to_pathname, unsigned int flags) @@ -53,7 +79,25 @@ static inline int sys_move_mount(int from_dfd, const char *from_pathname, #endif #ifndef AT_RECURSIVE -#define AT_RECURSIVE 0x8000 +#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ +#endif + +#ifndef __NR_open_tree + #if defined __alpha__ + #define __NR_open_tree 538 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree 4428 + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree 6428 + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree 5428 + #endif + #else + #define __NR_open_tree 428 + #endif #endif static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index 49d96bb16355..7c12263f8260 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -6,6 +6,6 @@ TEST_PROGS := ftracetest-ktap TEST_FILES := test.d settings EXTRA_CLEAN := $(OUTPUT)/logs/* -TEST_GEN_PROGS = poll +TEST_GEN_FILES := poll include ../lib.mk diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc index 6b94b678741a..f656bccb1a14 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc @@ -7,11 +7,32 @@ MAX_ARGS=128 EXCEED_ARGS=$((MAX_ARGS + 1)) +# bash and dash evaluate variables differently. +# dash will evaluate '\\' every time it is read whereas bash does not. +# +# TEST_STRING="$TEST_STRING \\$i" +# echo $TEST_STRING +# +# With i=123 +# On bash, that will print "\123" +# but on dash, that will print the escape sequence of \123 as the \ will +# be interpreted again in the echo. +# +# Set a variable "bs" to save a double backslash, then echo that +# to "ts" to see if $ts changed or not. If it changed, it's dash, +# if not, it's bash, and then bs can equal a single backslash. +bs='\\' +ts=`echo $bs` +if [ "$ts" = '\\' ]; then + # this is bash + bs='\' +fi + check_max_args() { # event_header TEST_STRING=$1 # Acceptable for i in `seq 1 $MAX_ARGS`; do - TEST_STRING="$TEST_STRING \\$i" + TEST_STRING="$TEST_STRING $bs$i" done echo "$TEST_STRING" >> dynamic_events echo > dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index 118247b8dd84..c62165fabd0c 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -80,6 +80,26 @@ if [ $misscnt -gt 0 ]; then exit_fail fi +# Check strings too +if [ -f events/syscalls/sys_enter_openat/filter ]; then + DIRNAME=`basename $TMPDIR` + echo "filename.ustring ~ \"*$DIRNAME*\"" > events/syscalls/sys_enter_openat/filter + echo 1 > events/syscalls/sys_enter_openat/enable + echo 1 > tracing_on + ls /bin/sh + nocnt=`grep openat trace | wc -l` + ls $TMPDIR + echo 0 > tracing_on + hitcnt=`grep openat trace | wc -l`; + echo 0 > events/syscalls/sys_enter_openat/enable + if [ $nocnt -gt 0 ]; then + exit_fail + fi + if [ $hitcnt -eq 0 ]; then + exit_fail + fi +fi + reset_events_filter exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc new file mode 100644 index 000000000000..b6d6a312ead5 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi-filter.tc @@ -0,0 +1,177 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: ftrace - function graph filters +# requires: set_ftrace_filter function_graph:tracer + +# Make sure that function graph filtering works + +INSTANCE1="instances/test1_$$" +INSTANCE2="instances/test2_$$" + +WD=`pwd` + +do_reset() { + cd $WD + if [ -d $INSTANCE1 ]; then + echo nop > $INSTANCE1/current_tracer + rmdir $INSTANCE1 + fi + if [ -d $INSTANCE2 ]; then + echo nop > $INSTANCE2/current_tracer + rmdir $INSTANCE2 + fi +} + +mkdir $INSTANCE1 +if ! grep -q function_graph $INSTANCE1/available_tracers; then + echo "function_graph not allowed with instances" + rmdir $INSTANCE1 + exit_unsupported +fi + +mkdir $INSTANCE2 + +fail() { # msg + do_reset + echo $1 + exit_fail +} + +disable_tracing +clear_trace + +function_count() { + search=$1 + vsearch=$2 + + if [ -z "$search" ]; then + cat enabled_functions | wc -l + elif [ -z "$vsearch" ]; then + grep $search enabled_functions | wc -l + else + grep $search enabled_functions | grep $vsearch| wc -l + fi +} + +set_fgraph() { + instance=$1 + filter="$2" + notrace="$3" + + echo "$filter" > $instance/set_ftrace_filter + echo "$notrace" > $instance/set_ftrace_notrace + echo function_graph > $instance/current_tracer +} + +check_functions() { + orig_cnt=$1 + test=$2 + + cnt=`function_count $test` + if [ $cnt -gt $orig_cnt ]; then + fail + fi +} + +check_cnt() { + orig_cnt=$1 + search=$2 + vsearch=$3 + + cnt=`function_count $search $vsearch` + if [ $cnt -gt $orig_cnt ]; then + fail + fi +} + +reset_graph() { + instance=$1 + echo nop > $instance/current_tracer +} + +# get any functions that were enabled before the test +total_cnt=`function_count` +sched_cnt=`function_count sched` +lock_cnt=`function_count lock` +time_cnt=`function_count time` +clock_cnt=`function_count clock` +locks_clock_cnt=`function_count locks clock` +clock_locks_cnt=`function_count clock locks` + +# Trace functions with "sched" but not "time" +set_fgraph $INSTANCE1 '*sched*' '*time*' + +# Make sure "time" isn't listed +check_functions $time_cnt 'time' +instance1_cnt=`function_count` + +# Trace functions with "lock" but not "clock" +set_fgraph $INSTANCE2 '*lock*' '*clock*' +instance1_2_cnt=`function_count` + +# Turn off the first instance +reset_graph $INSTANCE1 + +# The second instance doesn't trace "clock" functions +check_functions $clock_cnt 'clock' +instance2_cnt=`function_count` + +# Start from a clean slate +reset_graph $INSTANCE2 +check_functions $total_cnt + +# Trace functions with "lock" but not "clock" +set_fgraph $INSTANCE2 '*lock*' '*clock*' + +# This should match the last time instance 2 was by itself +cnt=`function_count` +if [ $instance2_cnt -ne $cnt ]; then + fail +fi + +# And it should not be tracing "clock" functions +check_functions $clock_cnt 'clock' + +# Trace functions with "sched" but not "time" +set_fgraph $INSTANCE1 '*sched*' '*time*' + +# This should match the last time both instances were enabled +cnt=`function_count` +if [ $instance1_2_cnt -ne $cnt ]; then + fail +fi + +# Turn off the second instance +reset_graph $INSTANCE2 + +# This should match the last time instance 1 was by itself +cnt=`function_count` +if [ $instance1_cnt -ne $cnt ]; then + fail +fi + +# And it should not be tracing "time" functions +check_functions $time_cnt 'time' + +# Start from a clean slate +reset_graph $INSTANCE1 +check_functions $total_cnt + +# Enable all functions but those that have "locks" +set_fgraph $INSTANCE1 '' '*locks*' + +# Enable all functions but those that have "clock" +set_fgraph $INSTANCE2 '' '*clock*' + +# If a function has "locks" it should not have "clock" +check_cnt $locks_clock_cnt locks clock + +# If a function has "clock" it should not have "locks" +check_cnt $clock_locks_cnt clock locks + +reset_graph $INSTANCE1 +reset_graph $INSTANCE2 + +do_reset + +exit 0 diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore index fbcbdb6963b3..7b24ae89594a 100644 --- a/tools/testing/selftests/futex/functional/.gitignore +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -1,11 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only +futex_numa_mpol +futex_priv_hash +futex_requeue futex_requeue_pi futex_requeue_pi_mismatched_ops futex_requeue_pi_signal_restart +futex_wait futex_wait_private_mapped_file futex_wait_timeout futex_wait_uninitialized_heap futex_wait_wouldblock -futex_wait -futex_requeue futex_waitv diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile index f79f9bac7918..8cfb87f7f7c5 100644 --- a/tools/testing/selftests/futex/functional/Makefile +++ b/tools/testing/selftests/futex/functional/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 INCLUDES := -I../include -I../../ $(KHDR_INCLUDES) CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES) -LDLIBS := -lpthread -lrt +LDLIBS := -lpthread -lrt -lnuma LOCAL_HDRS := \ ../include/futextest.h \ @@ -17,7 +17,10 @@ TEST_GEN_PROGS := \ futex_wait_private_mapped_file \ futex_wait \ futex_requeue \ - futex_waitv + futex_priv_hash \ + futex_numa_mpol \ + futex_waitv \ + futex_numa TEST_PROGS := run.sh diff --git a/tools/testing/selftests/futex/functional/futex_numa.c b/tools/testing/selftests/futex/functional/futex_numa.c new file mode 100644 index 000000000000..f29e4d627e79 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_numa.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <pthread.h> +#include <sys/shm.h> +#include <sys/mman.h> +#include <fcntl.h> +#include <stdbool.h> +#include <time.h> +#include <assert.h> +#include "logging.h" +#include "futextest.h" +#include "futex2test.h" + +typedef u_int32_t u32; +typedef int32_t s32; +typedef u_int64_t u64; + +static unsigned int fflags = (FUTEX2_SIZE_U32 | FUTEX2_PRIVATE); +static int fnode = FUTEX_NO_NODE; + +/* fairly stupid test-and-set lock with a waiter flag */ + +#define N_LOCK 0x0000001 +#define N_WAITERS 0x0001000 + +struct futex_numa_32 { + union { + u64 full; + struct { + u32 val; + u32 node; + }; + }; +}; + +void futex_numa_32_lock(struct futex_numa_32 *lock) +{ + for (;;) { + struct futex_numa_32 new, old = { + .full = __atomic_load_n(&lock->full, __ATOMIC_RELAXED), + }; + + for (;;) { + new = old; + if (old.val == 0) { + /* no waiter, no lock -> first lock, set no-node */ + new.node = fnode; + } + if (old.val & N_LOCK) { + /* contention, set waiter */ + new.val |= N_WAITERS; + } + new.val |= N_LOCK; + + /* nothing changed, ready to block */ + if (old.full == new.full) + break; + + /* + * Use u64 cmpxchg to set the futex value and node in a + * consistent manner. + */ + if (__atomic_compare_exchange_n(&lock->full, + &old.full, new.full, + /* .weak */ false, + __ATOMIC_ACQUIRE, + __ATOMIC_RELAXED)) { + + /* if we just set N_LOCK, we own it */ + if (!(old.val & N_LOCK)) + return; + + /* go block */ + break; + } + } + + futex2_wait(lock, new.val, fflags, NULL, 0); + } +} + +void futex_numa_32_unlock(struct futex_numa_32 *lock) +{ + u32 val = __atomic_sub_fetch(&lock->val, N_LOCK, __ATOMIC_RELEASE); + assert((s32)val >= 0); + if (val & N_WAITERS) { + int woken = futex2_wake(lock, 1, fflags); + assert(val == N_WAITERS); + if (!woken) { + __atomic_compare_exchange_n(&lock->val, &val, 0U, + false, __ATOMIC_RELAXED, + __ATOMIC_RELAXED); + } + } +} + +static long nanos = 50000; + +struct thread_args { + pthread_t tid; + volatile int * done; + struct futex_numa_32 *lock; + int val; + int *val1, *val2; + int node; +}; + +static void *threadfn(void *_arg) +{ + struct thread_args *args = _arg; + struct timespec ts = { + .tv_nsec = nanos, + }; + int node; + + while (!*args->done) { + + futex_numa_32_lock(args->lock); + args->val++; + + assert(*args->val1 == *args->val2); + (*args->val1)++; + nanosleep(&ts, NULL); + (*args->val2)++; + + node = args->lock->node; + futex_numa_32_unlock(args->lock); + + if (node != args->node) { + args->node = node; + printf("node: %d\n", node); + } + + nanosleep(&ts, NULL); + } + + return NULL; +} + +static void *contendfn(void *_arg) +{ + struct thread_args *args = _arg; + + while (!*args->done) { + /* + * futex2_wait() will take hb-lock, verify *var == val and + * queue/abort. By knowingly setting val 'wrong' this will + * abort and thereby generate hb-lock contention. + */ + futex2_wait(&args->lock->val, ~0U, fflags, NULL, 0); + args->val++; + } + + return NULL; +} + +static volatile int done = 0; +static struct futex_numa_32 lock = { .val = 0, }; +static int val1, val2; + +int main(int argc, char *argv[]) +{ + struct thread_args *tas[512], *cas[512]; + int c, t, threads = 2, contenders = 0; + int sleeps = 10; + int total = 0; + + while ((c = getopt(argc, argv, "c:t:s:n:N::")) != -1) { + switch (c) { + case 'c': + contenders = atoi(optarg); + break; + case 't': + threads = atoi(optarg); + break; + case 's': + sleeps = atoi(optarg); + break; + case 'n': + nanos = atoi(optarg); + break; + case 'N': + fflags |= FUTEX2_NUMA; + if (optarg) + fnode = atoi(optarg); + break; + default: + exit(1); + break; + } + } + + for (t = 0; t < contenders; t++) { + struct thread_args *args = calloc(1, sizeof(*args)); + if (!args) { + perror("thread_args"); + exit(-1); + } + + args->done = &done; + args->lock = &lock; + args->val1 = &val1; + args->val2 = &val2; + args->node = -1; + + if (pthread_create(&args->tid, NULL, contendfn, args)) { + perror("pthread_create"); + exit(-1); + } + + cas[t] = args; + } + + for (t = 0; t < threads; t++) { + struct thread_args *args = calloc(1, sizeof(*args)); + if (!args) { + perror("thread_args"); + exit(-1); + } + + args->done = &done; + args->lock = &lock; + args->val1 = &val1; + args->val2 = &val2; + args->node = -1; + + if (pthread_create(&args->tid, NULL, threadfn, args)) { + perror("pthread_create"); + exit(-1); + } + + tas[t] = args; + } + + sleep(sleeps); + + done = true; + + for (t = 0; t < threads; t++) { + struct thread_args *args = tas[t]; + + pthread_join(args->tid, NULL); + total += args->val; +// printf("tval: %d\n", args->val); + } + printf("total: %d\n", total); + + if (contenders) { + total = 0; + for (t = 0; t < contenders; t++) { + struct thread_args *args = cas[t]; + + pthread_join(args->tid, NULL); + total += args->val; + // printf("tval: %d\n", args->val); + } + printf("contenders: %d\n", total); + } + + return 0; +} + diff --git a/tools/testing/selftests/futex/functional/futex_numa_mpol.c b/tools/testing/selftests/futex/functional/futex_numa_mpol.c new file mode 100644 index 000000000000..20a9d3ecf743 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_numa_mpol.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de> + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <numa.h> +#include <numaif.h> + +#include <linux/futex.h> +#include <sys/mman.h> + +#include "logging.h" +#include "futextest.h" +#include "futex2test.h" + +#define MAX_THREADS 64 + +static pthread_barrier_t barrier_main; +static pthread_t threads[MAX_THREADS]; + +struct thread_args { + void *futex_ptr; + unsigned int flags; + int result; +}; + +static struct thread_args thread_args[MAX_THREADS]; + +#ifndef FUTEX_NO_NODE +#define FUTEX_NO_NODE (-1) +#endif + +#ifndef FUTEX2_MPOL +#define FUTEX2_MPOL 0x08 +#endif + +static void *thread_lock_fn(void *arg) +{ + struct thread_args *args = arg; + int ret; + + pthread_barrier_wait(&barrier_main); + ret = futex2_wait(args->futex_ptr, 0, args->flags, NULL, 0); + args->result = ret; + return NULL; +} + +static void create_max_threads(void *futex_ptr) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + thread_args[i].futex_ptr = futex_ptr; + thread_args[i].flags = FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA; + thread_args[i].result = 0; + ret = pthread_create(&threads[i], NULL, thread_lock_fn, &thread_args[i]); + if (ret) + ksft_exit_fail_msg("pthread_create failed\n"); + } +} + +static void join_max_threads(void) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + ret = pthread_join(threads[i], NULL); + if (ret) + ksft_exit_fail_msg("pthread_join failed for thread %d\n", i); + } +} + +static void __test_futex(void *futex_ptr, int must_fail, unsigned int futex_flags) +{ + int to_wake, ret, i, need_exit = 0; + + pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1); + create_max_threads(futex_ptr); + pthread_barrier_wait(&barrier_main); + to_wake = MAX_THREADS; + + do { + ret = futex2_wake(futex_ptr, to_wake, futex_flags); + if (must_fail) { + if (ret < 0) + break; + ksft_exit_fail_msg("futex2_wake(%d, 0x%x) should fail, but didn't\n", + to_wake, futex_flags); + } + if (ret < 0) { + ksft_exit_fail_msg("Failed futex2_wake(%d, 0x%x): %m\n", + to_wake, futex_flags); + } + if (!ret) + usleep(50); + to_wake -= ret; + + } while (to_wake); + join_max_threads(); + + for (i = 0; i < MAX_THREADS; i++) { + if (must_fail && thread_args[i].result != -1) { + ksft_print_msg("Thread %d should fail but succeeded (%d)\n", + i, thread_args[i].result); + need_exit = 1; + } + if (!must_fail && thread_args[i].result != 0) { + ksft_print_msg("Thread %d failed (%d)\n", i, thread_args[i].result); + need_exit = 1; + } + } + if (need_exit) + ksft_exit_fail_msg("Aborting due to earlier errors.\n"); +} + +static void test_futex(void *futex_ptr, int must_fail) +{ + __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA); +} + +static void test_futex_mpol(void *futex_ptr, int must_fail) +{ + __test_futex(futex_ptr, must_fail, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); +} + +static void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int main(int argc, char *argv[]) +{ + struct futex32_numa *futex_numa; + int mem_size, i; + void *futex_ptr; + char c; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + ksft_print_header(); + ksft_set_plan(1); + + mem_size = sysconf(_SC_PAGE_SIZE); + futex_ptr = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + if (futex_ptr == MAP_FAILED) + ksft_exit_fail_msg("mmap() for %d bytes failed\n", mem_size); + + futex_numa = futex_ptr; + + ksft_print_msg("Regular test\n"); + futex_numa->futex = 0; + futex_numa->numa = FUTEX_NO_NODE; + test_futex(futex_ptr, 0); + + if (futex_numa->numa == FUTEX_NO_NODE) + ksft_exit_fail_msg("NUMA node is left uninitialized\n"); + + ksft_print_msg("Memory too small\n"); + test_futex(futex_ptr + mem_size - 4, 1); + + ksft_print_msg("Memory out of range\n"); + test_futex(futex_ptr + mem_size, 1); + + futex_numa->numa = FUTEX_NO_NODE; + mprotect(futex_ptr, mem_size, PROT_READ); + ksft_print_msg("Memory, RO\n"); + test_futex(futex_ptr, 1); + + mprotect(futex_ptr, mem_size, PROT_NONE); + ksft_print_msg("Memory, no access\n"); + test_futex(futex_ptr, 1); + + mprotect(futex_ptr, mem_size, PROT_READ | PROT_WRITE); + ksft_print_msg("Memory back to RW\n"); + test_futex(futex_ptr, 0); + + /* MPOL test. Does not work as expected */ + for (i = 0; i < 4; i++) { + unsigned long nodemask; + int ret; + + nodemask = 1 << i; + ret = mbind(futex_ptr, mem_size, MPOL_BIND, &nodemask, + sizeof(nodemask) * 8, 0); + if (ret == 0) { + ksft_print_msg("Node %d test\n", i); + futex_numa->futex = 0; + futex_numa->numa = FUTEX_NO_NODE; + + ret = futex2_wake(futex_ptr, 0, FUTEX2_SIZE_U32 | FUTEX_PRIVATE_FLAG | FUTEX2_NUMA | FUTEX2_MPOL); + if (ret < 0) + ksft_test_result_fail("Failed to wake 0 with MPOL: %m\n"); + if (0) + test_futex_mpol(futex_numa, 0); + if (futex_numa->numa != i) { + ksft_test_result_fail("Returned NUMA node is %d expected %d\n", + futex_numa->numa, i); + } + } + } + ksft_test_result_pass("NUMA MPOL tests passed\n"); + ksft_finished(); + return 0; +} diff --git a/tools/testing/selftests/futex/functional/futex_priv_hash.c b/tools/testing/selftests/futex/functional/futex_priv_hash.c new file mode 100644 index 000000000000..2dca18fefedc --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_priv_hash.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2025 Sebastian Andrzej Siewior <bigeasy@linutronix.de> + */ + +#define _GNU_SOURCE + +#include <errno.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <linux/prctl.h> +#include <sys/prctl.h> + +#include "logging.h" + +#define MAX_THREADS 64 + +static pthread_barrier_t barrier_main; +static pthread_mutex_t global_lock; +static pthread_t threads[MAX_THREADS]; +static int counter; + +#ifndef PR_FUTEX_HASH +#define PR_FUTEX_HASH 78 +# define PR_FUTEX_HASH_SET_SLOTS 1 +# define FH_FLAG_IMMUTABLE (1ULL << 0) +# define PR_FUTEX_HASH_GET_SLOTS 2 +# define PR_FUTEX_HASH_GET_IMMUTABLE 3 +#endif + +static int futex_hash_slots_set(unsigned int slots, int flags) +{ + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_SET_SLOTS, slots, flags); +} + +static int futex_hash_slots_get(void) +{ + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_SLOTS); +} + +static int futex_hash_immutable_get(void) +{ + return prctl(PR_FUTEX_HASH, PR_FUTEX_HASH_GET_IMMUTABLE); +} + +static void futex_hash_slots_set_verify(int slots) +{ + int ret; + + ret = futex_hash_slots_set(slots, 0); + if (ret != 0) { + ksft_test_result_fail("Failed to set slots to %d: %m\n", slots); + ksft_finished(); + } + ret = futex_hash_slots_get(); + if (ret != slots) { + ksft_test_result_fail("Set %d slots but PR_FUTEX_HASH_GET_SLOTS returns: %d, %m\n", + slots, ret); + ksft_finished(); + } + ksft_test_result_pass("SET and GET slots %d passed\n", slots); +} + +static void futex_hash_slots_set_must_fail(int slots, int flags) +{ + int ret; + + ret = futex_hash_slots_set(slots, flags); + ksft_test_result(ret < 0, "futex_hash_slots_set(%d, %d)\n", + slots, flags); +} + +static void *thread_return_fn(void *arg) +{ + return NULL; +} + +static void *thread_lock_fn(void *arg) +{ + pthread_barrier_wait(&barrier_main); + + pthread_mutex_lock(&global_lock); + counter++; + usleep(20); + pthread_mutex_unlock(&global_lock); + return NULL; +} + +static void create_max_threads(void *(*thread_fn)(void *)) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + ret = pthread_create(&threads[i], NULL, thread_fn, NULL); + if (ret) + ksft_exit_fail_msg("pthread_create failed: %m\n"); + } +} + +static void join_max_threads(void) +{ + int i, ret; + + for (i = 0; i < MAX_THREADS; i++) { + ret = pthread_join(threads[i], NULL); + if (ret) + ksft_exit_fail_msg("pthread_join failed for thread %d\n", i); + } +} + +static void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -g Test global hash instead intead local immutable \n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +static const char *test_msg_auto_create = "Automatic hash bucket init on thread creation.\n"; +static const char *test_msg_auto_inc = "Automatic increase with more than 16 CPUs\n"; + +int main(int argc, char *argv[]) +{ + int futex_slots1, futex_slotsn, online_cpus; + pthread_mutexattr_t mutex_attr_pi; + int use_global_hash = 0; + int ret; + char c; + + while ((c = getopt(argc, argv, "cghv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'g': + use_global_hash = 1; + break; + case 'h': + usage(basename(argv[0])); + exit(0); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + ksft_print_header(); + ksft_set_plan(22); + + ret = pthread_mutexattr_init(&mutex_attr_pi); + ret |= pthread_mutexattr_setprotocol(&mutex_attr_pi, PTHREAD_PRIO_INHERIT); + ret |= pthread_mutex_init(&global_lock, &mutex_attr_pi); + if (ret != 0) { + ksft_exit_fail_msg("Failed to initialize pthread mutex.\n"); + } + /* First thread, expect to be 0, not yet initialized */ + ret = futex_hash_slots_get(); + if (ret != 0) + ksft_exit_fail_msg("futex_hash_slots_get() failed: %d, %m\n", ret); + + ret = futex_hash_immutable_get(); + if (ret != 0) + ksft_exit_fail_msg("futex_hash_immutable_get() failed: %d, %m\n", ret); + + ksft_test_result_pass("Basic get slots and immutable status.\n"); + ret = pthread_create(&threads[0], NULL, thread_return_fn, NULL); + if (ret != 0) + ksft_exit_fail_msg("pthread_create() failed: %d, %m\n", ret); + + ret = pthread_join(threads[0], NULL); + if (ret != 0) + ksft_exit_fail_msg("pthread_join() failed: %d, %m\n", ret); + + /* First thread, has to initialiaze private hash */ + futex_slots1 = futex_hash_slots_get(); + if (futex_slots1 <= 0) { + ksft_print_msg("Current hash buckets: %d\n", futex_slots1); + ksft_exit_fail_msg(test_msg_auto_create); + } + + ksft_test_result_pass(test_msg_auto_create); + + online_cpus = sysconf(_SC_NPROCESSORS_ONLN); + ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS + 1); + if (ret != 0) + ksft_exit_fail_msg("pthread_barrier_init failed: %m.\n"); + + ret = pthread_mutex_lock(&global_lock); + if (ret != 0) + ksft_exit_fail_msg("pthread_mutex_lock failed: %m.\n"); + + counter = 0; + create_max_threads(thread_lock_fn); + pthread_barrier_wait(&barrier_main); + + /* + * The current default size of hash buckets is 16. The auto increase + * works only if more than 16 CPUs are available. + */ + ksft_print_msg("Online CPUs: %d\n", online_cpus); + if (online_cpus > 16) { + futex_slotsn = futex_hash_slots_get(); + if (futex_slotsn < 0 || futex_slots1 == futex_slotsn) { + ksft_print_msg("Expected increase of hash buckets but got: %d -> %d\n", + futex_slots1, futex_slotsn); + ksft_exit_fail_msg(test_msg_auto_inc); + } + ksft_test_result_pass(test_msg_auto_inc); + } else { + ksft_test_result_skip(test_msg_auto_inc); + } + ret = pthread_mutex_unlock(&global_lock); + + /* Once the user changes it, it has to be what is set */ + futex_hash_slots_set_verify(2); + futex_hash_slots_set_verify(4); + futex_hash_slots_set_verify(8); + futex_hash_slots_set_verify(32); + futex_hash_slots_set_verify(16); + + ret = futex_hash_slots_set(15, 0); + ksft_test_result(ret < 0, "Use 15 slots\n"); + + futex_hash_slots_set_verify(2); + join_max_threads(); + ksft_test_result(counter == MAX_THREADS, "Created of waited for %d of %d threads\n", + counter, MAX_THREADS); + counter = 0; + /* Once the user set something, auto reisze must be disabled */ + ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); + + create_max_threads(thread_lock_fn); + join_max_threads(); + + ret = futex_hash_slots_get(); + ksft_test_result(ret == 2, "No more auto-resize after manaul setting, got %d\n", + ret); + + futex_hash_slots_set_must_fail(1 << 29, 0); + + /* + * Once the private hash has been made immutable or global hash has been requested, + * then this requested can not be undone. + */ + if (use_global_hash) { + ret = futex_hash_slots_set(0, 0); + ksft_test_result(ret == 0, "Global hash request\n"); + } else { + ret = futex_hash_slots_set(4, FH_FLAG_IMMUTABLE); + ksft_test_result(ret == 0, "Immutable resize to 4\n"); + } + if (ret != 0) + goto out; + + futex_hash_slots_set_must_fail(4, 0); + futex_hash_slots_set_must_fail(4, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(8, 0); + futex_hash_slots_set_must_fail(8, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(0, FH_FLAG_IMMUTABLE); + futex_hash_slots_set_must_fail(6, FH_FLAG_IMMUTABLE); + + ret = pthread_barrier_init(&barrier_main, NULL, MAX_THREADS); + if (ret != 0) { + ksft_exit_fail_msg("pthread_barrier_init failed: %m\n"); + return 1; + } + create_max_threads(thread_lock_fn); + join_max_threads(); + + ret = futex_hash_slots_get(); + if (use_global_hash) { + ksft_test_result(ret == 0, "Continue to use global hash\n"); + } else { + ksft_test_result(ret == 4, "Continue to use the 4 hash buckets\n"); + } + + ret = futex_hash_immutable_get(); + ksft_test_result(ret == 1, "Hash reports to be immutable\n"); + +out: + ksft_finished(); + return 0; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c index 7d7a6a06cdb7..2d8230da9064 100644 --- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -98,7 +98,7 @@ int main(int argc, char *argv[]) info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC); if (!res || errno != EWOULDBLOCK) { - ksft_test_result_pass("futex_waitv returned: %d %s\n", + ksft_test_result_fail("futex_waitv returned: %d %s\n", res ? errno : res, res ? strerror(errno) : ""); ret = RET_FAIL; diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh index 5ccd599da6c3..81739849f299 100755 --- a/tools/testing/selftests/futex/functional/run.sh +++ b/tools/testing/selftests/futex/functional/run.sh @@ -82,3 +82,10 @@ echo echo ./futex_waitv $COLOR + +echo +./futex_priv_hash $COLOR +./futex_priv_hash -g $COLOR + +echo +./futex_numa_mpol $COLOR diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h index 9d305520e849..ea79662405bc 100644 --- a/tools/testing/selftests/futex/include/futex2test.h +++ b/tools/testing/selftests/futex/include/futex2test.h @@ -8,6 +8,53 @@ #define u64_to_ptr(x) ((void *)(uintptr_t)(x)) +#ifndef __NR_futex_waitv +#define __NR_futex_waitv 449 +struct futex_waitv { + __u64 val; + __u64 uaddr; + __u32 flags; + __u32 __reserved; +}; +#endif + +#ifndef __NR_futex_wake +#define __NR_futex_wake 454 +#endif + +#ifndef __NR_futex_wait +#define __NR_futex_wait 455 +#endif + +#ifndef FUTEX2_SIZE_U32 +#define FUTEX2_SIZE_U32 0x02 +#endif + +#ifndef FUTEX2_NUMA +#define FUTEX2_NUMA 0x04 +#endif + +#ifndef FUTEX2_MPOL +#define FUTEX2_MPOL 0x08 +#endif + +#ifndef FUTEX2_PRIVATE +#define FUTEX2_PRIVATE FUTEX_PRIVATE_FLAG +#endif + +#ifndef FUTEX2_NO_NODE +#define FUTEX_NO_NODE (-1) +#endif + +#ifndef FUTEX_32 +#define FUTEX_32 FUTEX2_SIZE_U32 +#endif + +struct futex32_numa { + futex_t futex; + futex_t numa; +}; + /** * futex_waitv - Wait at multiple futexes, wake on any * @waiters: Array of waiters @@ -20,3 +67,26 @@ static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned lon { return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid); } + +/* + * futex_wait() - block on uaddr with optional timeout + * @val: Expected value + * @flags: FUTEX2 flags + * @timeout: Relative timeout + * @clockid: Clock id for the timeout + */ +static inline int futex2_wait(void *uaddr, long val, unsigned int flags, + struct timespec *timeout, clockid_t clockid) +{ + return syscall(__NR_futex_wait, uaddr, val, ~0U, flags, timeout, clockid); +} + +/* + * futex2_wake() - Wake a number of futexes + * @nr: Number of threads to wake at most + * @flags: FUTEX2 flags + */ +static inline int futex2_wake(void *uaddr, int nr, unsigned int flags) +{ + return syscall(__NR_futex_wake, uaddr, ~0U, nr, flags); +} diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 45b5570441ce..b1f40857307d 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -39,7 +39,6 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y CONFIG_CPU_FREQ_STAT=y CONFIG_CPU_IDLE_GOV_LADDER=y CONFIG_CPUSETS=y -CONFIG_CRC_T10DIF=y CONFIG_CRYPTO_BLAKE2B=y CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_CRYPTO_SEQIV=y diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile index 67fe7a46cb62..e3000ccb9a5d 100644 --- a/tools/testing/selftests/kexec/Makefile +++ b/tools/testing/selftests/kexec/Makefile @@ -8,6 +8,13 @@ ifeq ($(ARCH_PROCESSED),$(filter $(ARCH_PROCESSED),x86 ppc64le)) TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh TEST_FILES := kexec_common_lib.sh +include ../../../scripts/Makefile.arch + +ifeq ($(IS_64_BIT)$(ARCH_PROCESSED),1x86) +TEST_PROGS += test_kexec_jump.sh +test_kexec_jump.sh: $(OUTPUT)/test_kexec_jump +endif + include ../lib.mk endif diff --git a/tools/testing/selftests/kexec/test_kexec_jump.c b/tools/testing/selftests/kexec/test_kexec_jump.c new file mode 100644 index 000000000000..fbce287866f5 --- /dev/null +++ b/tools/testing/selftests/kexec/test_kexec_jump.c @@ -0,0 +1,72 @@ +#include <unistd.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <linux/kexec.h> +#include <linux/reboot.h> +#include <sys/reboot.h> +#include <sys/syscall.h> + +asm( + " .code64\n" + " .data\n" + "purgatory_start:\n" + + // Trigger kexec debug exception handling + " int3\n" + + // Set load address for next time + " leaq purgatory_start_b(%rip), %r11\n" + " movq %r11, 8(%rsp)\n" + + // Back to Linux + " ret\n" + + // Same again + "purgatory_start_b:\n" + + // Trigger kexec debug exception handling + " int3\n" + + // Set load address for next time + " leaq purgatory_start(%rip), %r11\n" + " movq %r11, 8(%rsp)\n" + + // Back to Linux + " ret\n" + + "purgatory_end:\n" + ".previous" +); +extern char purgatory_start[], purgatory_end[]; + +int main (void) +{ + struct kexec_segment segment = {}; + int ret; + + segment.buf = purgatory_start; + segment.bufsz = purgatory_end - purgatory_start; + segment.mem = (void *)0x400000; + segment.memsz = 0x1000; + ret = syscall(__NR_kexec_load, 0x400000, 1, &segment, KEXEC_PRESERVE_CONTEXT); + if (ret) { + perror("kexec_load"); + exit(1); + } + + ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC); + if (ret) { + perror("kexec reboot"); + exit(1); + } + + ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC); + if (ret) { + perror("kexec reboot"); + exit(1); + } + printf("Success\n"); + return 0; +} + diff --git a/tools/testing/selftests/kexec/test_kexec_jump.sh b/tools/testing/selftests/kexec/test_kexec_jump.sh new file mode 100755 index 000000000000..6ae977054ba2 --- /dev/null +++ b/tools/testing/selftests/kexec/test_kexec_jump.sh @@ -0,0 +1,42 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Prevent loading a kernel image via the kexec_load syscall when +# signatures are required. (Dependent on CONFIG_IMA_ARCH_POLICY.) + +TEST="$0" +. ./kexec_common_lib.sh + +# kexec requires root privileges +require_root_privileges + +# get the kernel config +get_kconfig + +kconfig_enabled "CONFIG_KEXEC_JUMP=y" "kexec_jump is enabled" +if [ $? -eq 0 ]; then + log_skip "kexec_jump is not enabled" +fi + +kconfig_enabled "CONFIG_IMA_APPRAISE=y" "IMA enabled" +ima_appraise=$? + +kconfig_enabled "CONFIG_IMA_ARCH_POLICY=y" \ + "IMA architecture specific policy enabled" +arch_policy=$? + +get_secureboot_mode +secureboot=$? + +if [ $secureboot -eq 1 ] && [ $arch_policy -eq 1 ]; then + log_skip "Secure boot and CONFIG_IMA_ARCH_POLICY are enabled" +fi + +./test_kexec_jump +if [ $? -eq 0 ]; then + log_pass "kexec_jump succeeded" +else + # The more likely failure mode if anything went wrong is that the + # kernel just crashes. But if we get back here, sure, whine anyway. + log_fail "kexec_jump failed" +fi diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index f773f8f99249..f62b0a5aba35 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -50,8 +50,18 @@ LIBKVM_riscv += lib/riscv/ucall.c # Non-compiled test targets TEST_PROGS_x86 += x86/nx_huge_pages_test.sh +# Compiled test targets valid on all architectures with libkvm support +TEST_GEN_PROGS_COMMON = demand_paging_test +TEST_GEN_PROGS_COMMON += dirty_log_test +TEST_GEN_PROGS_COMMON += guest_print_test +TEST_GEN_PROGS_COMMON += kvm_binary_stats_test +TEST_GEN_PROGS_COMMON += kvm_create_max_vcpus +TEST_GEN_PROGS_COMMON += kvm_page_table_test +TEST_GEN_PROGS_COMMON += set_memory_region_test + # Compiled test targets -TEST_GEN_PROGS_x86 = x86/cpuid_test +TEST_GEN_PROGS_x86 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_x86 += x86/cpuid_test TEST_GEN_PROGS_x86 += x86/cr4_cpuid_sync_test TEST_GEN_PROGS_x86 += x86/dirty_log_page_splitting_test TEST_GEN_PROGS_x86 += x86/feature_msrs_test @@ -119,27 +129,21 @@ TEST_GEN_PROGS_x86 += x86/triple_fault_event_test TEST_GEN_PROGS_x86 += x86/recalc_apic_map_test TEST_GEN_PROGS_x86 += access_tracking_perf_test TEST_GEN_PROGS_x86 += coalesced_io_test -TEST_GEN_PROGS_x86 += demand_paging_test -TEST_GEN_PROGS_x86 += dirty_log_test TEST_GEN_PROGS_x86 += dirty_log_perf_test TEST_GEN_PROGS_x86 += guest_memfd_test -TEST_GEN_PROGS_x86 += guest_print_test TEST_GEN_PROGS_x86 += hardware_disable_test -TEST_GEN_PROGS_x86 += kvm_create_max_vcpus -TEST_GEN_PROGS_x86 += kvm_page_table_test TEST_GEN_PROGS_x86 += memslot_modification_stress_test TEST_GEN_PROGS_x86 += memslot_perf_test TEST_GEN_PROGS_x86 += mmu_stress_test TEST_GEN_PROGS_x86 += rseq_test -TEST_GEN_PROGS_x86 += set_memory_region_test TEST_GEN_PROGS_x86 += steal_time -TEST_GEN_PROGS_x86 += kvm_binary_stats_test TEST_GEN_PROGS_x86 += system_counter_offset_test TEST_GEN_PROGS_x86 += pre_fault_memory_test # Compiled outputs used by test targets TEST_GEN_PROGS_EXTENDED_x86 += x86/nx_huge_pages_test +TEST_GEN_PROGS_arm64 = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_arm64 += arm64/aarch32_id_regs TEST_GEN_PROGS_arm64 += arm64/arch_timer_edge_cases TEST_GEN_PROGS_arm64 += arm64/debug-exceptions @@ -158,22 +162,16 @@ TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 TEST_GEN_PROGS_arm64 += access_tracking_perf_test TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test -TEST_GEN_PROGS_arm64 += demand_paging_test -TEST_GEN_PROGS_arm64 += dirty_log_test TEST_GEN_PROGS_arm64 += dirty_log_perf_test -TEST_GEN_PROGS_arm64 += guest_print_test TEST_GEN_PROGS_arm64 += get-reg-list -TEST_GEN_PROGS_arm64 += kvm_create_max_vcpus -TEST_GEN_PROGS_arm64 += kvm_page_table_test TEST_GEN_PROGS_arm64 += memslot_modification_stress_test TEST_GEN_PROGS_arm64 += memslot_perf_test TEST_GEN_PROGS_arm64 += mmu_stress_test TEST_GEN_PROGS_arm64 += rseq_test -TEST_GEN_PROGS_arm64 += set_memory_region_test TEST_GEN_PROGS_arm64 += steal_time -TEST_GEN_PROGS_arm64 += kvm_binary_stats_test -TEST_GEN_PROGS_s390 = s390/memop +TEST_GEN_PROGS_s390 = $(TEST_GEN_PROGS_COMMON) +TEST_GEN_PROGS_s390 += s390/memop TEST_GEN_PROGS_s390 += s390/resets TEST_GEN_PROGS_s390 += s390/sync_regs_test TEST_GEN_PROGS_s390 += s390/tprot @@ -182,27 +180,14 @@ TEST_GEN_PROGS_s390 += s390/debug_test TEST_GEN_PROGS_s390 += s390/cpumodel_subfuncs_test TEST_GEN_PROGS_s390 += s390/shared_zeropage_test TEST_GEN_PROGS_s390 += s390/ucontrol_test -TEST_GEN_PROGS_s390 += demand_paging_test -TEST_GEN_PROGS_s390 += dirty_log_test -TEST_GEN_PROGS_s390 += guest_print_test -TEST_GEN_PROGS_s390 += kvm_create_max_vcpus -TEST_GEN_PROGS_s390 += kvm_page_table_test TEST_GEN_PROGS_s390 += rseq_test -TEST_GEN_PROGS_s390 += set_memory_region_test -TEST_GEN_PROGS_s390 += kvm_binary_stats_test +TEST_GEN_PROGS_riscv = $(TEST_GEN_PROGS_COMMON) TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test TEST_GEN_PROGS_riscv += riscv/ebreak_test TEST_GEN_PROGS_riscv += arch_timer TEST_GEN_PROGS_riscv += coalesced_io_test -TEST_GEN_PROGS_riscv += demand_paging_test -TEST_GEN_PROGS_riscv += dirty_log_test TEST_GEN_PROGS_riscv += get-reg-list -TEST_GEN_PROGS_riscv += guest_print_test -TEST_GEN_PROGS_riscv += kvm_binary_stats_test -TEST_GEN_PROGS_riscv += kvm_create_max_vcpus -TEST_GEN_PROGS_riscv += kvm_page_table_test -TEST_GEN_PROGS_riscv += set_memory_region_test TEST_GEN_PROGS_riscv += steal_time SPLIT_TESTS += arch_timer diff --git a/tools/testing/selftests/kvm/arm64/page_fault_test.c b/tools/testing/selftests/kvm/arm64/page_fault_test.c index ec33a8f9c908..dc6559dad9d8 100644 --- a/tools/testing/selftests/kvm/arm64/page_fault_test.c +++ b/tools/testing/selftests/kvm/arm64/page_fault_test.c @@ -199,7 +199,7 @@ static bool guest_set_ha(void) if (hadbs == 0) return false; - tcr = read_sysreg(tcr_el1) | TCR_EL1_HA; + tcr = read_sysreg(tcr_el1) | TCR_HA; write_sysreg(tcr, tcr_el1); isb(); diff --git a/tools/testing/selftests/kvm/arm64/set_id_regs.c b/tools/testing/selftests/kvm/arm64/set_id_regs.c index 322b9d3b0125..57708de2075d 100644 --- a/tools/testing/selftests/kvm/arm64/set_id_regs.c +++ b/tools/testing/selftests/kvm/arm64/set_id_regs.c @@ -129,10 +129,10 @@ static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = { REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0), REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0), REG_FTR_BITS(FTR_EXACT, ID_AA64PFR0_EL1, GIC, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0), - REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 1), + REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 1), REG_FTR_END, }; diff --git a/tools/testing/selftests/kvm/include/arm64/processor.h b/tools/testing/selftests/kvm/include/arm64/processor.h index 1e8d0d531fbd..b0fc0f945766 100644 --- a/tools/testing/selftests/kvm/include/arm64/processor.h +++ b/tools/testing/selftests/kvm/include/arm64/processor.h @@ -62,6 +62,67 @@ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT)) +/* TCR_EL1 specific flags */ +#define TCR_T0SZ_OFFSET 0 +#define TCR_T0SZ(x) ((UL(64) - (x)) << TCR_T0SZ_OFFSET) + +#define TCR_IRGN0_SHIFT 8 +#define TCR_IRGN0_MASK (UL(3) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_NC (UL(0) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBWA (UL(1) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WT (UL(2) << TCR_IRGN0_SHIFT) +#define TCR_IRGN0_WBnWA (UL(3) << TCR_IRGN0_SHIFT) + +#define TCR_ORGN0_SHIFT 10 +#define TCR_ORGN0_MASK (UL(3) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_NC (UL(0) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBWA (UL(1) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WT (UL(2) << TCR_ORGN0_SHIFT) +#define TCR_ORGN0_WBnWA (UL(3) << TCR_ORGN0_SHIFT) + +#define TCR_SH0_SHIFT 12 +#define TCR_SH0_MASK (UL(3) << TCR_SH0_SHIFT) +#define TCR_SH0_INNER (UL(3) << TCR_SH0_SHIFT) + +#define TCR_TG0_SHIFT 14 +#define TCR_TG0_MASK (UL(3) << TCR_TG0_SHIFT) +#define TCR_TG0_4K (UL(0) << TCR_TG0_SHIFT) +#define TCR_TG0_64K (UL(1) << TCR_TG0_SHIFT) +#define TCR_TG0_16K (UL(2) << TCR_TG0_SHIFT) + +#define TCR_IPS_SHIFT 32 +#define TCR_IPS_MASK (UL(7) << TCR_IPS_SHIFT) +#define TCR_IPS_52_BITS (UL(6) << TCR_IPS_SHIFT) +#define TCR_IPS_48_BITS (UL(5) << TCR_IPS_SHIFT) +#define TCR_IPS_40_BITS (UL(2) << TCR_IPS_SHIFT) +#define TCR_IPS_36_BITS (UL(1) << TCR_IPS_SHIFT) + +#define TCR_HA (UL(1) << 39) +#define TCR_DS (UL(1) << 59) + +/* + * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). + */ +#define PTE_ATTRINDX(t) ((t) << 2) +#define PTE_ATTRINDX_MASK GENMASK(4, 2) +#define PTE_ATTRINDX_SHIFT 2 + +#define PTE_VALID BIT(0) +#define PGD_TYPE_TABLE BIT(1) +#define PUD_TYPE_TABLE BIT(1) +#define PMD_TYPE_TABLE BIT(1) +#define PTE_TYPE_PAGE BIT(1) + +#define PTE_SHARED (UL(3) << 8) /* SH[1:0], inner shareable */ +#define PTE_AF BIT(10) + +#define PTE_ADDR_MASK(page_shift) GENMASK(47, (page_shift)) +#define PTE_ADDR_51_48 GENMASK(15, 12) +#define PTE_ADDR_51_48_SHIFT 12 +#define PTE_ADDR_MASK_LPA2(page_shift) GENMASK(49, (page_shift)) +#define PTE_ADDR_51_50_LPA2 GENMASK(9, 8) +#define PTE_ADDR_51_50_LPA2_SHIFT 8 + void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init); struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id, struct kvm_vcpu_init *init, void *guest_code); @@ -102,12 +163,6 @@ enum { (v) == VECTOR_SYNC_LOWER_64 || \ (v) == VECTOR_SYNC_LOWER_32) -/* Access flag */ -#define PTE_AF (1ULL << 10) - -/* Access flag update enable/disable */ -#define TCR_EL1_HA (1ULL << 39) - void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k, uint32_t *ipa16k, uint32_t *ipa64k); diff --git a/tools/testing/selftests/kvm/lib/arm64/processor.c b/tools/testing/selftests/kvm/lib/arm64/processor.c index 7ba3aa3755f3..9d69904cb608 100644 --- a/tools/testing/selftests/kvm/lib/arm64/processor.c +++ b/tools/testing/selftests/kvm/lib/arm64/processor.c @@ -72,13 +72,13 @@ static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs) uint64_t pte; if (use_lpa2_pte_format(vm)) { - pte = pa & GENMASK(49, vm->page_shift); - pte |= FIELD_GET(GENMASK(51, 50), pa) << 8; - attrs &= ~GENMASK(9, 8); + pte = pa & PTE_ADDR_MASK_LPA2(vm->page_shift); + pte |= FIELD_GET(GENMASK(51, 50), pa) << PTE_ADDR_51_50_LPA2_SHIFT; + attrs &= ~PTE_ADDR_51_50_LPA2; } else { - pte = pa & GENMASK(47, vm->page_shift); + pte = pa & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pte |= FIELD_GET(GENMASK(51, 48), pa) << 12; + pte |= FIELD_GET(GENMASK(51, 48), pa) << PTE_ADDR_51_48_SHIFT; } pte |= attrs; @@ -90,12 +90,12 @@ static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte) uint64_t pa; if (use_lpa2_pte_format(vm)) { - pa = pte & GENMASK(49, vm->page_shift); - pa |= FIELD_GET(GENMASK(9, 8), pte) << 50; + pa = pte & PTE_ADDR_MASK_LPA2(vm->page_shift); + pa |= FIELD_GET(PTE_ADDR_51_50_LPA2, pte) << 50; } else { - pa = pte & GENMASK(47, vm->page_shift); + pa = pte & PTE_ADDR_MASK(vm->page_shift); if (vm->page_shift == 16) - pa |= FIELD_GET(GENMASK(15, 12), pte) << 48; + pa |= FIELD_GET(PTE_ADDR_51_48, pte) << 48; } return pa; @@ -128,7 +128,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, uint64_t flags) { - uint8_t attr_idx = flags & 7; + uint8_t attr_idx = flags & (PTE_ATTRINDX_MASK >> PTE_ATTRINDX_SHIFT); + uint64_t pg_attr; uint64_t *ptep; TEST_ASSERT((vaddr % vm->page_size) == 0, @@ -147,18 +148,21 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PGD_TYPE_TABLE | PTE_VALID); switch (vm->pgtable_levels) { case 4: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PUD_TYPE_TABLE | PTE_VALID); /* fall through */ case 3: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8; if (!*ptep) - *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3); + *ptep = addr_pte(vm, vm_alloc_page_table(vm), + PMD_TYPE_TABLE | PTE_VALID); /* fall through */ case 2: ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8; @@ -167,7 +171,11 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, TEST_FAIL("Page table levels must be 2, 3, or 4"); } - *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */ + pg_attr = PTE_AF | PTE_ATTRINDX(attr_idx) | PTE_TYPE_PAGE | PTE_VALID; + if (!use_lpa2_pte_format(vm)) + pg_attr |= PTE_SHARED; + + *ptep = addr_pte(vm, paddr, pg_attr); } void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) @@ -293,20 +301,20 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P48V48_64K: case VM_MODE_P40V48_64K: case VM_MODE_P36V48_64K: - tcr_el1 |= 1ul << 14; /* TG0 = 64KB */ + tcr_el1 |= TCR_TG0_64K; break; case VM_MODE_P52V48_16K: case VM_MODE_P48V48_16K: case VM_MODE_P40V48_16K: case VM_MODE_P36V48_16K: case VM_MODE_P36V47_16K: - tcr_el1 |= 2ul << 14; /* TG0 = 16KB */ + tcr_el1 |= TCR_TG0_16K; break; case VM_MODE_P52V48_4K: case VM_MODE_P48V48_4K: case VM_MODE_P40V48_4K: case VM_MODE_P36V48_4K: - tcr_el1 |= 0ul << 14; /* TG0 = 4KB */ + tcr_el1 |= TCR_TG0_4K; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); @@ -319,35 +327,35 @@ void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init) case VM_MODE_P52V48_4K: case VM_MODE_P52V48_16K: case VM_MODE_P52V48_64K: - tcr_el1 |= 6ul << 32; /* IPS = 52 bits */ + tcr_el1 |= TCR_IPS_52_BITS; ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2; break; case VM_MODE_P48V48_4K: case VM_MODE_P48V48_16K: case VM_MODE_P48V48_64K: - tcr_el1 |= 5ul << 32; /* IPS = 48 bits */ + tcr_el1 |= TCR_IPS_48_BITS; break; case VM_MODE_P40V48_4K: case VM_MODE_P40V48_16K: case VM_MODE_P40V48_64K: - tcr_el1 |= 2ul << 32; /* IPS = 40 bits */ + tcr_el1 |= TCR_IPS_40_BITS; break; case VM_MODE_P36V48_4K: case VM_MODE_P36V48_16K: case VM_MODE_P36V48_64K: case VM_MODE_P36V47_16K: - tcr_el1 |= 1ul << 32; /* IPS = 36 bits */ + tcr_el1 |= TCR_IPS_36_BITS; break; default: TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode); } - sctlr_el1 |= (1 << 0) | (1 << 2) | (1 << 12) /* M | C | I */; - /* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */; - tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12); - tcr_el1 |= (64 - vm->va_bits) /* T0SZ */; + sctlr_el1 |= SCTLR_ELx_M | SCTLR_ELx_C | SCTLR_ELx_I; + + tcr_el1 |= TCR_IRGN0_WBWA | TCR_ORGN0_WBWA | TCR_SH0_INNER; + tcr_el1 |= TCR_T0SZ(vm->va_bits); if (use_lpa2_pte_format(vm)) - tcr_el1 |= (1ul << 59) /* DS */; + tcr_el1 |= TCR_DS; vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1); vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 279ad8946040..815bc45dd8dc 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -2019,9 +2019,8 @@ static struct exit_reason { KVM_EXIT_STRING(RISCV_SBI), KVM_EXIT_STRING(RISCV_CSR), KVM_EXIT_STRING(NOTIFY), -#ifdef KVM_EXIT_MEMORY_NOT_PRESENT - KVM_EXIT_STRING(MEMORY_NOT_PRESENT), -#endif + KVM_EXIT_STRING(LOONGARCH_IOCSR), + KVM_EXIT_STRING(MEMORY_FAULT), }; /* diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c index e5898678bfab..1375fca80bcd 100644 --- a/tools/testing/selftests/kvm/rseq_test.c +++ b/tools/testing/selftests/kvm/rseq_test.c @@ -196,25 +196,27 @@ static void calc_min_max_cpu(void) static void help(const char *name) { puts(""); - printf("usage: %s [-h] [-u]\n", name); + printf("usage: %s [-h] [-u] [-l latency]\n", name); printf(" -u: Don't sanity check the number of successful KVM_RUNs\n"); + printf(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n"); puts(""); exit(0); } int main(int argc, char *argv[]) { + int r, i, snapshot, opt, fd = -1, latency = -1; bool skip_sanity_check = false; - int r, i, snapshot; struct kvm_vm *vm; struct kvm_vcpu *vcpu; u32 cpu, rseq_cpu; - int opt; - while ((opt = getopt(argc, argv, "hu")) != -1) { + while ((opt = getopt(argc, argv, "hl:u")) != -1) { switch (opt) { case 'u': skip_sanity_check = true; + case 'l': + latency = atoi_paranoid(optarg); break; case 'h': default: @@ -243,6 +245,20 @@ int main(int argc, char *argv[]) pthread_create(&migration_thread, NULL, migration_worker, (void *)(unsigned long)syscall(SYS_gettid)); + if (latency >= 0) { + /* + * Writes to cpu_dma_latency persist only while the file is + * open, i.e. it allows userspace to provide guaranteed latency + * while running a workload. Keep the file open until the test + * completes, otherwise writing cpu_dma_latency is meaningless. + */ + fd = open("/dev/cpu_dma_latency", O_RDWR); + TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd)); + + r = write(fd, &latency, 4); + TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency"); + } + for (i = 0; !done; i++) { vcpu_run(vcpu); TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, @@ -278,6 +294,9 @@ int main(int argc, char *argv[]) "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu); } + if (fd > 0) + close(fd); + /* * Sanity check that the test was able to enter the guest a reasonable * number of times, e.g. didn't get stalled too often/long waiting for @@ -293,8 +312,8 @@ int main(int argc, char *argv[]) TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2), "Only performed %d KVM_RUNs, task stalled too much?\n\n" " Try disabling deep sleep states to reduce CPU wakeup latency,\n" - " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n" - " or run with -u to disable this sanity check.", i); + " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n" + " disable this sanity check.", i); pthread_join(migration_thread, NULL); diff --git a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c index 2b550eff35f1..390ae2d87493 100644 --- a/tools/testing/selftests/kvm/x86/monitor_mwait_test.c +++ b/tools/testing/selftests/kvm/x86/monitor_mwait_test.c @@ -7,6 +7,7 @@ #include "kvm_util.h" #include "processor.h" +#include "kselftest.h" #define CPUID_MWAIT (1u << 3) @@ -14,6 +15,8 @@ enum monitor_mwait_testcases { MWAIT_QUIRK_DISABLED = BIT(0), MISC_ENABLES_QUIRK_DISABLED = BIT(1), MWAIT_DISABLED = BIT(2), + CPUID_DISABLED = BIT(3), + TEST_MAX = CPUID_DISABLED * 2 - 1, }; /* @@ -35,11 +38,19 @@ do { \ testcase, vector); \ } while (0) -static void guest_monitor_wait(int testcase) +static void guest_monitor_wait(void *arg) { + int testcase = (int) (long) arg; u8 vector; - GUEST_SYNC(testcase); + u64 val = rdmsr(MSR_IA32_MISC_ENABLE) & ~MSR_IA32_MISC_ENABLE_MWAIT; + if (!(testcase & MWAIT_DISABLED)) + val |= MSR_IA32_MISC_ENABLE_MWAIT; + wrmsr(MSR_IA32_MISC_ENABLE, val); + + __GUEST_ASSERT(this_cpu_has(X86_FEATURE_MWAIT) == !(testcase & MWAIT_DISABLED), + "Expected CPUID.MWAIT %s\n", + (testcase & MWAIT_DISABLED) ? "cleared" : "set"); /* * Arbitrarily MONITOR this function, SVM performs fault checks before @@ -50,19 +61,6 @@ static void guest_monitor_wait(int testcase) vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0)); GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector); -} - -static void guest_code(void) -{ - guest_monitor_wait(MWAIT_DISABLED); - - guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED); - - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED); - guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED); GUEST_DONE(); } @@ -74,56 +72,64 @@ int main(int argc, char *argv[]) struct kvm_vm *vm; struct ucall uc; int testcase; + char test[80]; - TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT)); TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2)); - vm = vm_create_with_one_vcpu(&vcpu, guest_code); - vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + ksft_print_header(); + ksft_set_plan(12); + for (testcase = 0; testcase <= TEST_MAX; testcase++) { + vm = vm_create_with_one_vcpu(&vcpu, guest_monitor_wait); + vcpu_args_set(vcpu, 1, (void *)(long)testcase); + + disabled_quirks = 0; + if (testcase & MWAIT_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; + strcpy(test, "MWAIT can fault"); + } else { + strcpy(test, "MWAIT never faults"); + } + if (testcase & MISC_ENABLES_QUIRK_DISABLED) { + disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; + strcat(test, ", MISC_ENABLE updates CPUID"); + } else { + strcat(test, ", no CPUID updates"); + } + + vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); + + if (!(testcase & MISC_ENABLES_QUIRK_DISABLED) && + (!!(testcase & CPUID_DISABLED) ^ !!(testcase & MWAIT_DISABLED))) + continue; + + if (testcase & CPUID_DISABLED) { + strcat(test, ", CPUID clear"); + vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } else { + strcat(test, ", CPUID set"); + vcpu_set_cpuid_feature(vcpu, X86_FEATURE_MWAIT); + } + + if (testcase & MWAIT_DISABLED) + strcat(test, ", MWAIT disabled"); - while (1) { vcpu_run(vcpu); TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); switch (get_ucall(vcpu, &uc)) { - case UCALL_SYNC: - testcase = uc.args[1]; - break; case UCALL_ABORT: - REPORT_GUEST_ASSERT(uc); - goto done; + /* Detected in vcpu_run */ + break; case UCALL_DONE: - goto done; + ksft_test_result_pass("%s\n", test); + break; default: TEST_FAIL("Unknown ucall %lu", uc.cmd); - goto done; - } - - disabled_quirks = 0; - if (testcase & MWAIT_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS; - if (testcase & MISC_ENABLES_QUIRK_DISABLED) - disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT; - vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks); - - /* - * If the MISC_ENABLES quirk (KVM neglects to update CPUID to - * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT - * bit in MISC_ENABLES accordingly. If the quirk is enabled, - * the only valid configuration is MWAIT disabled, as CPUID - * can't be manually changed after running the vCPU. - */ - if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) { - TEST_ASSERT(testcase & MWAIT_DISABLED, - "Can't toggle CPUID features after running vCPU"); - continue; + break; } - - vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE, - (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT); + kvm_vm_free(vm); } + ksft_finished(); -done: - kvm_vm_free(vm); return 0; } diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h index b9054086a0c9..18a6014920b5 100644 --- a/tools/testing/selftests/landlock/audit.h +++ b/tools/testing/selftests/landlock/audit.h @@ -300,15 +300,22 @@ out: return err; } -static int __maybe_unused matches_log_domain_allocated(int audit_fd, +static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid, __u64 *domain_id) { - return audit_match_record( - audit_fd, AUDIT_LANDLOCK_DOMAIN, - REGEX_LANDLOCK_PREFIX - " status=allocated mode=enforcing pid=[0-9]\\+ uid=[0-9]\\+" - " exe=\"[^\"]\\+\" comm=\".*_test\"$", - domain_id); + static const char log_template[] = REGEX_LANDLOCK_PREFIX + " status=allocated mode=enforcing pid=%d uid=[0-9]\\+" + " exe=\"[^\"]\\+\" comm=\".*_test\"$"; + char log_match[sizeof(log_template) + 10]; + int log_match_len; + + log_match_len = + snprintf(log_match, sizeof(log_match), log_template, pid); + if (log_match_len > sizeof(log_match)) + return -E2BIG; + + return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match, + domain_id); } static int __maybe_unused matches_log_domain_deallocated( diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c index a0643070c403..cfc571afd0eb 100644 --- a/tools/testing/selftests/landlock/audit_test.c +++ b/tools/testing/selftests/landlock/audit_test.c @@ -9,6 +9,7 @@ #include <errno.h> #include <limits.h> #include <linux/landlock.h> +#include <pthread.h> #include <stdlib.h> #include <sys/mount.h> #include <sys/prctl.h> @@ -40,7 +41,6 @@ FIXTURE(audit) { struct audit_filter audit_filter; int audit_fd; - __u64(*domain_stack)[16]; }; FIXTURE_SETUP(audit) @@ -60,18 +60,10 @@ FIXTURE_SETUP(audit) TH_LOG("Failed to initialize audit: %s", error_msg); } clear_cap(_metadata, CAP_AUDIT_CONTROL); - - self->domain_stack = mmap(NULL, sizeof(*self->domain_stack), - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_ANONYMOUS, -1, 0); - ASSERT_NE(MAP_FAILED, self->domain_stack); - memset(self->domain_stack, 0, sizeof(*self->domain_stack)); } FIXTURE_TEARDOWN(audit) { - EXPECT_EQ(0, munmap(self->domain_stack, sizeof(*self->domain_stack))); - set_cap(_metadata, CAP_AUDIT_CONTROL); EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter)); clear_cap(_metadata, CAP_AUDIT_CONTROL); @@ -83,9 +75,15 @@ TEST_F(audit, layers) .scoped = LANDLOCK_SCOPE_SIGNAL, }; int status, ruleset_fd, i; + __u64(*domain_stack)[16]; __u64 prev_dom = 3; pid_t child; + domain_stack = mmap(NULL, sizeof(*domain_stack), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(MAP_FAILED, domain_stack); + memset(domain_stack, 0, sizeof(*domain_stack)); + ruleset_fd = landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); ASSERT_LE(0, ruleset_fd); @@ -94,7 +92,7 @@ TEST_F(audit, layers) child = fork(); ASSERT_LE(0, child); if (child == 0) { - for (i = 0; i < ARRAY_SIZE(*self->domain_stack); i++) { + for (i = 0; i < ARRAY_SIZE(*domain_stack); i++) { __u64 denial_dom = 1; __u64 allocated_dom = 2; @@ -107,7 +105,8 @@ TEST_F(audit, layers) matches_log_signal(_metadata, self->audit_fd, getppid(), &denial_dom)); EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(denial_dom, 1); EXPECT_NE(denial_dom, 0); EXPECT_EQ(denial_dom, allocated_dom); @@ -115,7 +114,7 @@ TEST_F(audit, layers) /* Checks that the new domain is younger than the previous one. */ EXPECT_GT(allocated_dom, prev_dom); prev_dom = allocated_dom; - (*self->domain_stack)[i] = allocated_dom; + (*domain_stack)[i] = allocated_dom; } /* Checks that we reached the maximum number of layers. */ @@ -142,23 +141,143 @@ TEST_F(audit, layers) /* Purges log from deallocated domains. */ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); - for (i = ARRAY_SIZE(*self->domain_stack) - 1; i >= 0; i--) { + for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) { __u64 deallocated_dom = 2; EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, &deallocated_dom)); - EXPECT_EQ((*self->domain_stack)[i], deallocated_dom) + EXPECT_EQ((*domain_stack)[i], deallocated_dom) { TH_LOG("Failed to match domain %llx (#%d)", - (*self->domain_stack)[i], i); + (*domain_stack)[i], i); } } + EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack))); EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, &audit_tv_default, sizeof(audit_tv_default))); - EXPECT_EQ(0, close(ruleset_fd)); } +struct thread_data { + pid_t parent_pid; + int ruleset_fd, pipe_child, pipe_parent; +}; + +static void *thread_audit_test(void *arg) +{ + const struct thread_data *data = (struct thread_data *)arg; + uintptr_t err = 0; + char buffer; + + /* TGID and TID are different for a second thread. */ + if (getpid() == gettid()) { + err = 1; + goto out; + } + + if (landlock_restrict_self(data->ruleset_fd, 0)) { + err = 2; + goto out; + } + + if (close(data->ruleset_fd)) { + err = 3; + goto out; + } + + /* Creates a denial to get the domain ID. */ + if (kill(data->parent_pid, 0) != -1) { + err = 4; + goto out; + } + + if (EPERM != errno) { + err = 5; + goto out; + } + + /* Signals the parent to read denial logs. */ + if (write(data->pipe_child, ".", 1) != 1) { + err = 6; + goto out; + } + + /* Waits for the parent to update audit filters. */ + if (read(data->pipe_parent, &buffer, 1) != 1) { + err = 7; + goto out; + } + +out: + close(data->pipe_child); + close(data->pipe_parent); + return (void *)err; +} + +/* Checks that the PID tied to a domain is not a TID but the TGID. */ +TEST_F(audit, thread) +{ + const struct landlock_ruleset_attr ruleset_attr = { + .scoped = LANDLOCK_SCOPE_SIGNAL, + }; + __u64 denial_dom = 1; + __u64 allocated_dom = 2; + __u64 deallocated_dom = 3; + pthread_t thread; + int pipe_child[2], pipe_parent[2]; + char buffer; + struct thread_data child_data; + + child_data.parent_pid = getppid(); + ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC)); + child_data.pipe_child = pipe_child[1]; + ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC)); + child_data.pipe_parent = pipe_parent[0]; + child_data.ruleset_fd = + landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + ASSERT_LE(0, child_data.ruleset_fd); + + /* TGID and TID are the same for the initial thread . */ + EXPECT_EQ(getpid(), gettid()); + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, pthread_create(&thread, NULL, thread_audit_test, + &child_data)); + + /* Waits for the child to generate a denial. */ + ASSERT_EQ(1, read(pipe_child[0], &buffer, 1)); + EXPECT_EQ(0, close(pipe_child[0])); + + /* Matches the signal log to get the domain ID. */ + EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd, + child_data.parent_pid, &denial_dom)); + EXPECT_NE(denial_dom, 1); + EXPECT_NE(denial_dom, 0); + + EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, getpid(), + &allocated_dom)); + EXPECT_EQ(denial_dom, allocated_dom); + + /* Updates filter rules to match the drop record. */ + set_cap(_metadata, CAP_AUDIT_CONTROL); + EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE)); + EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter, + AUDIT_DEL_RULE)); + clear_cap(_metadata, CAP_AUDIT_CONTROL); + + /* Signals the thread to exit, which will generate a domain deallocation. */ + ASSERT_EQ(1, write(pipe_parent[1], ".", 1)); + EXPECT_EQ(0, close(pipe_parent[1])); + ASSERT_EQ(0, pthread_join(thread, NULL)); + + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_dom_drop, sizeof(audit_tv_dom_drop))); + EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1, + &deallocated_dom)); + EXPECT_EQ(denial_dom, deallocated_dom); + EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO, + &audit_tv_default, sizeof(audit_tv_default))); +} + FIXTURE(audit_flags) { struct audit_filter audit_filter; @@ -273,7 +392,8 @@ TEST_F(audit_flags, signal) /* Checks domain information records. */ EXPECT_EQ(0, matches_log_domain_allocated( - self->audit_fd, &allocated_dom)); + self->audit_fd, getpid(), + &allocated_dom)); EXPECT_NE(*self->domain_id, 1); EXPECT_NE(*self->domain_id, 0); EXPECT_EQ(*self->domain_id, allocated_dom); diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c index f819011a8798..73729382d40f 100644 --- a/tools/testing/selftests/landlock/fs_test.c +++ b/tools/testing/selftests/landlock/fs_test.c @@ -5964,7 +5964,8 @@ TEST_F(audit_layout1, refer_handled) EXPECT_EQ(EXDEV, errno); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d1)); - EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, NULL)); + EXPECT_EQ(0, + matches_log_domain_allocated(self->audit_fd, getpid(), NULL)); EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer", dir_s1d3)); diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config index 81a1f64a22e8..377b3699ff31 100644 --- a/tools/testing/selftests/lib/config +++ b/tools/testing/selftests/lib/config @@ -1,2 +1,3 @@ CONFIG_TEST_BITMAP=m +CONFIG_PRIME_NUMBERS=m CONFIG_TEST_BITOPS=m diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c index e949a43a6145..17ed3e9917ca 100644 --- a/tools/testing/selftests/mincore/mincore_selftest.c +++ b/tools/testing/selftests/mincore/mincore_selftest.c @@ -261,9 +261,6 @@ TEST(check_file_mmap) TH_LOG("No read-ahead pages found in memory"); } - EXPECT_LT(i, vec_size) { - TH_LOG("Read-ahead pages reached the end of the file"); - } /* * End of the readahead window. The rest of the pages shouldn't * be in memory. @@ -286,8 +283,7 @@ out_free: /* * Test mincore() behavior on a page backed by a tmpfs file. This test - * performs the same steps as the previous one. However, we don't expect - * any readahead in this case. + * performs the same steps as the previous one. */ TEST(check_tmpfs_mmap) { @@ -298,7 +294,6 @@ TEST(check_tmpfs_mmap) int page_size; int fd; int i; - int ra_pages = 0; page_size = sysconf(_SC_PAGESIZE); vec_size = FILE_SIZE / page_size; @@ -341,8 +336,7 @@ TEST(check_tmpfs_mmap) } /* - * Touch a page in the middle of the mapping. We expect only - * that page to be fetched into memory. + * Touch a page in the middle of the mapping. */ addr[FILE_SIZE / 2] = 1; retval = mincore(addr, FILE_SIZE, vec); @@ -351,15 +345,6 @@ TEST(check_tmpfs_mmap) TH_LOG("Page not found in memory after use"); } - i = FILE_SIZE / 2 / page_size + 1; - while (i < vec_size && vec[i]) { - ra_pages++; - i++; - } - ASSERT_EQ(ra_pages, 0) { - TH_LOG("Read-ahead pages found in memory"); - } - munmap(addr, FILE_SIZE); close(fd); free(vec); diff --git a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh index 67df7b47087f..e1fe16bcbbe8 100755 --- a/tools/testing/selftests/mm/charge_reserved_hugetlb.sh +++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh @@ -29,7 +29,7 @@ fi if [[ $cgroup2 ]]; then cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup2 none $cgroup_path do_umount=1 fi @@ -37,7 +37,7 @@ if [[ $cgroup2 ]]; then else cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}') if [[ -z "$cgroup_path" ]]; then - cgroup_path=/dev/cgroup/memory + cgroup_path=$(mktemp -d) mount -t cgroup memory,hugetlb $cgroup_path do_umount=1 fi diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c index 2c3a0eb6b22d..9bc4591c7b16 100644 --- a/tools/testing/selftests/mm/compaction_test.c +++ b/tools/testing/selftests/mm/compaction_test.c @@ -90,6 +90,8 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, int compaction_index = 0; char nr_hugepages[20] = {0}; char init_nr_hugepages[24] = {0}; + char target_nr_hugepages[24] = {0}; + int slen; snprintf(init_nr_hugepages, sizeof(init_nr_hugepages), "%lu", initial_nr_hugepages); @@ -106,11 +108,18 @@ int check_compaction(unsigned long mem_free, unsigned long hugepage_size, goto out; } - /* Request a large number of huge pages. The Kernel will allocate - as much as it can */ - if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { - ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n", - strerror(errno)); + /* + * Request huge pages for about half of the free memory. The Kernel + * will allocate as much as it can, and we expect it will get at least 1/3 + */ + nr_hugepages_ul = mem_free / hugepage_size / 2; + snprintf(target_nr_hugepages, sizeof(target_nr_hugepages), + "%lu", nr_hugepages_ul); + + slen = strlen(target_nr_hugepages); + if (write(fd, target_nr_hugepages, slen) != slen) { + ksft_print_msg("Failed to write %lu to /proc/sys/vm/nr_hugepages: %s\n", + nr_hugepages_ul, strerror(errno)); goto close_fd; } diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c index f0cb14ea8608..b6cfe0a4b7df 100644 --- a/tools/testing/selftests/mm/cow.c +++ b/tools/testing/selftests/mm/cow.c @@ -293,7 +293,7 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size, .iov_base = mem, .iov_len = size, }; - ssize_t cur, total, transferred; + ssize_t cur, total, transferred = 0; struct comm_pipes comm_pipes; char *old, *new; int ret, fds[2]; diff --git a/tools/testing/selftests/mm/guard-regions.c b/tools/testing/selftests/mm/guard-regions.c index b3d0e2771096..eba43ead13ae 100644 --- a/tools/testing/selftests/mm/guard-regions.c +++ b/tools/testing/selftests/mm/guard-regions.c @@ -271,12 +271,16 @@ FIXTURE_SETUP(guard_regions) self->page_size = (unsigned long)sysconf(_SC_PAGESIZE); setup_sighandler(); - if (variant->backing == ANON_BACKED) + switch (variant->backing) { + case ANON_BACKED: return; - - self->fd = open_file( - variant->backing == SHMEM_BACKED ? "/tmp/" : "", - self->path); + case LOCAL_FILE_BACKED: + self->fd = open_file("", self->path); + break; + case SHMEM_BACKED: + self->fd = memfd_create(self->path, 0); + break; + } /* We truncate file to at least 100 pages, tests can modify as needed. */ ASSERT_EQ(ftruncate(self->fd, 100 * self->page_size), 0); @@ -1696,7 +1700,7 @@ TEST_F(guard_regions, readonly_file) char *ptr; int i; - if (variant->backing == ANON_BACKED) + if (variant->backing != LOCAL_FILE_BACKED) SKIP(return, "Read-only test specific to file-backed"); /* Map shared so we can populate with pattern, populate it, unmap. */ diff --git a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh index 11f9bbe7dc22..0b0d4ba1af27 100755 --- a/tools/testing/selftests/mm/hugetlb_reparenting_test.sh +++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh @@ -23,7 +23,7 @@ fi if [[ $cgroup2 ]]; then CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}') if [[ -z "$CGROUP_ROOT" ]]; then - CGROUP_ROOT=/dev/cgroup/memory + CGROUP_ROOT=$(mktemp -d) mount -t cgroup2 none $CGROUP_ROOT do_umount=1 fi diff --git a/tools/testing/selftests/mm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h index 1bad310d282a..17bf2d1b0192 100644 --- a/tools/testing/selftests/mm/pkey-powerpc.h +++ b/tools/testing/selftests/mm/pkey-powerpc.h @@ -3,6 +3,8 @@ #ifndef _PKEYS_POWERPC_H #define _PKEYS_POWERPC_H +#include <sys/stat.h> + #ifndef SYS_pkey_alloc # define SYS_pkey_alloc 384 # define SYS_pkey_free 385 @@ -102,8 +104,18 @@ static inline void expect_fault_on_read_execonly_key(void *p1, int pkey) return; } +#define REPEAT_8(s) s s s s s s s s +#define REPEAT_64(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) \ + REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) REPEAT_8(s) +#define REPEAT_512(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) \ + REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) REPEAT_64(s) +#define REPEAT_4096(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) \ + REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) REPEAT_512(s) +#define REPEAT_16384(s) REPEAT_4096(s) REPEAT_4096(s) \ + REPEAT_4096(s) REPEAT_4096(s) + /* 4-byte instructions * 16384 = 64K page */ -#define __page_o_noops() asm(".rept 16384 ; nop; .endr") +#define __page_o_noops() asm(REPEAT_16384("nop\n")) static inline void *malloc_pkey_with_mprotect_subpage(long size, int prot, u16 pkey) { diff --git a/tools/testing/selftests/mm/pkey_util.c b/tools/testing/selftests/mm/pkey_util.c index ca4ad0d44ab2..255b332f7a08 100644 --- a/tools/testing/selftests/mm/pkey_util.c +++ b/tools/testing/selftests/mm/pkey_util.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0-only +#define __SANE_USERSPACE_TYPES__ #include <sys/syscall.h> #include <unistd.h> diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile index 0c0d7b1234c1..4d4f810cdf2c 100644 --- a/tools/testing/selftests/mount_setattr/Makefile +++ b/tools/testing/selftests/mount_setattr/Makefile @@ -2,6 +2,8 @@ # Makefile for mount selftests. CFLAGS = -g $(KHDR_INCLUDES) -Wall -O2 -pthread +LOCAL_HDRS += ../filesystems/wrappers.h + TEST_GEN_PROGS := mount_setattr_test include ../lib.mk diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index 48a000cabc97..8b378c91debf 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -20,7 +20,7 @@ #include <stdarg.h> #include <linux/mount.h> -#include "../filesystems/overlayfs/wrappers.h" +#include "../filesystems/wrappers.h" #include "../kselftest_harness.h" #ifndef CLONE_NEWNS @@ -107,46 +107,6 @@ #endif #endif -#ifndef __NR_open_tree - #if defined __alpha__ - #define __NR_open_tree 538 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_open_tree 4428 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_open_tree 6428 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_open_tree 5428 - #endif - #elif defined __ia64__ - #define __NR_open_tree (428 + 1024) - #else - #define __NR_open_tree 428 - #endif -#endif - -#ifndef __NR_move_mount - #if defined __alpha__ - #define __NR_move_mount 539 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_move_mount 4429 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_move_mount 6429 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_move_mount 5429 - #endif - #elif defined __ia64__ - #define __NR_move_mount (428 + 1024) - #else - #define __NR_move_mount 429 - #endif -#endif - #ifndef MOUNT_ATTR_IDMAP #define MOUNT_ATTR_IDMAP 0x00100000 #endif @@ -161,23 +121,6 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); } -#ifndef OPEN_TREE_CLONE -#define OPEN_TREE_CLONE 1 -#endif - -#ifndef OPEN_TREE_CLOEXEC -#define OPEN_TREE_CLOEXEC O_CLOEXEC -#endif - -#ifndef AT_RECURSIVE -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ -#endif - -static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) -{ - return syscall(__NR_open_tree, dfd, filename, flags); -} - static ssize_t write_nointr(int fd, const void *buf, size_t count) { ssize_t ret; @@ -1076,7 +1019,7 @@ FIXTURE_SETUP(mount_setattr_idmapped) ASSERT_EQ(mkdir("/mnt/D", 0777), 0); img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600); ASSERT_GE(img_fd, 0); - ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0); + ASSERT_EQ(ftruncate(img_fd, 2147483648 /* 2 GB */), 0); ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0); ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0); ASSERT_EQ(close(img_fd), 0); diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 679542f565a4..532bb732bc6d 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -39,6 +39,7 @@ scm_rights sk_bind_sendto_listen sk_connect_zero_addr sk_so_peek_off +skf_net_off socket so_incoming_cpu so_netns_cookie diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 6d718b478ed8..70a38f485d4d 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -31,6 +31,7 @@ TEST_PROGS += veth.sh TEST_PROGS += ioam6.sh TEST_PROGS += gro.sh TEST_PROGS += gre_gso.sh +TEST_PROGS += gre_ipv6_lladdr.sh TEST_PROGS += cmsg_so_mark.sh TEST_PROGS += cmsg_so_priority.sh TEST_PROGS += test_so_rcv.sh @@ -106,6 +107,8 @@ TEST_PROGS += ipv6_route_update_soft_lockup.sh TEST_PROGS += busy_poll_test.sh TEST_GEN_PROGS += proc_net_pktgen TEST_PROGS += lwt_dst_cache_ref_loop.sh +TEST_PROGS += skf_net_off.sh +TEST_GEN_FILES += skf_net_off # YNL files, must be before "include ..lib.mk" YNL_GEN_FILES := busy_poller netlink-dumps diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index b866bab1d92a..c7cea556b416 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -359,6 +359,23 @@ fib_rule6_test() "$getnomatch" "iif flowlabel masked redirect to table" \ "iif flowlabel masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP6 iif vrf0" + getmatch="from $SRC_IP6 iif $DEV" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule6_vrf_test() @@ -635,6 +652,23 @@ fib_rule4_test() "$getnomatch" "iif dscp masked redirect to table" \ "iif dscp masked no redirect to table" fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP iif vrf0" + getmatch="from $SRC_IP iif $DEV" + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" + fi } fib_rule4_vrf_test() diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 90f8a244ea90..e59fba366a0a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -194,6 +194,100 @@ other_tpid() tc qdisc del dev $h2 clsact } +8021p_do() +{ + local should_fail=$1; shift + local mac=de:ad:be:ef:13:37 + + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err_fail $should_fail $? "802.1p-tagged reception" + + tc filter del dev $h2 ingress pref 1 +} + +8021p() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with the default_pvid, 1, packets tagged with VID 0 are + # accepted. + 8021p_do 0 + + # Test that packets tagged with VID 0 are still accepted after changing + # the default_pvid. + ip link set br0 type bridge vlan_default_pvid 10 + 8021p_do 0 + + log_test "Reception of 802.1p-tagged traffic" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + +send_untagged_and_8021p() +{ + ping_do $h1 192.0.2.2 + check_fail $? + + 8021p_do 1 +} + +drop_untagged() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with no PVID, untagged and 802.1p-tagged traffic is + # dropped. + ip link set br0 type bridge vlan_default_pvid 1 + + # First we reconfigure the default_pvid, 1, as a non-PVID VLAN. + bridge vlan add dev $swp1 vid 1 untagged + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Next we try to delete VID 1 altogether + bridge vlan del dev $swp1 vid 1 + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Set up the bridge without a default_pvid, then check that the 8021q + # module, when the bridge port goes down and then up again, does not + # accidentally re-enable untagged packet reception. + ip link set br0 type bridge vlan_default_pvid 0 + ip link set $swp1 down + ip link set $swp1 up + setup_wait + send_untagged_and_8021p + + # Remove swp1 as a bridge port and let it rejoin the bridge while it + # has no default_pvid. + ip link set $swp1 nomaster + ip link set $swp1 master br0 + send_untagged_and_8021p + + # Restore settings + ip link set br0 type bridge vlan_default_pvid 1 + + log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh new file mode 100755 index 000000000000..8992aeabfe0b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh @@ -0,0 +1,421 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" \ + test_clock_jump_backward \ + test_taprio_after_ptp \ + test_max_sdu \ + test_clock_jump_backward_forward \ +" +NUM_NETIFS=4 +source tc_common.sh +source lib.sh +source tsn_lib.sh + +require_command python3 + +# The test assumes the usual topology from the README, where h1 is connected to +# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge. +# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2. +# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to +# swp1 (and both to CLOCK_REALTIME). +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +UDS_ADDRESS_H1="/var/run/ptp4l_h1" +UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1" + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# Tunables +NUM_PKTS=100 +STREAM_VID=10 +STREAM_PRIO_1=6 +STREAM_PRIO_2=5 +STREAM_PRIO_3=4 +# PTP uses TC 0 +ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2)) +# Use a conservative cycle of 10 ms to allow the test to still pass when the +# kernel has some extra overhead like lockdep etc +CYCLE_TIME_NS=10000000 +# Create two Gate Control List entries, one OPEN and one CLOSE, of equal +# durations +GATE_DURATION_NS=$((CYCLE_TIME_NS / 2)) +# Give 2/3 of the cycle time to user space and 1/3 to the kernel +FUDGE_FACTOR=$((CYCLE_TIME_NS / 3)) +# Shift the isochron base time by half the gate time, so that packets are +# always received by swp1 close to the middle of the time slot, to minimize +# inaccuracies due to network sync +SHIFT_TIME_NS=$((GATE_DURATION_NS / 2)) + +path_delay= + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +switch_create() +{ + local h2_mac_addr=$(mac_get $h2) + + ip link set $swp1 up + ip link set $swp2 up + + ip link add br0 type bridge vlan_filtering 1 + ip link set $swp1 master br0 + ip link set $swp2 master br0 + ip link set br0 up + + bridge vlan add dev $swp2 vid $STREAM_VID + bridge vlan add dev $swp1 vid $STREAM_VID + bridge fdb add dev $swp2 \ + $h2_mac_addr vlan $STREAM_VID static master +} + +switch_destroy() +{ + ip link del br0 +} + +ptp_setup() +{ + # Set up swp1 as a master PHC for h1, synchronized to the local + # CLOCK_REALTIME. + phc2sys_start $UDS_ADDRESS_SWP1 + ptp4l_start $h1 true $UDS_ADDRESS_H1 + ptp4l_start $swp1 false $UDS_ADDRESS_SWP1 +} + +ptp_cleanup() +{ + ptp4l_stop $swp1 + ptp4l_stop $h1 + phc2sys_stop +} + +txtime_setup() +{ + local if_name=$1 + + tc qdisc add dev $if_name clsact + # Classify PTP on TC 7 and isochron on TC 6 + tc filter add dev $if_name egress protocol 0x88f7 \ + flower action skbedit priority 7 + tc filter add dev $if_name egress protocol 802.1Q \ + flower vlan_ethtype 0xdead action skbedit priority 6 + tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + hw 1 + # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1. + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR +} + +txtime_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name clsact + tc qdisc del dev $if_name root +} + +taprio_replace() +{ + local if_name="$1"; shift + local extra_args="$1"; shift + + # STREAM_PRIO_1 always has an open gate. + # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time) + # STREAM_PRIO_3 always has a closed gate. + tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \ + sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \ + base-time 0 flags 0x2 $extra_args + taprio_wait_for_admin $if_name +} + +taprio_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name root +} + +probe_path_delay() +{ + local isochron_dat="$(mktemp)" + local received + + log_info "Probing path delay" + + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \ + "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \ + "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + if [ "$received" != "$NUM_PKTS" ]; then + echo "Cannot establish basic data path between $h1 and $h2" + exit $ksft_fail + fi + + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(np.max(w))) + EOF + path_delay=$(python3 ./isochron_postprocess.py) + + log_info "Path delay from $h1 to $h2 estimated at $path_delay ns" + + if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then + echo "Path delay larger than gate duration, aborting" + exit $ksft_fail + fi + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create + + txtime_setup $h1 + + # Temporarily set up PTP just to probe the end-to-end path delay. + ptp_setup + probe_path_delay + ptp_cleanup +} + +cleanup() +{ + pre_cleanup + + isochron_recv_stop + txtime_cleanup $h1 + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +run_test() +{ + local base_time=$1; shift + local stream_prio=$1; shift + local expected_delay=$1; shift + local should_fail=$1; shift + local test_name=$1; shift + local isochron_dat="$(mktemp)" + local received + local median_delay + + RET=0 + + # Set the shift time equal to the cycle time, which effectively + # cancels the default advance time. Packets won't be sent early in + # software, which ensures that they won't prematurely enter through + # the open gate in __test_out_of_band(). Also, the gate is open for + # long enough that this won't cause a problem in __test_in_band(). + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \ + "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \ + "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + [ "$received" = "$NUM_PKTS" ] + check_err_fail $should_fail $? "Reception of $NUM_PKTS packets" + + if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(int(np.median(w)))) + EOF + median_delay=$(python3 ./isochron_postprocess.py) + + # If the condition below is true, packets were delayed by a closed gate + [ "$median_delay" -gt $((path_delay + expected_delay)) ] + check_fail $? "Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay" + + # If the condition below is true, packets were sent expecting them to + # hit a closed gate in the switch, but were not delayed + [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ] + check_fail $? "Median delay $median_delay is less than expected delay $expected_delay" + fi + + log_test "$test_name" + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +__test_always_open() +{ + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open" +} + +__test_always_closed() +{ + run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed" +} + +__test_in_band() +{ + # Send packets in-band with the OPEN gate entry + run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate" +} + +__test_out_of_band() +{ + # Send packets in-band with the CLOSE gate entry + run_test 0.005000000 $STREAM_PRIO_2 \ + $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \ + "Out of band with gate" +} + +run_subtests() +{ + __test_always_open + __test_always_closed + __test_in_band + __test_out_of_band +} + +test_taprio_after_ptp() +{ + log_info "Setting up taprio after PTP" + ptp_setup + taprio_replace $swp2 + run_subtests + taprio_cleanup $swp2 + ptp_cleanup +} + +__test_under_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0" + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU" +} + +__test_over_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0" + run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU" +} + +test_max_sdu() +{ + ptp_setup + __test_under_max_sdu + __test_over_max_sdu + taprio_cleanup $swp2 + ptp_cleanup +} + +# Perform a clock jump in the past without synchronization running, so that the +# time base remains where it was set by phc_ctl. +test_clock_jump_backward() +{ + # This is a more complex schedule specifically crafted in a way that + # has been problematic on NXP LS1028A. Not much to test with it other + # than the fact that it passes traffic. + tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ + base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ + sched-entry S 20 300000 sched-entry S 48 200000 \ + sched-entry S 20 300000 sched-entry S 83 200000 \ + sched-entry S 40 300000 sched-entry S 00 200000 flags 2 + + log_info "Forcing a backward clock jump" + phc_ctl $swp1 set 0 + + ping_test $h1 192.0.2.2 + taprio_cleanup $swp2 +} + +# Test that taprio tolerates clock jumps. +# Since ptp4l and phc2sys are running, it is expected for the time to +# eventually recover (through yet another clock jump). Isochron waits +# until that is the case. +test_clock_jump_backward_forward() +{ + log_info "Forcing a backward and a forward clock jump" + taprio_replace $swp2 + phc_ctl $swp1 set 0 + ptp_setup + ping_test $h1 192.0.2.2 + run_subtests + ptp_cleanup + taprio_cleanup $swp2 +} + +tc_offload_check +if [[ $? -ne 0 ]]; then + log_test_skip "Could not test offloaded functionality" + exit $EXIT_STATUS +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index b91bcd8008a9..08c044ff6689 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright 2021-2022 NXP +tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts + REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes} REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes} @@ -18,6 +20,7 @@ fi if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then require_command phc2sys require_command ptp4l + require_command phc_ctl fi phc2sys_start() @@ -182,6 +185,7 @@ isochron_do() local base_time=$1; shift local cycle_time=$1; shift local shift_time=$1; shift + local window_size=$1; shift local num_pkts=$1; shift local vid=$1; shift local priority=$1; shift @@ -212,6 +216,10 @@ isochron_do() extra_args="${extra_args} --shift-time=${shift_time}" fi + if ! [ -z "${window_size}" ]; then + extra_args="${extra_args} --window-size=${window_size}" + fi + if [ "${use_l2}" = "true" ]; then extra_args="${extra_args} --l2 --etype=0xdead ${vid}" receiver_extra_args="--l2 --etype=0xdead" @@ -247,3 +255,21 @@ isochron_do() cpufreq_restore ${ISOCHRON_CPU} } + +isochron_report_num_received() +{ + local isochron_dat=$1; shift + + # Count all received packets by looking at the non-zero RX timestamps + isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "%u\n" --printf-args "R" | \ + grep -w -v '0' | wc -l +} + +taprio_wait_for_admin() +{ + local if_name="$1"; shift + + "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name" +} diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 000000000000..5b34f6e1f831 --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,177 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + +PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check if network device has an IPv6 link-local address assigned. +# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_ll_addr() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "" + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). +# +test_gre_device() +{ + local GRE_TYPE="$1" + local LOCAL_IP="$2" + local REMOTE_IP="$3" + local MODE="$4" + local ADDR_GEN_MODE + local MATCH_REGEXP + local MSG + + ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}" + + case "${MODE}" in + "eui64") + ADDR_GEN_MODE=0 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + "none") + ADDR_GEN_MODE=1 + MATCH_REGEXP="" + MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=1 # No link-local address should be generated + ;; + "stable-privacy") + ADDR_GEN_MODE=2 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + # Initialise stable_secret (required for stable-privacy mode) + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd" + ;; + "random") + ADDR_GEN_MODE=3 + MATCH_REGEXP="stable-privacy" + MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}" + XRET=0 + ;; + esac + + # Check that IPv6 link-local address is generated when device goes up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + ip -netns "${NS0}" link set dev gretest up + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}" + + # Now disable link-local address generation + ip -netns "${NS0}" link set dev gretest down + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1 + ip -netns "${NS0}" link set dev gretest up + + # Check that link-local address generation works when re-enabled while + # the device is already up + ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}" + check_ipv6_ll_addr gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}" + + ip -netns "${NS0}" link del dev gretest +} + +test_gre4() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "gre" "gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 link-local address generation on ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 4f55477ffe08..e7a75341f0f3 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -206,9 +206,8 @@ chk_dump_one() local token local msg - ss_token="$(ss -inmHMN $ns | grep 'token:' |\ - head -n 1 |\ - sed 's/.*token:\([0-9a-f]*\).*/\1/')" + ss_token="$(ss -inmHMN $ns | + mptcp_lib_get_info_value "token" "token")" token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\ awk -F':[ \t]+' '/^token/ {print $2}')" diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 13a3b68181ee..befa66f5a366 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1441,6 +1441,15 @@ chk_join_nr() fi fi + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "synack HMAC" + fail_test "got $count JOIN[s] synack HMAC failure expected 0" + fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then rc=${KSFT_SKIP} @@ -1450,6 +1459,15 @@ chk_join_nr() fail_test "got $count JOIN[s] ack rx expected $ack_nr" fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "ack HMAC" + fail_test "got $count JOIN[s] ack HMAC failure expected 0" + fi + print_results "join Rx" ${rc} join_syn_tx="${join_syn_tx:-${syn_nr}}" \ diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 47088b005390..1f5979c1510c 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -27,7 +27,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload net_port_proto_match" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch" # List of possible paths to pktgen script from kernel tree for performance tests PKTGEN_SCRIPT_PATHS=" @@ -387,6 +387,25 @@ race_repeat 0 perf_duration 0 " + +TYPE_avx2_mismatch=" +display avx2 false match +type_spec inet_proto . ipv6_addr +chain_spec meta l4proto . ip6 daddr +dst proto addr6 +src +start 1 +count 1 +src_delta 1 +tools ping +proto icmp6 + +race_repeat 0 + +perf_duration 0 +" + + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -1629,6 +1648,24 @@ test_bug_net_port_proto_match() { nft flush ruleset } +test_bug_avx2_mismatch() +{ + setup veth send_"${proto}" set || return ${ksft_skip} + + local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001" + local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001" + + nft "add element inet filter test { icmpv6 . $a1 }" + + dst_addr6="$a2" + send_icmp6 + + if [ "$(count_packets)" -gt "0" ]; then + err "False match for $a2" + return 1 + fi +} + test_reported_issues() { eval test_bug_"${subtest}" } diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c new file mode 100644 index 000000000000..1fdf61d6cd7f --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Open a tun device. + * + * [modifications: use IFF_NAPI_FRAGS, add sk filter] + * + * Expects the device to have been configured previously, e.g.: + * sudo ip tuntap add name tap1 mode tap + * sudo ip link set tap1 up + * sudo ip link set dev tap1 addr 02:00:00:00:00:01 + * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad + * + * And to avoid premature pskb_may_pull: + * + * sudo ethtool -K tap1 gro off + * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux' + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if.h> +#include <linux/if_packet.h> +#include <linux/if_tun.h> +#include <linux/ipv6.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <poll.h> +#include <signal.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/poll.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <unistd.h> + +static bool cfg_do_filter; +static bool cfg_do_frags; +static int cfg_dst_port = 8000; +static char *cfg_ifname; + +static int tun_open(const char *tun_name) +{ + struct ifreq ifr = {0}; + int fd, ret; + + fd = open("/dev/net/tun", O_RDWR); + if (fd == -1) + error(1, errno, "open /dev/net/tun"); + + ifr.ifr_flags = IFF_TAP; + if (cfg_do_frags) + ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS; + + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1); + + ret = ioctl(fd, TUNSETIFF, &ifr); + if (ret) + error(1, ret, "ioctl TUNSETIFF"); + + return fd; +} + +static void sk_set_filter(int fd) +{ + const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt); + const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest); + + /* Filter UDP packets with destination port cfg_dst_port */ + struct sock_filter filter_code[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + + struct sock_fprog filter = { + sizeof(filter_code) / sizeof(filter_code[0]), + filter_code, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter))) + error(1, errno, "setsockopt attach filter"); +} + +static int raw_open(void) +{ + int fd; + + fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP); + if (fd == -1) + error(1, errno, "socket raw (udp)"); + + if (cfg_do_filter) + sk_set_filter(fd); + + return fd; +} + +static void tun_write(int fd) +{ + const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 }; + const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }; + struct tun_pi pi = {0}; + struct ipv6hdr ip6h = {0}; + struct udphdr uh = {0}; + struct ethhdr eth = {0}; + uint32_t payload; + struct iovec iov[5]; + int ret; + + pi.proto = htons(ETH_P_IPV6); + + memcpy(eth.h_source, eth_src, sizeof(eth_src)); + memcpy(eth.h_dest, eth_dst, sizeof(eth_dst)); + eth.h_proto = htons(ETH_P_IPV6); + + ip6h.version = 6; + ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t)); + ip6h.nexthdr = IPPROTO_UDP; + ip6h.hop_limit = 8; + if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1) + error(1, errno, "inet_pton src"); + if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1) + error(1, errno, "inet_pton src"); + + uh.source = htons(8000); + uh.dest = htons(cfg_dst_port); + uh.len = ip6h.payload_len; + uh.check = 0; + + payload = htonl(0xABABABAB); /* Covered in IPv6 length */ + + iov[0].iov_base = π + iov[0].iov_len = sizeof(pi); + iov[1].iov_base = ð + iov[1].iov_len = sizeof(eth); + iov[2].iov_base = &ip6h; + iov[2].iov_len = sizeof(ip6h); + iov[3].iov_base = &uh; + iov[3].iov_len = sizeof(uh); + iov[4].iov_base = &payload; + iov[4].iov_len = sizeof(payload); + + ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0])); + if (ret <= 0) + error(1, errno, "writev"); +} + +static void raw_read(int fd) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + struct msghdr msg = {0}; + struct iovec iov[2]; + struct udphdr uh; + uint32_t payload[2]; + int ret; + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcvtimeo udp"); + + iov[0].iov_base = &uh; + iov[0].iov_len = sizeof(uh); + + iov[1].iov_base = payload; + iov[1].iov_len = sizeof(payload); + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + ret = recvmsg(fd, &msg, 0); + if (ret <= 0) + error(1, errno, "read raw"); + if (ret != sizeof(uh) + sizeof(payload[0])) + error(1, errno, "read raw: len=%d\n", ret); + + fprintf(stderr, "raw recv: 0x%x\n", payload[0]); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "fFi:")) != -1) { + switch (c) { + case 'f': + cfg_do_filter = true; + printf("bpf filter enabled\n"); + break; + case 'F': + cfg_do_frags = true; + printf("napi frags mode enabled\n"); + break; + case 'i': + cfg_ifname = optarg; + break; + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!cfg_ifname) + error(1, 0, "must specify tap interface name (-i)"); +} + +int main(int argc, char **argv) +{ + int fdt, fdr; + + parse_opts(argc, argv); + + fdr = raw_open(); + fdt = tun_open(cfg_ifname); + + tun_write(fdt); + raw_read(fdr); + + if (close(fdt)) + error(1, errno, "close tun"); + if (close(fdr)) + error(1, errno, "close udp"); + + fprintf(stderr, "OK\n"); + return 0; +} + diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh new file mode 100755 index 000000000000..5da5066fb465 --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly NS="ns-$(mktemp -u XXXXXX)" + +cleanup() { + ip netns del $NS +} + +ip netns add $NS +trap cleanup EXIT + +ip -netns $NS link set lo up +ip -netns $NS tuntap add name tap1 mode tap +ip -netns $NS link set tap1 up +ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01 +ip -netns $NS -6 addr add fdab::1 peer fdab::2 dev tap1 nodad +ip netns exec $NS ethtool -K tap1 gro off + +# disable early demux, else udp_v6_early_demux pulls udp header into linear +ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0 + +echo "no filter" +ip netns exec $NS ./skf_net_off -i tap1 + +echo "filter, linear skb (-f)" +ip netns exec $NS ./skf_net_off -i tap1 -f + +echo "filter, fragmented skb (-f) (-F)" +ip netns exec $NS ./skf_net_off -i tap1 -f -F diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 9a85f93c33d8..5ded3b3a7538 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1753,6 +1753,42 @@ TEST_F(tls_basic, rekey_tx) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls_basic, disconnect) +{ + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + struct tls_crypto_info_keys key; + struct sockaddr_in addr; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &key, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len); + ASSERT_EQ(ret, 0); + + /* Pre-queue the data so that setsockopt parses it but doesn't + * dequeue it from the TCP socket. recvmsg would dequeue. + */ + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len); + ASSERT_EQ(ret, 0); + + addr.sin_family = AF_UNSPEC; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + ret = connect(self->cfd, &addr, sizeof(addr)); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); +} + TEST_F(tls, rekey) { char const *test_str_1 = "test_message_before_rekey"; diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile index 48ec048f47af..277f92f9d753 100644 --- a/tools/testing/selftests/pcie_bwctrl/Makefile +++ b/tools/testing/selftests/pcie_bwctrl/Makefile @@ -1,2 +1,3 @@ -TEST_PROGS = set_pcie_cooling_state.sh set_pcie_speed.sh +TEST_PROGS = set_pcie_cooling_state.sh +TEST_FILES = set_pcie_speed.sh include ../lib.mk diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c index 49dc1e831174..e03fe1b9bba2 100644 --- a/tools/testing/selftests/perf_events/watermark_signal.c +++ b/tools/testing/selftests/perf_events/watermark_signal.c @@ -75,7 +75,7 @@ TEST(watermark_signal) if (waitpid(child, &child_status, WSTOPPED) != child || !(WIFSTOPPED(child_status) && WSTOPSIG(child_status) == SIGSTOP)) { fprintf(stderr, - "failed to sycnhronize with child errno=%d status=%x\n", + "failed to synchronize with child errno=%d status=%x\n", errno, child_status); goto cleanup; diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c index 51c414faabb0..96f274f0582b 100644 --- a/tools/testing/selftests/pid_namespace/pid_max.c +++ b/tools/testing/selftests/pid_namespace/pid_max.c @@ -10,6 +10,7 @@ #include <stdlib.h> #include <string.h> #include <syscall.h> +#include <sys/mount.h> #include <sys/wait.h> #include "../kselftest_harness.h" diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index 55bcf81a2b9a..efd74063126e 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -131,6 +131,26 @@ #define PIDFD_INFO_EXIT (1UL << 3) /* Always returned if available, even if not requested */ #endif +#ifndef PIDFD_INFO_COREDUMP +#define PIDFD_INFO_COREDUMP (1UL << 4) +#endif + +#ifndef PIDFD_COREDUMPED +#define PIDFD_COREDUMPED (1U << 0) /* Did crash and... */ +#endif + +#ifndef PIDFD_COREDUMP_SKIP +#define PIDFD_COREDUMP_SKIP (1U << 1) /* coredumping generation was skipped. */ +#endif + +#ifndef PIDFD_COREDUMP_USER +#define PIDFD_COREDUMP_USER (1U << 2) /* coredump was done as the user. */ +#endif + +#ifndef PIDFD_COREDUMP_ROOT +#define PIDFD_COREDUMP_ROOT (1U << 3) /* coredump was done as root. */ +#endif + #ifndef PIDFD_THREAD #define PIDFD_THREAD O_EXCL #endif @@ -150,6 +170,8 @@ struct pidfd_info { __u32 fsuid; __u32 fsgid; __s32 exit_code; + __u32 coredump_mask; + __u32 __spare1; }; /* diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c index 7822dd080258..c094aeb1c620 100644 --- a/tools/testing/selftests/pidfd/pidfd_bind_mount.c +++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c @@ -15,79 +15,7 @@ #include "pidfd.h" #include "../kselftest_harness.h" - -#ifndef __NR_open_tree - #if defined __alpha__ - #define __NR_open_tree 538 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_open_tree 4428 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_open_tree 6428 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_open_tree 5428 - #endif - #elif defined __ia64__ - #define __NR_open_tree (428 + 1024) - #else - #define __NR_open_tree 428 - #endif -#endif - -#ifndef __NR_move_mount - #if defined __alpha__ - #define __NR_move_mount 539 - #elif defined _MIPS_SIM - #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ - #define __NR_move_mount 4429 - #endif - #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ - #define __NR_move_mount 6429 - #endif - #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ - #define __NR_move_mount 5429 - #endif - #elif defined __ia64__ - #define __NR_move_mount (428 + 1024) - #else - #define __NR_move_mount 429 - #endif -#endif - -#ifndef MOVE_MOUNT_F_EMPTY_PATH -#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */ -#endif - -#ifndef MOVE_MOUNT_F_EMPTY_PATH -#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */ -#endif - -static inline int sys_move_mount(int from_dfd, const char *from_pathname, - int to_dfd, const char *to_pathname, - unsigned int flags) -{ - return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, - to_pathname, flags); -} - -#ifndef OPEN_TREE_CLONE -#define OPEN_TREE_CLONE 1 -#endif - -#ifndef OPEN_TREE_CLOEXEC -#define OPEN_TREE_CLOEXEC O_CLOEXEC -#endif - -#ifndef AT_RECURSIVE -#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ -#endif - -static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) -{ - return syscall(__NR_open_tree, dfd, filename, flags); -} +#include "../filesystems/wrappers.h" FIXTURE(pidfd_bind_mount) { char template[PATH_MAX]; diff --git a/tools/testing/selftests/pidfd/pidfd_info_test.c b/tools/testing/selftests/pidfd/pidfd_info_test.c index 1758a1b0457b..a0eb6e81eaa2 100644 --- a/tools/testing/selftests/pidfd/pidfd_info_test.c +++ b/tools/testing/selftests/pidfd/pidfd_info_test.c @@ -299,6 +299,7 @@ TEST_F(pidfd_info, thread_group) /* Opening a thread as a thread-group leader must fail. */ pidfd_thread = sys_pidfd_open(pid_thread, 0); ASSERT_LT(pidfd_thread, 0); + ASSERT_EQ(errno, ENOENT); /* Opening a thread as a PIDFD_THREAD must succeed. */ pidfd_thread = sys_pidfd_open(pid_thread, PIDFD_THREAD); @@ -362,9 +363,9 @@ TEST_F(pidfd_info, thread_group) ASSERT_EQ(ioctl(pidfd_leader, PIDFD_GET_INFO, &info), 0); ASSERT_FALSE(!!(info.mask & PIDFD_INFO_CREDS)); ASSERT_TRUE(!!(info.mask & PIDFD_INFO_EXIT)); - /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */ - ASSERT_TRUE(WIFEXITED(info.exit_code)); - ASSERT_EQ(WEXITSTATUS(info.exit_code), 0); + /* Even though the thread-group exited successfully it will still report the group exit code. */ + ASSERT_TRUE(WIFSIGNALED(info.exit_code)); + ASSERT_EQ(WTERMSIG(info.exit_code), SIGKILL); /* * Retrieve exit information for the thread-group leader via the @@ -375,9 +376,9 @@ TEST_F(pidfd_info, thread_group) ASSERT_FALSE(!!(info2.mask & PIDFD_INFO_CREDS)); ASSERT_TRUE(!!(info2.mask & PIDFD_INFO_EXIT)); - /* The thread-group leader exited successfully. Only the specific thread was SIGKILLed. */ - ASSERT_TRUE(WIFEXITED(info2.exit_code)); - ASSERT_EQ(WEXITSTATUS(info2.exit_code), 0); + /* Even though the thread-group exited successfully it will still report the group exit code. */ + ASSERT_TRUE(WIFSIGNALED(info2.exit_code)); + ASSERT_EQ(WTERMSIG(info2.exit_code), SIGKILL); /* Retrieve exit information for the thread. */ info.mask = PIDFD_INFO_CGROUPID | PIDFD_INFO_EXIT; diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh index aad51e7c0183..991fb11306eb 100755 --- a/tools/testing/selftests/rcutorture/bin/console-badness.sh +++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh @@ -10,7 +10,7 @@ # # Authors: Paul E. McKenney <paulmck@kernel.org> -grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | +grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Call trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' | grep -v 'ODEBUG: ' | grep -v 'This means that this is a DEBUG kernel and it is' | grep -v 'Warning: unable to open an initial console' | diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index ad79784e552d..957800c9ffba 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -73,7 +73,7 @@ config_override_param "$config_dir/CFcommon.$(uname -m)" KcList \ cp $T/KcList $resdir/ConfigFragment base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'` -if test "$base_resdir" != "$resdir" && test -f $base_resdir/bzImage && test -f $base_resdir/vmlinux +if test "$base_resdir" != "$resdir" && (test -f $base_resdir/bzImage || test -f $base_resdir/Image) && test -f $base_resdir/vmlinux then # Rerunning previous test, so use that test's kernel. QEMU="`identify_qemu $base_resdir/vmlinux`" diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index b07c11cf6929..21e6ba3615f6 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -148,7 +148,7 @@ then summary="$summary KCSAN: $n_kcsan" fi fi - n_calltrace=`grep -c 'Call Trace:' $file` + n_calltrace=`grep -Ec 'Call Trace:|Call trace:' $file` if test "$n_calltrace" -ne 0 then summary="$summary Call Traces: $n_calltrace" diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh index 2db12c5cad9c..208be7d09a61 100755 --- a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh +++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh @@ -39,8 +39,9 @@ do shift done -err= nerrs=0 + +# Test lockdep's handling of deadlocks. for d in 0 1 do for t in 0 1 2 @@ -52,6 +53,12 @@ do tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.test_srcu_lockdep=$val rcutorture.reader_flavor=0x2" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 ret=$? mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val" + if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config + then + echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario" + nerrs=$((nerrs+1)) + err=1 + fi if test "$d" -ne 0 && test "$ret" -eq 0 then err=1 @@ -71,6 +78,39 @@ do done done done + +# Test lockdep-enabled testing of mixed SRCU readers. +for val in 0x1 0xf +do + err= + tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --kconfig "CONFIG_FORCE_NEED_SRCU_NMI_SAFE=y" --bootargs "rcutorture.reader_flavor=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1 + ret=$? + mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val" + if ! grep -q '^CONFIG_PROVE_LOCKING=y' .config + then + echo "rcu_torture_init_srcu_lockdep:Error: CONFIG_PROVE_LOCKING disabled in rcutorture SRCU-P scenario" + nerrs=$((nerrs+1)) + err=1 + fi + if test "$val" -eq 0xf && test "$ret" -eq 0 + then + err=1 + echo -n Unexpected success for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + fi + if test "$val" -eq 0x1 && test "$ret" -ne 0 + then + err=1 + echo -n Unexpected failure for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + fi + if test -n "$err" + then + grep "rcu_torture_init_srcu_lockdep: test_srcu_lockdep = " "$RCUTORTURE/res/$ds/$val/SRCU-P/console.log" | sed -e 's/^.*rcu_torture_init_srcu_lockdep://' >> "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + cat "$RCUTORTURE/res/$ds/$val/kvm.sh.err" + nerrs=$((nerrs+1)) + fi +done + +# Set up exit code. if test "$nerrs" -ne 0 then exit 1 diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 0447c4a00cc4..e03fdaca89b3 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -51,12 +51,15 @@ do_scftorture=yes do_rcuscale=yes do_refscale=yes do_kvfree=yes +do_normal=yes +explicit_normal=no do_kasan=yes do_kcsan=no do_clocksourcewd=yes do_rt=yes do_rcutasksflavors=yes do_srcu_lockdep=yes +do_rcu_rust=no # doyesno - Helper function for yes/no arguments function doyesno () { @@ -87,6 +90,7 @@ usage () { echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture" echo " --do-refscale / --do-no-refscale / --no-refscale" echo " --do-rt / --do-no-rt / --no-rt" + echo " --do-rcu-rust / --do-no-rcu-rust / --no-rcu-rust" echo " --do-scftorture / --do-no-scftorture / --no-scftorture" echo " --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep" echo " --duration [ <minutes> | <hours>h | <days>d ]" @@ -128,6 +132,8 @@ do do_refscale=yes do_rt=yes do_kvfree=yes + do_normal=yes + explicit_normal=no do_kasan=yes do_kcsan=yes do_clocksourcewd=yes @@ -161,11 +167,17 @@ do do_refscale=no do_rt=no do_kvfree=no + do_normal=no + explicit_normal=no do_kasan=no do_kcsan=no do_clocksourcewd=no do_srcu_lockdep=no ;; + --do-normal|--do-no-normal|--no-normal) + do_normal=`doyesno "$1" --do-normal` + explicit_normal=yes + ;; --do-rcuscale|--do-no-rcuscale|--no-rcuscale) do_rcuscale=`doyesno "$1" --do-rcuscale` ;; @@ -181,6 +193,9 @@ do --do-rt|--do-no-rt|--no-rt) do_rt=`doyesno "$1" --do-rt` ;; + --do-rcu-rust|--do-no-rcu-rust|--no-rcu-rust) + do_rcu_rust=`doyesno "$1" --do-rcu-rust` + ;; --do-scftorture|--do-no-scftorture|--no-scftorture) do_scftorture=`doyesno "$1" --do-scftorture` ;; @@ -242,6 +257,17 @@ trap 'rm -rf $T' 0 2 echo " --- " $scriptname $args | tee -a $T/log echo " --- Results directory: " $ds | tee -a $T/log +if test "$do_normal" = "no" && test "$do_kasan" = "no" && test "$do_kcsan" = "no" +then + # Match old scripts so that "--do-none --do-rcutorture" does + # normal rcutorture testing, but no KASAN or KCSAN testing. + if test $explicit_normal = yes + then + echo " --- Everything disabled, so explicit --do-normal overridden" | tee -a $T/log + fi + do_normal=yes +fi + # Calculate rcutorture defaults and apportion time if test -z "$configs_rcutorture" then @@ -332,9 +358,12 @@ function torture_set { local kcsan_kmake_tag= local flavor=$1 shift - curflavor=$flavor - torture_one "$@" - mv $T/last-resdir $T/last-resdir-nodebug || : + if test "$do_normal" = "yes" + then + curflavor=$flavor + torture_one "$@" + mv $T/last-resdir $T/last-resdir-nodebug || : + fi if test "$do_kasan" = "yes" then curflavor=${flavor}-kasan @@ -448,13 +477,57 @@ fi if test "$do_rt" = "yes" then - # With all post-boot grace periods forced to normal. - torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1" - torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make + # In both runs, disable testing of RCU priority boosting because + # -rt doesn't like its interaction with testing of callback + # flooding. + + # With all post-boot grace periods forced to normal (default for PREEMPT_RT). + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcutorture.preempt_duration=0" + torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=y CONFIG_RCU_NOCB_CPU=y" --trust-make # With all post-boot grace periods forced to expedited. - torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1" - torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make + torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcutorture.test_boost=0 rcupdate.rcu_normal_after_boot=0 rcupdate.rcu_expedited=1 rcutorture.preempt_duration=0" + torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --kconfig "CONFIG_PREEMPT_RT=y CONFIG_EXPERT=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_FULL=y CONFIG_RCU_NOCB_CPU=y" --trust-make +fi + +if test "$do_rcu_rust" = "yes" +then + echo " --- do-rcu-rust:" Start `date` | tee -a $T/log + rrdir="tools/testing/selftests/rcutorture/res/$ds/results-rcu-rust" + mkdir -p "$rrdir" + echo " --- make LLVM=1 rustavailable " | tee -a $rrdir/log > $rrdir/rustavailable.out + make LLVM=1 rustavailable > $T/rustavailable.out 2>&1 + retcode=$? + echo $retcode > $rrdir/rustavailable.exitcode + cat $T/rustavailable.out | tee -a $rrdir/log >> $rrdir/rustavailable.out 2>&1 + buildphase=rustavailable + if test "$retcode" -eq 0 + then + echo " --- Running 'make mrproper' in order to run kunit." | tee -a $rrdir/log > $rrdir/mrproper.out + make mrproper > $rrdir/mrproper.out 2>&1 + retcode=$? + echo $retcode > $rrdir/mrproper.exitcode + buildphase=mrproper + fi + if test "$retcode" -eq 0 + then + echo " --- Running rust_doctests_kernel." | tee -a $rrdir/log > $rrdir/rust_doctests_kernel.out + ./tools/testing/kunit/kunit.py run --make_options LLVM=1 --make_options CLIPPY=1 --arch arm64 --kconfig_add CONFIG_SMP=y --kconfig_add CONFIG_WERROR=y --kconfig_add CONFIG_RUST=y rust_doctests_kernel >> $rrdir/rust_doctests_kernel.out 2>&1 + # @@@ Remove "--arch arm64" in order to test on native architecture? + # @@@ Analyze $rrdir/rust_doctests_kernel.out contents? + retcode=$? + echo $retcode > $rrdir/rust_doctests_kernel.exitcode + buildphase=rust_doctests_kernel + fi + if test "$retcode" -eq 0 + then + echo "rcu-rust($retcode)" $rrdir >> $T/successes + echo Success >> $rrdir/log + else + echo "rcu-rust($retcode)" $rrdir >> $T/failures + echo " --- rcu-rust Test summary:" >> $rrdir/log + echo " --- Summary: Exit code $retcode from $buildphase, see $rrdir/$buildphase.out" >> $rrdir/log + fi fi if test "$do_srcu_lockdep" = "yes" diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index 8ae41d5f81a3..54b1600c7eb5 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -8,8 +8,6 @@ CONFIG_NO_HZ_IDLE=y CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y -CONFIG_MAXSMP=y -CONFIG_CPUMASK_OFFSTACK=y CONFIG_RCU_NOCB_CPU=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_BOOST=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot index 40af3df0f397..1cc5b47dde28 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot @@ -1,4 +1,4 @@ -maxcpus=8 nr_cpus=43 +maxcpus=8 nr_cpus=17 rcutree.gp_preinit_delay=3 rcutree.gp_init_delay=3 rcutree.gp_cleanup_delay=3 diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh index 50e03eefe7ac..0443beacf362 100755 --- a/tools/testing/selftests/run_kselftest.sh +++ b/tools/testing/selftests/run_kselftest.sh @@ -3,7 +3,14 @@ # # Run installed kselftest tests. # -BASE_DIR=$(realpath $(dirname $0)) + +# Fallback to readlink if realpath is not available +if which realpath > /dev/null; then + BASE_DIR=$(realpath $(dirname $0)) +else + BASE_DIR=$(readlink -f $(dirname $0)) +fi + cd $BASE_DIR TESTS="$BASE_DIR"/kselftest-list.txt if [ ! -r "$TESTS" ] ; then diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json index 1ba96c467754..d9fc62ab476c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json @@ -412,5 +412,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY ingress" ] + }, + { + "id": "33f4", + "name": "Check echo of big filter command", + "category": [ + "infra", + "u32" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY parent root handle 10: fq_codel" + ], + "cmdUnderTest": "bash -c '$TC -echo filter add dev $DUMMY parent 10: u32 match u32 0 0 $(for i in $(seq 32); do echo action pedit munge ip dport set 22; done) | grep \"added filter\"'", + "verifyCmd": "", + "expExitCode": "0", + "matchCount": "0", + "matchPattern": "", + "teardown": [ + "$TC qdisc del dev $DUMMY parent root fq_codel" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 25454fd95537..ddc97ecd8b39 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -158,5 +158,447 @@ "$TC qdisc del dev $DUMMY handle 1: root", "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] + }, + { + "id": "a4bb", + "name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "htb" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root htb default 10", + "$TC class add dev $DUMMY parent 1: classid 1:10 htb rate 1kbit", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "a4be", + "name": "Test FQ_CODEL with QFQ parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root qfq", + "$TC class add dev $DUMMY parent 1: classid 1:10 qfq weight 1 maxpkt 1000", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 10 -s 1000 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "a4bf", + "name": "Test FQ_CODEL with HFSC parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root hfsc default 10", + "$TC class add dev $DUMMY parent 1: classid 1:10 hfsc sc rate 1kbit ul rate 1kbit", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "a4c0", + "name": "Test FQ_CODEL with DRR parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root drr", + "$TC class add dev $DUMMY parent 1: classid 1:10 drr quantum 1500", + "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "a4c1", + "name": "Test FQ_CODEL with ETS parent - force packet drop with empty queue", + "category": [ + "qdisc", + "fq_codel", + "ets" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 1", + "$TC class change dev $DUMMY parent 1: classid 1:1 ets", + "$TC qdisc add dev $DUMMY parent 1:1 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", + "sleep 0.1" + ], + "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "matchPattern": "dropped [1-9][0-9]*", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "a4c3", + "name": "Test HFSC with netem/blackhole - queue emptying during peek operation", + "category": [ + "qdisc", + "hfsc", + "netem", + "blackhole" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 plug limit 1024", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 hfsc default 1", + "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 3:1 handle 4:0 netem delay 1ms", + "$TC qdisc add dev $DUMMY parent 4:1 handle 5:0 blackhole", + "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true", + "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit", + "$TC class del dev $DUMMY parent 3:0 classid 3:1", + "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit", + "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true" + ], + "cmdUnderTest": "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "qdisc hfsc 3:.*parent 1:2.*default 1", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "90ec", + "name": "Test DRR's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "drr", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "1f1f", + "name": "Test ETS's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "ets" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root ets bands 2", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 ets quantum 1500", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s class show dev $DUMMY", + "matchJSON": [ + { + "class": "ets", + "handle": "1:1", + "stats": { + "bytes": 196, + "packets": 2 + } + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "5e6d", + "name": "Test QFQ's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "qfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root qfq", + "$TC class replace dev $DUMMY parent 1:0 classid 1:1 qfq weight 100 maxpkt 1500", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "qfq", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "bf1d", + "name": "Test HFSC's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "hfsc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root hfsc", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1", + "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit", + "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2", + "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true", + "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1", + "$TC class del dev $DUMMY classid 1:1" + ], + "cmdUnderTest": "ping -c 1 10.10.10.2 -I$DUMMY > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "hfsc", + "handle": "1:", + "bytes": 392, + "packets": 4 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "7c3b", + "name": "Test nested DRR's enqueue reentrant behaviour with netem", + "category": [ + "qdisc", + "drr" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY handle 1:0 root drr", + "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr", + "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "$TC qdisc add dev $DUMMY handle 2:0 parent 1:1 drr", + "$TC class add dev $DUMMY classid 2:1 parent 2:0 drr", + "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 2:1", + "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%" + ], + "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true", + "expExitCode": "0", + "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0", + "matchJSON": [ + { + "kind": "drr", + "handle": "1:", + "bytes": 196, + "packets": 2 + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1:0 root", + "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + ] + }, + { + "id": "62c4", + "name": "Test HTB with FQ_CODEL - basic functionality", + "category": [ + "qdisc", + "htb", + "fq_codel" + ], + "plugins": { + "requires": [ + "nsPlugin", + "scapyPlugin" + ] + }, + "setup": [ + "$TC qdisc add dev $DEV1 root handle 1: htb default 11", + "$TC class add dev $DEV1 parent 1: classid 1:1 htb rate 10kbit", + "$TC class add dev $DEV1 parent 1:1 classid 1:11 htb rate 10kbit prio 0 quantum 1486", + "$TC qdisc add dev $DEV1 parent 1:11 fq_codel quantum 300 noecn", + "sleep 0.5" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/TCP(sport=12345, dport=80)" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1 | grep -A 5 'qdisc fq_codel'", + "matchPattern": "Sent [0-9]+ bytes [0-9]+ pkt", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 handle 1: root" + ] + }, + { + "id": "831d", + "name": "Test HFSC qlen accounting with DRR/NETEM/BLACKHOLE chain", + "category": ["qdisc", "hfsc", "drr", "netem", "blackhole"], + "plugins": { "requires": ["nsPlugin", "scapyPlugin"] }, + "setup": [ + "$IP link set dev $DEV1 up || true", + "$TC qdisc add dev $DEV1 root handle 1: drr", + "$TC filter add dev $DEV1 parent 1: basic classid 1:1", + "$TC class add dev $DEV1 parent 1: classid 1:1 drr", + "$TC qdisc add dev $DEV1 parent 1:1 handle 2: hfsc def 1", + "$TC class add dev $DEV1 parent 2: classid 2:1 hfsc rt m1 8 d 1 m2 0", + "$TC qdisc add dev $DEV1 parent 2:1 handle 3: netem", + "$TC qdisc add dev $DEV1 parent 3:1 handle 4: blackhole" + ], + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": "$TC -s qdisc show dev $DEV1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DEV1", + "matchPattern": "qdisc hfsc", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 root handle 1: drr"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json index e9469ee71e6f..6d515d0e5ed6 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json @@ -189,5 +189,29 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "deb1", + "name": "CODEL test qdisc limit trimming", + "category": ["qdisc", "codel"], + "plugins": { + "requires": ["nsPlugin", "scapyPlugin"] + }, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root codel limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root codel limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1p target 5ms interval 100ms", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json index 3a537b2ec4c9..24faf4e12dfa 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json @@ -377,5 +377,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "9479", + "name": "FQ test qdisc limit trimming", + "category": ["qdisc", "fq"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json index 9774b1e8801b..4ce62b857fd7 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json @@ -294,5 +294,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "0436", + "name": "FQ_CODEL test qdisc limit trimming", + "category": ["qdisc", "fq_codel"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq_codel limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_codel limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json index d012d88d67fe..229fe1bf4a90 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json @@ -18,5 +18,27 @@ "matchCount": "1", "teardown": [ ] + }, + { + "id": "83bf", + "name": "FQ_PIE test qdisc limit trimming", + "category": ["qdisc", "fq_pie"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root fq_pie limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root fq_pie limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc fq_pie 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json index dbef5474b26b..0ca19fac54a5 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json @@ -188,5 +188,27 @@ "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" ] + }, + { + "id": "385f", + "name": "HHF test qdisc limit trimming", + "category": ["qdisc", "hhf"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root hhf limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root hhf limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json new file mode 100644 index 000000000000..1a98b66e8030 --- /dev/null +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pie.json @@ -0,0 +1,24 @@ +[ + { + "id": "6158", + "name": "PIE test qdisc limit trimming", + "category": ["qdisc", "pie"], + "plugins": {"requires": ["nsPlugin", "scapyPlugin"]}, + "setup": [ + "$TC qdisc add dev $DEV1 handle 1: root pie limit 10" + ], + "scapy": [ + { + "iface": "$DEV0", + "count": 10, + "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)" + } + ], + "cmdUnderTest": "$TC qdisc change dev $DEV1 handle 1: root pie limit 1", + "expExitCode": "0", + "verifyCmd": "$TC qdisc show dev $DEV1", + "matchPattern": "qdisc pie 1: root refcnt [0-9]+ limit 1p", + "matchCount": "1", + "teardown": ["$TC qdisc del dev $DEV1 handle 1: root"] + } +] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json index 50e8d72781cb..28c6ce6da7db 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json @@ -228,5 +228,41 @@ "matchCount": "0", "teardown": [ ] + }, + { + "id": "7f8f", + "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 flows 1)", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 flows 1", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] + }, + { + "id": "5168", + "name": "Check that a derived limit of 1 is rejected (limit 2 depth 1 divisor 1)", + "category": [ + "qdisc", + "sfq" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [], + "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 2 depth 1 divisor 1", + "expExitCode": "2", + "verifyCmd": "$TC qdisc show dev $DUMMY", + "matchPattern": "sfq", + "matchCount": "0", + "teardown": [] } ] diff --git a/tools/testing/selftests/timens/clock_nanosleep.c b/tools/testing/selftests/timens/clock_nanosleep.c index 72d41b955fb2..5cc0010e85ff 100644 --- a/tools/testing/selftests/timens/clock_nanosleep.c +++ b/tools/testing/selftests/timens/clock_nanosleep.c @@ -38,7 +38,7 @@ void *call_nanosleep(void *_args) return NULL; } -int run_test(int clockid, int abs) +static int run_test(int clockid, int abs) { struct timespec now = {}, rem; struct thread_args args = { .now = &now, .rem = &rem, .clockid = clockid}; @@ -115,6 +115,8 @@ int main(int argc, char *argv[]) { int ret, nsfd; + ksft_print_header(); + nscheck(); ksft_set_plan(4); diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c index d12ff955de0d..a644162d56fd 100644 --- a/tools/testing/selftests/timens/exec.c +++ b/tools/testing/selftests/timens/exec.c @@ -36,6 +36,8 @@ int main(int argc, char *argv[]) return 0; } + ksft_print_header(); + nscheck(); ksft_set_plan(1); diff --git a/tools/testing/selftests/timens/futex.c b/tools/testing/selftests/timens/futex.c index 6b2b9264e851..339633ae037a 100644 --- a/tools/testing/selftests/timens/futex.c +++ b/tools/testing/selftests/timens/futex.c @@ -66,6 +66,8 @@ int main(int argc, char *argv[]) pid_t pid; struct timespec mtime_now; + ksft_print_header(); + nscheck(); ksft_set_plan(2); diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c index 6b13dc277724..d6658b7b7548 100644 --- a/tools/testing/selftests/timens/gettime_perf.c +++ b/tools/testing/selftests/timens/gettime_perf.c @@ -67,6 +67,8 @@ int main(int argc, char *argv[]) time_t offset = 10; int nsfd; + ksft_print_header(); + ksft_set_plan(8); fill_function_pointers(); diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c index 1833ca97eb24..0a9ff90ee69a 100644 --- a/tools/testing/selftests/timens/procfs.c +++ b/tools/testing/selftests/timens/procfs.c @@ -180,6 +180,8 @@ int main(int argc, char *argv[]) { int ret = 0; + ksft_print_header(); + nscheck(); ksft_set_plan(2); diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c index 387220791a05..a9c0534ef8f6 100644 --- a/tools/testing/selftests/timens/timens.c +++ b/tools/testing/selftests/timens/timens.c @@ -151,6 +151,8 @@ int main(int argc, char *argv[]) time_t offset; int ret = 0; + ksft_print_header(); + nscheck(); check_supported_timers(); diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c index 5b939f59dfa4..79543ceb2c0f 100644 --- a/tools/testing/selftests/timens/timer.c +++ b/tools/testing/selftests/timens/timer.c @@ -15,7 +15,7 @@ #include "log.h" #include "timens.h" -int run_test(int clockid, struct timespec now) +static int run_test(int clockid, struct timespec now) { struct itimerspec new_value; long long elapsed; @@ -75,6 +75,8 @@ int main(int argc, char *argv[]) pid_t pid; struct timespec btime_now, mtime_now; + ksft_print_header(); + nscheck(); check_supported_timers(); diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c index a4196bbd6e33..402e2e415545 100644 --- a/tools/testing/selftests/timens/timerfd.c +++ b/tools/testing/selftests/timens/timerfd.c @@ -15,14 +15,14 @@ #include "log.h" #include "timens.h" -static int tclock_gettime(clock_t clockid, struct timespec *now) +static int tclock_gettime(clockid_t clockid, struct timespec *now) { if (clockid == CLOCK_BOOTTIME_ALARM) clockid = CLOCK_BOOTTIME; return clock_gettime(clockid, now); } -int run_test(int clockid, struct timespec now) +static int run_test(int clockid, struct timespec now) { struct itimerspec new_value; long long elapsed; @@ -82,6 +82,8 @@ int main(int argc, char *argv[]) pid_t pid; struct timespec btime_now, mtime_now; + ksft_print_header(); + nscheck(); check_supported_timers(); diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c index 5b8907bf451d..b957e1a65124 100644 --- a/tools/testing/selftests/timens/vfork_exec.c +++ b/tools/testing/selftests/timens/vfork_exec.c @@ -91,6 +91,8 @@ int main(int argc, char *argv[]) return check("child after exec", &now); } + ksft_print_header(); + nscheck(); ksft_set_plan(4); diff --git a/tools/testing/selftests/tpm2/.gitignore b/tools/testing/selftests/tpm2/.gitignore new file mode 100644 index 000000000000..6d6165c5e35d --- /dev/null +++ b/tools/testing/selftests/tpm2/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +AsyncTest.log +SpaceTest.log diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh index 168f4b166234..3a60e6c6f5c9 100755 --- a/tools/testing/selftests/tpm2/test_smoke.sh +++ b/tools/testing/selftests/tpm2/test_smoke.sh @@ -6,6 +6,6 @@ ksft_skip=4 [ -e /dev/tpm0 ] || exit $ksft_skip read tpm_version < /sys/class/tpm/tpm0/tpm_version_major -[ "$tpm_version" == 2 ] || exit $ksft_skip +[ "$tpm_version" = 2 ] || exit $ksft_skip python3 -m unittest -v tpm2_tests.SmokeTest 2>&1 diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index c7781efea0f3..4dde8838261d 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -1,11 +1,24 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir) +CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)/usr/include +ifneq ($(WERROR),0) + CFLAGS += -Werror +endif + LDLIBS += -lpthread -lm -luring TEST_PROGS := test_generic_01.sh TEST_PROGS += test_generic_02.sh TEST_PROGS += test_generic_03.sh +TEST_PROGS += test_generic_04.sh +TEST_PROGS += test_generic_05.sh +TEST_PROGS += test_generic_06.sh +TEST_PROGS += test_generic_07.sh + +TEST_PROGS += test_generic_08.sh +TEST_PROGS += test_generic_09.sh +TEST_PROGS += test_generic_10.sh +TEST_PROGS += test_generic_11.sh TEST_PROGS += test_null_01.sh TEST_PROGS += test_null_02.sh @@ -21,12 +34,16 @@ TEST_PROGS += test_stripe_04.sh TEST_PROGS += test_stress_01.sh TEST_PROGS += test_stress_02.sh +TEST_PROGS += test_stress_03.sh +TEST_PROGS += test_stress_04.sh +TEST_PROGS += test_stress_05.sh TEST_GEN_PROGS_EXTENDED = kublk include ../lib.mk -$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c +$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \ + fault_inject.c check: shellcheck -x -f gcc *.sh diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c new file mode 100644 index 000000000000..5421774d7867 --- /dev/null +++ b/tools/testing/selftests/ublk/fault_inject.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Fault injection ublk target. Hack this up however you like for + * testing specific behaviors of ublk_drv. Currently is a null target + * with a configurable delay before completing each I/O. This delay can + * be used to test ublk_drv's handling of I/O outstanding to the ublk + * server when it dies. + */ + +#include "kublk.h" + +static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx, + struct ublk_dev *dev) +{ + const struct ublksrv_ctrl_dev_info *info = &dev->dev_info; + unsigned long dev_size = 250UL << 30; + + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + + dev->tgt.dev_size = dev_size; + dev->tgt.params = (struct ublk_params) { + .types = UBLK_PARAM_TYPE_BASIC, + .basic = { + .logical_bs_shift = 9, + .physical_bs_shift = 12, + .io_opt_shift = 12, + .io_min_shift = 9, + .max_sectors = info->max_io_buf_bytes >> 9, + .dev_sectors = dev_size >> 9, + }, + }; + + dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000); + return 0; +} + +static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + struct io_uring_sqe *sqe; + struct __kernel_timespec ts = { + .tv_nsec = (long long)q->dev->private_data, + }; + + ublk_queue_alloc_sqes(q, &sqe, 1); + io_uring_prep_timeout(sqe, &ts, 1, 0); + sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1); + + ublk_queued_tgt_io(q, tag, 1); + + return 0; +} + +static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag, + const struct io_uring_cqe *cqe) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + + if (cqe->res != -ETIME) + ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res); + + if (ublk_completed_tgt_io(q, tag)) + ublk_complete_io(q, tag, iod->nr_sectors << 9); + else + ublk_err("%s: io not complete after 1 cqe\n", __func__); +} + +static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "delay_us", 1, NULL, 0 }, + { 0, 0, 0, 0 } + }; + int option_idx, opt; + + ctx->fault_inject.delay_us = 0; + while ((opt = getopt_long(argc, argv, "", + longopts, &option_idx)) != -1) { + switch (opt) { + case 0: + if (!strcmp(longopts[option_idx].name, "delay_us")) + ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10); + } + } +} + +static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops) +{ + printf("\tfault_inject: [--delay_us us (default 0)]\n"); +} + +const struct ublk_tgt_ops fault_inject_tgt_ops = { + .name = "fault_inject", + .init_tgt = ublk_fault_inject_tgt_init, + .queue_io = ublk_fault_inject_queue_io, + .tgt_io_done = ublk_fault_inject_tgt_io_done, + .parse_cmd_line = ublk_fault_inject_cmd_line, + .usage = ublk_fault_inject_usage, +}; diff --git a/tools/testing/selftests/ublk/file_backed.c b/tools/testing/selftests/ublk/file_backed.c index 6f34eabfae97..509842df9bee 100644 --- a/tools/testing/selftests/ublk/file_backed.c +++ b/tools/testing/selftests/ublk/file_backed.c @@ -29,19 +29,23 @@ static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_des static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) { unsigned ublk_op = ublksrv_get_op(iod); - int zc = ublk_queue_use_zc(q); - enum io_uring_op op = ublk_to_uring_op(iod, zc); + unsigned zc = ublk_queue_use_zc(q); + unsigned auto_zc = ublk_queue_use_auto_zc(q); + enum io_uring_op op = ublk_to_uring_op(iod, zc | auto_zc); struct io_uring_sqe *sqe[3]; + void *addr = (zc | auto_zc) ? NULL : (void *)iod->addr; - if (!zc) { + if (!zc || auto_zc) { ublk_queue_alloc_sqes(q, sqe, 1); if (!sqe[0]) return -ENOMEM; io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/, - (void *)iod->addr, + addr, iod->nr_sectors << 9, iod->start_sector << 9); + if (auto_zc) + sqe[0]->buf_index = tag; io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE); /* bit63 marks us as tgt io */ sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1); @@ -145,6 +149,11 @@ static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) }, }; + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + ret = backing_file_tgt_init(dev); if (ret) return ret; diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 91c282bc7674..b5131a000795 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -5,22 +5,24 @@ #include "kublk.h" +#define MAX_NR_TGT_ARG 64 + unsigned int ublk_dbg_mask = UBLK_LOG; static const struct ublk_tgt_ops *tgt_ops_list[] = { &null_tgt_ops, &loop_tgt_ops, &stripe_tgt_ops, + &fault_inject_tgt_ops, }; static const struct ublk_tgt_ops *ublk_find_tgt(const char *name) { - const struct ublk_tgt_ops *ops; int i; if (name == NULL) return NULL; - for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++) + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) if (strcmp(tgt_ops_list[i]->name, name) == 0) return tgt_ops_list[i]; return NULL; @@ -118,6 +120,27 @@ static int ublk_ctrl_start_dev(struct ublk_dev *dev, return __ublk_ctrl_cmd(dev, &data); } +static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_START_USER_RECOVERY, + }; + + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_END_USER_RECOVERY, + .flags = CTRL_CMD_HAS_DATA, + }; + + dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid; + + return __ublk_ctrl_cmd(dev, &data); +} + static int ublk_ctrl_add_dev(struct ublk_dev *dev) { struct ublk_ctrl_cmd_data data = { @@ -193,6 +216,30 @@ static int ublk_ctrl_get_features(struct ublk_dev *dev, return __ublk_ctrl_cmd(dev, &data); } +static int ublk_ctrl_update_size(struct ublk_dev *dev, + __u64 nr_sects) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_UPDATE_SIZE, + .flags = CTRL_CMD_HAS_DATA, + }; + + data.data[0] = nr_sects; + return __ublk_ctrl_cmd(dev, &data); +} + +static int ublk_ctrl_quiesce_dev(struct ublk_dev *dev, + unsigned int timeout_ms) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_QUIESCE_DEV, + .flags = CTRL_CMD_HAS_DATA, + }; + + data.data[0] = timeout_ms; + return __ublk_ctrl_cmd(dev, &data); +} + static const char *ublk_dev_state_desc(struct ublk_dev *dev) { switch (dev->dev_info.state) { @@ -207,10 +254,73 @@ static const char *ublk_dev_state_desc(struct ublk_dev *dev) }; } +static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len) +{ + unsigned done = 0; + int i; + + for (i = 0; i < CPU_SETSIZE; i++) { + if (CPU_ISSET(i, set)) + done += snprintf(&buf[done], len - done, "%d ", i); + } +} + +static void ublk_adjust_affinity(cpu_set_t *set) +{ + int j, updated = 0; + + /* + * Just keep the 1st CPU now. + * + * In future, auto affinity selection can be tried. + */ + for (j = 0; j < CPU_SETSIZE; j++) { + if (CPU_ISSET(j, set)) { + if (!updated) { + updated = 1; + continue; + } + CPU_CLR(j, set); + } + } +} + +/* Caller must free the allocated buffer */ +static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf) +{ + struct ublk_ctrl_cmd_data data = { + .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY, + .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF, + }; + cpu_set_t *buf; + int i, ret; + + buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues); + if (!buf) + return -ENOMEM; + + for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) { + data.data[0] = i; + data.len = sizeof(cpu_set_t); + data.addr = (__u64)&buf[i]; + + ret = __ublk_ctrl_cmd(ctrl_dev, &data); + if (ret < 0) { + free(buf); + return ret; + } + ublk_adjust_affinity(&buf[i]); + } + + *ptr_buf = buf; + return 0; +} + static void ublk_ctrl_dump(struct ublk_dev *dev) { struct ublksrv_ctrl_dev_info *info = &dev->dev_info; struct ublk_params p; + cpu_set_t *affinity; int ret; ret = ublk_ctrl_get_params(dev, &p); @@ -219,12 +329,31 @@ static void ublk_ctrl_dump(struct ublk_dev *dev) return; } + ret = ublk_ctrl_get_affinity(dev, &affinity); + if (ret < 0) { + ublk_err("failed to get affinity %m\n"); + return; + } + ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n", info->dev_id, info->nr_hw_queues, info->queue_depth, 1 << p.basic.logical_bs_shift, p.basic.dev_sectors); ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n", info->max_io_buf_bytes, info->ublksrv_pid, info->flags, ublk_dev_state_desc(dev)); + + if (affinity) { + char buf[512]; + int i; + + for (i = 0; i < info->nr_hw_queues; i++) { + ublk_print_cpu_set(&affinity[i], buf, sizeof(buf)); + printf("\tqueue %u: tid %d affinity(%s)\n", + i, dev->q[i].tid, buf); + } + free(affinity); + } + fflush(stdout); } @@ -300,7 +429,7 @@ static void ublk_queue_deinit(struct ublk_queue *q) free(q->ios[i].buf_addr); } -static int ublk_queue_init(struct ublk_queue *q) +static int ublk_queue_init(struct ublk_queue *q, unsigned extra_flags) { struct ublk_dev *dev = q->dev; int depth = dev->dev_info.queue_depth; @@ -315,10 +444,14 @@ static int ublk_queue_init(struct ublk_queue *q) q->cmd_inflight = 0; q->tid = gettid(); - if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) { + if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) { q->state |= UBLKSRV_NO_BUF; - q->state |= UBLKSRV_ZC; + if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) + q->state |= UBLKSRV_ZC; + if (dev->dev_info.flags & UBLK_F_AUTO_BUF_REG) + q->state |= UBLKSRV_AUTO_BUF_REG; } + q->state |= extra_flags; cmd_buf_size = ublk_queue_cmd_buf_sz(q); off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); @@ -347,14 +480,16 @@ static int ublk_queue_init(struct ublk_queue *q) } ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth, - IORING_SETUP_COOP_TASKRUN); + IORING_SETUP_COOP_TASKRUN | + IORING_SETUP_SINGLE_ISSUER | + IORING_SETUP_DEFER_TASKRUN); if (ret < 0) { ublk_err("ublk dev %d queue %d setup io_uring failed %d\n", q->dev->dev_info.dev_id, q->q_id, ret); goto fail; } - if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) { + if (dev->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_AUTO_BUF_REG)) { ret = io_uring_register_buffers_sparse(&q->ring, q->q_depth); if (ret) { ublk_err("ublk dev %d queue %d register spare buffers failed %d", @@ -418,6 +553,23 @@ static void ublk_dev_unprep(struct ublk_dev *dev) close(dev->fds[0]); } +static void ublk_set_auto_buf_reg(const struct ublk_queue *q, + struct io_uring_sqe *sqe, + unsigned short tag) +{ + struct ublk_auto_buf_reg buf = {}; + + if (q->tgt_ops->buf_index) + buf.index = q->tgt_ops->buf_index(q, tag); + else + buf.index = tag; + + if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + buf.flags = UBLK_AUTO_BUF_REG_FALLBACK; + + sqe->addr = ublk_auto_buf_reg_to_sqe_addr(&buf); +} + int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) { struct ublksrv_io_cmd *cmd; @@ -429,12 +581,17 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) if (!(io->flags & UBLKSRV_IO_FREE)) return 0; - /* we issue because we need either fetching or committing */ + /* + * we issue because we need either fetching or committing or + * getting data + */ if (!(io->flags & - (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP))) + (UBLKSRV_NEED_FETCH_RQ | UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_NEED_GET_DATA))) return 0; - if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) + if (io->flags & UBLKSRV_NEED_GET_DATA) + cmd_op = UBLK_U_IO_NEED_GET_DATA; + else if (io->flags & UBLKSRV_NEED_COMMIT_RQ_COMP) cmd_op = UBLK_U_IO_COMMIT_AND_FETCH_REQ; else if (io->flags & UBLKSRV_NEED_FETCH_RQ) cmd_op = UBLK_U_IO_FETCH_REQ; @@ -467,6 +624,9 @@ int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag) else cmd->addr = 0; + if (q->state & UBLKSRV_AUTO_BUF_REG) + ublk_set_auto_buf_reg(q, sqe[0], tag); + user_data = build_user_data(tag, _IOC_NR(cmd_op), 0, 0); io_uring_sqe_set_data64(sqe[0], user_data); @@ -551,6 +711,9 @@ static void ublk_handle_cqe(struct io_uring *r, assert(tag < q->q_depth); if (q->tgt_ops->queue_io) q->tgt_ops->queue_io(q, tag); + } else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) { + io->flags |= UBLKSRV_NEED_GET_DATA | UBLKSRV_IO_FREE; + ublk_queue_io_cmd(q, io, tag); } else { /* * COMMIT_REQ will be completed immediately since no fetching @@ -602,18 +765,42 @@ static int ublk_process_io(struct ublk_queue *q) return reapped; } +static void ublk_queue_set_sched_affinity(const struct ublk_queue *q, + cpu_set_t *cpuset) +{ + if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0) + ublk_err("ublk dev %u queue %u set affinity failed", + q->dev->dev_info.dev_id, q->q_id); +} + +struct ublk_queue_info { + struct ublk_queue *q; + sem_t *queue_sem; + cpu_set_t *affinity; + unsigned char auto_zc_fallback; +}; + static void *ublk_io_handler_fn(void *data) { - struct ublk_queue *q = data; + struct ublk_queue_info *info = data; + struct ublk_queue *q = info->q; int dev_id = q->dev->dev_info.dev_id; + unsigned extra_flags = 0; int ret; - ret = ublk_queue_init(q); + if (info->auto_zc_fallback) + extra_flags = UBLKSRV_AUTO_BUF_REG_FALLBACK; + + ret = ublk_queue_init(q, extra_flags); if (ret) { ublk_err("ublk dev %d queue %d init queue failed\n", dev_id, q->q_id); return NULL; } + /* IO perf is sensitive with queue pthread affinity on NUMA machine*/ + ublk_queue_set_sched_affinity(q, info->affinity); + sem_post(info->queue_sem); + ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n", q->tid, dev_id, q->q_id); @@ -639,7 +826,7 @@ static void ublk_set_parameters(struct ublk_dev *dev) dev->dev_info.dev_id, ret); } -static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) +static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id) { uint64_t id; int evtfd = ctx->_evtfd; @@ -652,36 +839,69 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id) else id = ERROR_EVTFD_DEVID; + if (dev && ctx->shadow_dev) + memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q)); + if (write(evtfd, &id, sizeof(id)) != sizeof(id)) return -EINVAL; + close(evtfd); + shmdt(ctx->shadow_dev); + return 0; } static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) { - int ret, i; - void *thread_ret; const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info; + struct ublk_queue_info *qinfo; + cpu_set_t *affinity_buf; + void *thread_ret; + sem_t queue_sem; + int ret, i; ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__); + qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info), + dinfo->nr_hw_queues); + if (!qinfo) + return -ENOMEM; + + sem_init(&queue_sem, 0, 0); ret = ublk_dev_prep(ctx, dev); if (ret) return ret; + ret = ublk_ctrl_get_affinity(dev, &affinity_buf); + if (ret) + return ret; + for (i = 0; i < dinfo->nr_hw_queues; i++) { dev->q[i].dev = dev; dev->q[i].q_id = i; + + qinfo[i].q = &dev->q[i]; + qinfo[i].queue_sem = &queue_sem; + qinfo[i].affinity = &affinity_buf[i]; + qinfo[i].auto_zc_fallback = ctx->auto_zc_fallback; pthread_create(&dev->q[i].thread, NULL, ublk_io_handler_fn, - &dev->q[i]); + &qinfo[i]); } + for (i = 0; i < dinfo->nr_hw_queues; i++) + sem_wait(&queue_sem); + free(qinfo); + free(affinity_buf); + /* everything is fine now, start us */ - ublk_set_parameters(dev); - ret = ublk_ctrl_start_dev(dev, getpid()); + if (ctx->recovery) + ret = ublk_ctrl_end_user_recovery(dev, getpid()); + else { + ublk_set_parameters(dev); + ret = ublk_ctrl_start_dev(dev, getpid()); + } if (ret < 0) { ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret); goto fail; @@ -691,7 +911,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev) if (ctx->fg) ublk_ctrl_dump(dev); else - ublk_send_dev_event(ctx, dev->dev_info.dev_id); + ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id); /* wait until we are terminated */ for (i = 0; i < dinfo->nr_hw_queues; i++) @@ -845,6 +1065,9 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) info->nr_hw_queues = nr_queues; info->queue_depth = depth; info->flags = ctx->flags; + if ((features & UBLK_F_QUIESCE) && + (info->flags & UBLK_F_USER_RECOVERY)) + info->flags |= UBLK_F_QUIESCE; dev->tgt.ops = ops; dev->tgt.sq_depth = depth; dev->tgt.cq_depth = depth; @@ -856,7 +1079,10 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) } } - ret = ublk_ctrl_add_dev(dev); + if (ctx->recovery) + ret = ublk_ctrl_start_user_recovery(dev); + else + ret = ublk_ctrl_add_dev(dev); if (ret < 0) { ublk_err("%s: can't add dev id %d, type %s ret %d\n", __func__, dev_id, tgt_type, ret); @@ -870,7 +1096,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx) fail: if (ret < 0) - ublk_send_dev_event(ctx, -1); + ublk_send_dev_event(ctx, dev, -1); ublk_ctrl_deinit(dev); return ret; } @@ -884,30 +1110,58 @@ static int cmd_dev_add(struct dev_ctx *ctx) if (ctx->fg) goto run; + ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666); + if (ctx->_shmid < 0) { + ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno)); + exit(-1); + } + ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0); + if (ctx->shadow_dev == (struct ublk_dev *)-1) { + ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno)); + exit(-1); + } ctx->_evtfd = eventfd(0, 0); if (ctx->_evtfd < 0) { ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno)); exit(-1); } - setsid(); res = fork(); if (res == 0) { + int res2; + + setsid(); + res2 = fork(); + if (res2 == 0) { + /* prepare for detaching */ + close(STDIN_FILENO); + close(STDOUT_FILENO); + close(STDERR_FILENO); run: - res = __cmd_dev_add(ctx); - return res; + res = __cmd_dev_add(ctx); + return res; + } else { + /* detached from the foreground task */ + exit(EXIT_SUCCESS); + } } else if (res > 0) { uint64_t id; + int exit_code = EXIT_FAILURE; res = read(ctx->_evtfd, &id, sizeof(id)); close(ctx->_evtfd); if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) { ctx->dev_id = id - 1; - return __cmd_dev_list(ctx); + if (__cmd_dev_list(ctx) >= 0) + exit_code = EXIT_SUCCESS; } - exit(EXIT_FAILURE); + shmdt(ctx->shadow_dev); + shmctl(ctx->_shmid, IPC_RMID, NULL); + /* wait for child and detach from it */ + wait(NULL); + exit(exit_code); } else { - return res; + exit(EXIT_FAILURE); } } @@ -969,6 +1223,9 @@ static int __cmd_dev_list(struct dev_ctx *ctx) ublk_err("%s: can't get dev info from %d: %d\n", __func__, ctx->dev_id, ret); } else { + if (ctx->shadow_dev) + memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q)); + ublk_ctrl_dump(dev); } @@ -1006,6 +1263,9 @@ static int cmd_dev_get_features(void) [const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY", [const_ilog2(UBLK_F_ZONED)] = "ZONED", [const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO", + [const_ilog2(UBLK_F_UPDATE_SIZE)] = "UPDATE_SIZE", + [const_ilog2(UBLK_F_AUTO_BUF_REG)] = "AUTO_BUF_REG", + [const_ilog2(UBLK_F_QUIESCE)] = "QUIESCE", }; struct ublk_dev *dev; __u64 features = 0; @@ -1039,15 +1299,103 @@ static int cmd_dev_get_features(void) return ret; } +static int cmd_dev_update_size(struct dev_ctx *ctx) +{ + struct ublk_dev *dev = ublk_ctrl_init(); + struct ublk_params p; + int ret = -EINVAL; + + if (!dev) + return -ENODEV; + + if (ctx->dev_id < 0) { + fprintf(stderr, "device id isn't provided\n"); + goto out; + } + + dev->dev_info.dev_id = ctx->dev_id; + ret = ublk_ctrl_get_params(dev, &p); + if (ret < 0) { + ublk_err("failed to get params %d %s\n", ret, strerror(-ret)); + goto out; + } + + if (ctx->size & ((1 << p.basic.logical_bs_shift) - 1)) { + ublk_err("size isn't aligned with logical block size\n"); + ret = -EINVAL; + goto out; + } + + ret = ublk_ctrl_update_size(dev, ctx->size >> 9); +out: + ublk_ctrl_deinit(dev); + return ret; +} + +static int cmd_dev_quiesce(struct dev_ctx *ctx) +{ + struct ublk_dev *dev = ublk_ctrl_init(); + int ret = -EINVAL; + + if (!dev) + return -ENODEV; + + if (ctx->dev_id < 0) { + fprintf(stderr, "device id isn't provided for quiesce\n"); + goto out; + } + dev->dev_info.dev_id = ctx->dev_id; + ret = ublk_ctrl_quiesce_dev(dev, 10000); + +out: + ublk_ctrl_deinit(dev); + return ret; +} + +static void __cmd_create_help(char *exe, bool recovery) +{ + int i; + + printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n", + exe, recovery ? "recover" : "add"); + printf("\t[--foreground] [--quiet] [-z] [--auto_zc] [--auto_zc_fallback] [--debug_mask mask] [-r 0|1 ] [-g]\n"); + printf("\t[-e 0|1 ] [-i 0|1]\n"); + printf("\t[target options] [backfile1] [backfile2] ...\n"); + printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); + + for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) { + const struct ublk_tgt_ops *ops = tgt_ops_list[i]; + + if (ops->usage) + ops->usage(ops); + } +} + +static void cmd_add_help(char *exe) +{ + __cmd_create_help(exe, false); + printf("\n"); +} + +static void cmd_recover_help(char *exe) +{ + __cmd_create_help(exe, true); + printf("\tPlease provide exact command line for creating this device with real dev_id\n"); + printf("\n"); +} + static int cmd_dev_help(char *exe) { - printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe); - printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n"); + cmd_add_help(exe); + cmd_recover_help(exe); + printf("%s del [-n dev_id] -a \n", exe); - printf("\t -a delete all devices -n delete specified device\n"); + printf("\t -a delete all devices -n delete specified device\n\n"); printf("%s list [-n dev_id] -a \n", exe); - printf("\t -a list all devices, -n list specified device, default -a \n"); + printf("\t -a list all devices, -n list specified device, default -a \n\n"); printf("%s features\n", exe); + printf("%s update_size -n dev_id -s|--size size_in_bytes \n", exe); + printf("%s quiesce -n dev_id\n", exe); return 0; } @@ -1063,9 +1411,16 @@ int main(int argc, char *argv[]) { "quiet", 0, NULL, 0 }, { "zero_copy", 0, NULL, 'z' }, { "foreground", 0, NULL, 0 }, - { "chunk_size", 1, NULL, 0 }, + { "recovery", 1, NULL, 'r' }, + { "recovery_fail_io", 1, NULL, 'e'}, + { "recovery_reissue", 1, NULL, 'i'}, + { "get_data", 1, NULL, 'g'}, + { "auto_zc", 0, NULL, 0 }, + { "auto_zc_fallback", 0, NULL, 0 }, + { "size", 1, NULL, 's'}, { 0, 0, 0, 0 } }; + const struct ublk_tgt_ops *ops = NULL; int option_idx, opt; const char *cmd = argv[1]; struct dev_ctx ctx = { @@ -1073,15 +1428,18 @@ int main(int argc, char *argv[]) .nr_hw_queues = 2, .dev_id = -1, .tgt_type = "unknown", - .chunk_size = 65536, /* def chunk size is 64K */ }; int ret = -EINVAL, i; + int tgt_argc = 1; + char *tgt_argv[MAX_NR_TGT_ARG] = { NULL }; + int value; if (argc == 1) return ret; + opterr = 0; optind = 2; - while ((opt = getopt_long(argc, argv, "t:n:d:q:az", + while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:s:gaz", longopts, &option_idx)) != -1) { switch (opt) { case 'a': @@ -1103,6 +1461,27 @@ int main(int argc, char *argv[]) case 'z': ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY; break; + case 'r': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY; + break; + case 'e': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO; + break; + case 'i': + value = strtol(optarg, NULL, 10); + if (value) + ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE; + break; + case 'g': + ctx.flags |= UBLK_F_NEED_GET_DATA; + break; + case 's': + ctx.size = strtoull(optarg, NULL, 10); + break; case 0: if (!strcmp(longopts[option_idx].name, "debug_mask")) ublk_dbg_mask = strtol(optarg, NULL, 16); @@ -1110,19 +1489,67 @@ int main(int argc, char *argv[]) ublk_dbg_mask = 0; if (!strcmp(longopts[option_idx].name, "foreground")) ctx.fg = 1; - if (!strcmp(longopts[option_idx].name, "chunk_size")) - ctx.chunk_size = strtol(optarg, NULL, 10); + if (!strcmp(longopts[option_idx].name, "auto_zc")) + ctx.flags |= UBLK_F_AUTO_BUF_REG; + if (!strcmp(longopts[option_idx].name, "auto_zc_fallback")) + ctx.auto_zc_fallback = 1; + break; + case '?': + /* + * target requires every option must have argument + */ + if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') { + fprintf(stderr, "every target option requires argument: %s %s\n", + argv[optind - 1], argv[optind]); + exit(EXIT_FAILURE); + } + + if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) { + tgt_argv[tgt_argc++] = argv[optind - 1]; + tgt_argv[tgt_argc++] = argv[optind]; + } else { + fprintf(stderr, "too many target options\n"); + exit(EXIT_FAILURE); + } + optind += 1; + break; } } + /* auto_zc_fallback depends on F_AUTO_BUF_REG & F_SUPPORT_ZERO_COPY */ + if (ctx.auto_zc_fallback && + !((ctx.flags & UBLK_F_AUTO_BUF_REG) && + (ctx.flags & UBLK_F_SUPPORT_ZERO_COPY))) { + ublk_err("%s: auto_zc_fallback is set but neither " + "F_AUTO_BUF_REG nor F_SUPPORT_ZERO_COPY is enabled\n", + __func__); + return -EINVAL; + } + i = optind; while (i < argc && ctx.nr_files < MAX_BACK_FILES) { ctx.files[ctx.nr_files++] = argv[i++]; } + ops = ublk_find_tgt(ctx.tgt_type); + if (ops && ops->parse_cmd_line) { + optind = 0; + + tgt_argv[0] = ctx.tgt_type; + ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv); + } + if (!strcmp(cmd, "add")) ret = cmd_dev_add(&ctx); - else if (!strcmp(cmd, "del")) + else if (!strcmp(cmd, "recover")) { + if (ctx.dev_id < 0) { + fprintf(stderr, "device id isn't provided for recovering\n"); + ret = -EINVAL; + } else { + ctx.recovery = 1; + ret = cmd_dev_add(&ctx); + } + } else if (!strcmp(cmd, "del")) ret = cmd_dev_del(&ctx); else if (!strcmp(cmd, "list")) { ctx.all = 1; @@ -1131,6 +1558,10 @@ int main(int argc, char *argv[]) ret = cmd_dev_help(argv[0]); else if (!strcmp(cmd, "features")) ret = cmd_dev_get_features(); + else if (!strcmp(cmd, "update_size")) + ret = cmd_dev_update_size(&ctx); + else if (!strcmp(cmd, "quiesce")) + ret = cmd_dev_quiesce(&ctx); else cmd_dev_help(argv[0]); diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index 760ff8ffb810..e34508bf5798 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -19,10 +19,15 @@ #include <sys/inotify.h> #include <sys/wait.h> #include <sys/eventfd.h> -#include <sys/uio.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <linux/io_uring.h> #include <liburing.h> -#include <linux/ublk_cmd.h> +#include <semaphore.h> + +/* allow ublk_dep.h to override ublk_cmd.h */ #include "ublk_dep.h" +#include <linux/ublk_cmd.h> #define __maybe_unused __attribute__((unused)) #define MAX_BACK_FILES 4 @@ -30,6 +35,8 @@ #define min(a, b) ((a) < (b) ? (a) : (b)) #endif +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) + /****************** part 1: libublk ********************/ #define CTRL_DEV "/dev/ublk-control" @@ -42,8 +49,8 @@ #define UBLKSRV_IO_IDLE_SECS 20 #define UBLK_IO_MAX_BYTES (1 << 20) -#define UBLK_MAX_QUEUES 4 -#define UBLK_QUEUE_DEPTH 128 +#define UBLK_MAX_QUEUES 32 +#define UBLK_QUEUE_DEPTH 1024 #define UBLK_DBG_DEV (1U << 0) #define UBLK_DBG_QUEUE (1U << 1) @@ -55,6 +62,16 @@ struct ublk_dev; struct ublk_queue; +struct stripe_ctx { + /* stripe */ + unsigned int chunk_size; +}; + +struct fault_inject_ctx { + /* fault_inject */ + unsigned long delay_us; +}; + struct dev_ctx { char tgt_type[16]; unsigned long flags; @@ -66,11 +83,22 @@ struct dev_ctx { unsigned int logging:1; unsigned int all:1; unsigned int fg:1; - - /* stripe */ - unsigned int chunk_size; + unsigned int recovery:1; + unsigned int auto_zc_fallback:1; int _evtfd; + int _shmid; + + /* built from shmem, only for ublk_dump_dev() */ + struct ublk_dev *shadow_dev; + + /* for 'update_size' command */ + unsigned long long size; + + union { + struct stripe_ctx stripe; + struct fault_inject_ctx fault_inject; + }; }; struct ublk_ctrl_cmd_data { @@ -90,6 +118,8 @@ struct ublk_io { #define UBLKSRV_NEED_FETCH_RQ (1UL << 0) #define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1) #define UBLKSRV_IO_FREE (1UL << 2) +#define UBLKSRV_NEED_GET_DATA (1UL << 3) +#define UBLKSRV_NEED_REG_BUF (1UL << 4) unsigned short flags; unsigned short refs; /* used by target code only */ @@ -107,6 +137,17 @@ struct ublk_tgt_ops { int (*queue_io)(struct ublk_queue *, int tag); void (*tgt_io_done)(struct ublk_queue *, int tag, const struct io_uring_cqe *); + + /* + * Target specific command line handling + * + * each option requires argument for target command line + */ + void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]); + void (*usage)(const struct ublk_tgt_ops *ops); + + /* return buffer index for UBLK_F_AUTO_BUF_REG */ + unsigned short (*buf_index)(const struct ublk_queue *, int tag); }; struct ublk_tgt { @@ -135,6 +176,8 @@ struct ublk_queue { #define UBLKSRV_QUEUE_IDLE (1U << 1) #define UBLKSRV_NO_BUF (1U << 2) #define UBLKSRV_ZC (1U << 3) +#define UBLKSRV_AUTO_BUF_REG (1U << 4) +#define UBLKSRV_AUTO_BUF_REG_FALLBACK (1U << 5) unsigned state; pid_t tid; pthread_t thread; @@ -170,6 +213,12 @@ struct ublk_dev { extern unsigned int ublk_dbg_mask; extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag); + +static inline int ublk_io_auto_zc_fallback(const struct ublksrv_io_desc *iod) +{ + return !!(iod->op_flags & UBLK_IO_F_NEED_REG_BUF); +} + static inline int is_target_io(__u64 user_data) { return (user_data & (1ULL << 63)) != 0; @@ -354,9 +403,15 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q) return q->state & UBLKSRV_ZC; } +static inline int ublk_queue_use_auto_zc(const struct ublk_queue *q) +{ + return q->state & UBLKSRV_AUTO_BUF_REG; +} + extern const struct ublk_tgt_ops null_tgt_ops; extern const struct ublk_tgt_ops loop_tgt_ops; extern const struct ublk_tgt_ops stripe_tgt_ops; +extern const struct ublk_tgt_ops fault_inject_tgt_ops; void backing_file_tgt_deinit(struct ublk_dev *dev); int backing_file_tgt_init(struct ublk_dev *dev); diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index 91fec3690d4b..44aca31cf2b0 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ -42,10 +42,22 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) return 0; } +static void __setup_nop_io(int tag, const struct ublksrv_io_desc *iod, + struct io_uring_sqe *sqe) +{ + unsigned ublk_op = ublksrv_get_op(iod); + + io_uring_prep_nop(sqe); + sqe->buf_index = tag; + sqe->flags |= IOSQE_FIXED_FILE; + sqe->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT; + sqe->len = iod->nr_sectors << 9; /* injected result */ + sqe->user_data = build_user_data(tag, ublk_op, 0, 1); +} + static int null_queue_zc_io(struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); - unsigned ublk_op = ublksrv_get_op(iod); struct io_uring_sqe *sqe[3]; ublk_queue_alloc_sqes(q, sqe, 3); @@ -55,12 +67,8 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag) ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1); sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; - io_uring_prep_nop(sqe[1]); - sqe[1]->buf_index = tag; - sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK; - sqe[1]->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT; - sqe[1]->len = iod->nr_sectors << 9; /* injected result */ - sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1); + __setup_nop_io(tag, iod, sqe[1]); + sqe[1]->flags |= IOSQE_IO_HARDLINK; io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag); sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1); @@ -69,6 +77,16 @@ static int null_queue_zc_io(struct ublk_queue *q, int tag) return 2; } +static int null_queue_auto_zc_io(struct ublk_queue *q, int tag) +{ + const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); + struct io_uring_sqe *sqe[1]; + + ublk_queue_alloc_sqes(q, sqe, 1); + __setup_nop_io(tag, iod, sqe[0]); + return 1; +} + static void ublk_null_io_done(struct ublk_queue *q, int tag, const struct io_uring_cqe *cqe) { @@ -94,22 +112,37 @@ static void ublk_null_io_done(struct ublk_queue *q, int tag, static int ublk_null_queue_io(struct ublk_queue *q, int tag) { const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag); - int zc = ublk_queue_use_zc(q); + unsigned auto_zc = ublk_queue_use_auto_zc(q); + unsigned zc = ublk_queue_use_zc(q); int queued; - if (!zc) { + if (auto_zc && !ublk_io_auto_zc_fallback(iod)) + queued = null_queue_auto_zc_io(q, tag); + else if (zc) + queued = null_queue_zc_io(q, tag); + else { ublk_complete_io(q, tag, iod->nr_sectors << 9); return 0; } - - queued = null_queue_zc_io(q, tag); ublk_queued_tgt_io(q, tag, queued); return 0; } +/* + * return invalid buffer index for triggering auto buffer register failure, + * then UBLK_IO_RES_NEED_REG_BUF handling is covered + */ +static unsigned short ublk_null_buf_index(const struct ublk_queue *q, int tag) +{ + if (q->state & UBLKSRV_AUTO_BUF_REG_FALLBACK) + return (unsigned short)-1; + return tag; +} + const struct ublk_tgt_ops null_tgt_ops = { .name = "null", .init_tgt = ublk_null_tgt_init, .queue_io = ublk_null_queue_io, .tgt_io_done = ublk_null_io_done, + .buf_index = ublk_null_buf_index, }; diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 179731c3dd6f..404a143bf3d6 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -70,7 +70,7 @@ static void free_stripe_array(struct stripe_array *s) } static void calculate_stripe_array(const struct stripe_conf *conf, - const struct ublksrv_io_desc *iod, struct stripe_array *s) + const struct ublksrv_io_desc *iod, struct stripe_array *s, void *base) { const unsigned shift = conf->shift - 9; const unsigned chunk_sects = 1 << shift; @@ -102,7 +102,7 @@ static void calculate_stripe_array(const struct stripe_conf *conf, } assert(this->nr_vec < this->cap); - this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done); + this->vec[this->nr_vec].iov_base = (void *)(base + done); this->vec[this->nr_vec++].iov_len = nr_sects << 9; start += nr_sects; @@ -126,15 +126,17 @@ static inline enum io_uring_op stripe_to_uring_op( static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) { const struct stripe_conf *conf = get_chunk_shift(q); - int zc = !!(ublk_queue_use_zc(q) != 0); - enum io_uring_op op = stripe_to_uring_op(iod, zc); + unsigned auto_zc = (ublk_queue_use_auto_zc(q) != 0); + unsigned zc = (ublk_queue_use_zc(q) != 0); + enum io_uring_op op = stripe_to_uring_op(iod, zc | auto_zc); struct io_uring_sqe *sqe[NR_STRIPE]; struct stripe_array *s = alloc_stripe_array(conf, iod); struct ublk_io *io = ublk_get_io(q, tag); int i, extra = zc ? 2 : 0; + void *base = (zc | auto_zc) ? NULL : (void *)iod->addr; io->private_data = s; - calculate_stripe_array(conf, iod, s); + calculate_stripe_array(conf, iod, s, base); ublk_queue_alloc_sqes(q, sqe, s->nr + extra); @@ -153,12 +155,11 @@ static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_ (void *)t->vec, t->nr_vec, t->start << 9); - if (zc) { + io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + if (auto_zc || zc) { sqe[i]->buf_index = tag; - io_uring_sqe_set_flags(sqe[i], - IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK); - } else { - io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); + if (zc) + sqe[i]->flags |= IOSQE_IO_HARDLINK; } /* bit63 marks us as tgt io */ sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1); @@ -281,12 +282,17 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev) .max_sectors = dev->dev_info.max_io_buf_bytes >> 9, }, }; - unsigned chunk_size = ctx->chunk_size; + unsigned chunk_size = ctx->stripe.chunk_size; struct stripe_conf *conf; unsigned chunk_shift; loff_t bytes = 0; int ret, i, mul = 1; + if (ctx->auto_zc_fallback) { + ublk_err("%s: not support auto_zc_fallback\n", __func__); + return -EINVAL; + } + if ((chunk_size & (chunk_size - 1)) || !chunk_size) { ublk_err("invalid chunk size %u\n", chunk_size); return -EINVAL; @@ -344,10 +350,36 @@ static void ublk_stripe_tgt_deinit(struct ublk_dev *dev) backing_file_tgt_deinit(dev); } +static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[]) +{ + static const struct option longopts[] = { + { "chunk_size", 1, NULL, 0 }, + { 0, 0, 0, 0 } + }; + int option_idx, opt; + + ctx->stripe.chunk_size = 65536; + while ((opt = getopt_long(argc, argv, "", + longopts, &option_idx)) != -1) { + switch (opt) { + case 0: + if (!strcmp(longopts[option_idx].name, "chunk_size")) + ctx->stripe.chunk_size = strtol(optarg, NULL, 10); + } + } +} + +static void ublk_stripe_usage(const struct ublk_tgt_ops *ops) +{ + printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n"); +} + const struct ublk_tgt_ops stripe_tgt_ops = { .name = "stripe", .init_tgt = ublk_stripe_tgt_init, .deinit_tgt = ublk_stripe_tgt_deinit, .queue_io = ublk_stripe_queue_io, .tgt_io_done = ublk_stripe_io_done, + .parse_cmd_line = ublk_stripe_cmd_line, + .usage = ublk_stripe_usage, }; diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index a88b35943227..0145569ee7e9 100755 --- a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -17,12 +17,17 @@ _get_disk_dev_t() { local minor dev=/dev/ublkb"${dev_id}" - major=$(stat -c '%Hr' "$dev") - minor=$(stat -c '%Lr' "$dev") + major="0x"$(stat -c '%t' "$dev") + minor="0x"$(stat -c '%T' "$dev") echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) )) } +_get_disk_size() +{ + lsblk -b -o SIZE -n "$1" +} + _run_fio_verify_io() { fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \ --bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \ @@ -30,18 +35,26 @@ _run_fio_verify_io() { } _create_backfile() { - local my_size=$1 - local my_file + local index=$1 + local new_size=$2 + local old_file + local new_file - my_file=$(mktemp ublk_file_"${my_size}"_XXXXX) - truncate -s "${my_size}" "${my_file}" - echo "$my_file" + old_file="${UBLK_BACKFILES[$index]}" + [ -f "$old_file" ] && rm -f "$old_file" + + new_file=$(mktemp ublk_file_"${new_size}"_XXXXX) + truncate -s "${new_size}" "${new_file}" + UBLK_BACKFILES["$index"]="$new_file" } -_remove_backfile() { - local file=$1 +_remove_files() { + local file - [ -f "$file" ] && rm -f "$file" + for file in "${UBLK_BACKFILES[@]}"; do + [ -f "$file" ] && rm -f "$file" + done + [ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP" } _create_tmp_dir() { @@ -106,6 +119,7 @@ _prep_test() { local type=$1 shift 1 modprobe ublk_drv > /dev/null 2>&1 + UBLK_TMP=$(mktemp ublk_test_XXXXX) [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*" } @@ -129,7 +143,10 @@ _show_result() echo "$1 : [FAIL]" fi fi - [ "$2" -ne 0 ] && exit "$2" + if [ "$2" -ne 0 ]; then + _remove_files + exit "$2" + fi return 0 } @@ -138,16 +155,16 @@ _check_add_dev() { local tid=$1 local code=$2 - shift 2 + if [ "${code}" -ne 0 ]; then - _remove_test_files "$@" _show_result "${tid}" "${code}" fi } _cleanup_test() { "${UBLK_PROG}" del -a - rm -f "$UBLK_TMP" + + _remove_files } _have_feature() @@ -158,9 +175,11 @@ _have_feature() return 1 } -_add_ublk_dev() { - local kublk_temp; +_create_ublk_dev() { local dev_id; + local cmd=$1 + + shift 1 if [ ! -c /dev/ublk-control ]; then return ${UBLK_SKIP_CODE} @@ -171,17 +190,54 @@ _add_ublk_dev() { fi fi - kublk_temp=$(mktemp /tmp/kublk-XXXXXX) - if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then + if ! dev_id=$("${UBLK_PROG}" "$cmd" "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then echo "fail to add ublk dev $*" - rm -f "${kublk_temp}" return 255 fi - - dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}') udevadm settle - rm -f "${kublk_temp}" - echo "${dev_id}" + + if [[ "$dev_id" =~ ^[0-9]+$ ]]; then + echo "${dev_id}" + else + return 255 + fi +} + +_add_ublk_dev() { + _create_ublk_dev "add" "$@" +} + +_recover_ublk_dev() { + local dev_id + local state + + dev_id=$(_create_ublk_dev "recover" "$@") + for ((j=0;j<20;j++)); do + state=$(_get_ublk_dev_state "${dev_id}") + [ "$state" == "LIVE" ] && break + sleep 1 + done + echo "$state" +} + +# quiesce device and return ublk device state +__ublk_quiesce_dev() +{ + local dev_id=$1 + local exp_state=$2 + local state + + if ! ${UBLK_PROG} quiesce -n "${dev_id}"; then + state=$(_get_ublk_dev_state "${dev_id}") + return "$state" + fi + + for ((j=0;j<50;j++)); do + state=$(_get_ublk_dev_state "${dev_id}") + [ "$state" == "$exp_state" ] && break + sleep 1 + done + echo "$state" } # kill the ublk daemon and return ublk device state @@ -220,7 +276,7 @@ __run_io_and_remove() local kill_server=$3 fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ - --rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \ + --rw=randrw --norandommap --iodepth=256 --size="${size}" --numjobs="$(nproc)" \ --runtime=20 --time_based > /dev/null 2>&1 & sleep 2 if [ "${kill_server}" = "yes" ]; then @@ -238,15 +294,86 @@ __run_io_and_remove() wait } +run_io_and_remove() +{ + local size=$1 + local dev_id + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" + if ! __run_io_and_remove "$dev_id" "${size}" "no"; then + echo "/dev/ublkc$dev_id isn't removed" + exit 255 + fi +} + +run_io_and_kill_daemon() +{ + local size=$1 + local dev_id + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" + if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then + echo "/dev/ublkc$dev_id isn't removed res ${res}" + exit 255 + fi +} + +run_io_and_recover() +{ + local action=$1 + local state + local dev_id + + shift 1 + dev_id=$(_add_ublk_dev "$@") + _check_add_dev "$TID" $? + + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=randread --iodepth=256 --size="${size}" --numjobs=4 \ + --runtime=20 --time_based > /dev/null 2>&1 & + sleep 4 + + if [ "$action" == "kill_daemon" ]; then + state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED") + elif [ "$action" == "quiesce_dev" ]; then + state=$(__ublk_quiesce_dev "${dev_id}" "QUIESCED") + fi + if [ "$state" != "QUIESCED" ]; then + echo "device isn't quiesced($state) after $action" + return 255 + fi + + state=$(_recover_ublk_dev -n "$dev_id" "$@") + if [ "$state" != "LIVE" ]; then + echo "faile to recover to LIVE($state)" + return 255 + fi + + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi + wait +} + + _ublk_test_top_dir() { cd "$(dirname "$0")" && pwd } -UBLK_TMP=$(mktemp ublk_test_XXXXX) UBLK_PROG=$(_ublk_test_top_dir)/kublk UBLK_TEST_QUIET=1 UBLK_TEST_SHOW_RESULT=1 +UBLK_BACKFILES=() export UBLK_PROG export UBLK_TEST_QUIET export UBLK_TEST_SHOW_RESULT diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh new file mode 100755 index 000000000000..8b533217d4a1 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_04.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_04" +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover "kill_daemon" "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 & +ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "recover" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh new file mode 100755 index 000000000000..398e9e2b58e1 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_05.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_05" +ERR_CODE=0 + +ublk_run_recover_test() +{ + run_io_and_recover "kill_daemon" "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "recover" "basic recover function verification (zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_recover_test -t null -q 2 -r 1 -z & +ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_recover_test -t null -q 2 -r 1 -z -i 1 & +ublk_run_recover_test -t loop -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_recover_test -t stripe -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "recover" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh new file mode 100755 index 000000000000..fd42062b7b76 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_06.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_06" +ERR_CODE=0 + +_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server" + +# configure ublk server to sleep 2s before completing each I/O +dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000) +_check_add_dev $TID $? + +STARTTIME=${SECONDS} + +dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 & +dd_pid=$! + +__ublk_kill_daemon ${dev_id} "DEAD" >/dev/null + +wait $dd_pid +dd_exitcode=$? + +ENDTIME=${SECONDS} +ELAPSED=$(($ENDTIME - $STARTTIME)) + +# assert that dd sees an error and exits quickly after ublk server is +# killed. previously this relied on seeing an I/O timeout and so would +# take ~30s +if [ $dd_exitcode -eq 0 ]; then + echo "dd unexpectedly exited successfully!" + ERR_CODE=255 +fi +if [ $ELAPSED -ge 5 ]; then + echo "dd took $ELAPSED seconds to exit (>= 5s tolerance)!" + ERR_CODE=255 +fi + +_cleanup_test "fault_inject" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_07.sh b/tools/testing/selftests/ublk/test_generic_07.sh new file mode 100755 index 000000000000..cba86451fa5e --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_07.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_07" +ERR_CODE=0 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_NEED_GET_DATA" + +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -q 2 -g "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? +if [ "$ERR_CODE" -eq 0 ]; then + _mkfs_mount_test /dev/ublkb"${dev_id}" + ERR_CODE=$? +fi + +_cleanup_test "generic" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_08.sh b/tools/testing/selftests/ublk/test_generic_08.sh new file mode 100755 index 000000000000..b222f3a77e12 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_08.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_08" +ERR_CODE=0 + +if ! _have_feature "AUTO_BUF_REG"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "generic" "test UBLK_F_AUTO_BUF_REG" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t loop -q 2 --auto_zc "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? + +if ! _mkfs_mount_test /dev/ublkb"${dev_id}"; then + _cleanup_test "generic" + _show_result $TID 255 +fi + +dev_id=$(_add_ublk_dev -t stripe --auto_zc "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "generic" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_09.sh b/tools/testing/selftests/ublk/test_generic_09.sh new file mode 100755 index 000000000000..bb6f77ca5522 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_09.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_09" +ERR_CODE=0 + +if ! _have_feature "AUTO_BUF_REG"; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "basic IO test" + +dev_id=$(_add_ublk_dev -t null -z --auto_zc --auto_zc_fallback) +_check_add_dev $TID $? + +# run fio over the two disks +fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1 +ERR_CODE=$? + +_cleanup_test "null" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_10.sh b/tools/testing/selftests/ublk/test_generic_10.sh new file mode 100755 index 000000000000..abc11c3d416b --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_10.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_10" +ERR_CODE=0 + +if ! _have_feature "UPDATE_SIZE"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "check update size" + +dev_id=$(_add_ublk_dev -t null) +_check_add_dev $TID $? + +size=$(_get_disk_size /dev/ublkb"${dev_id}") +size=$(( size / 2 )) +if ! "$UBLK_PROG" update_size -n "$dev_id" -s "$size"; then + ERR_CODE=255 +fi + +new_size=$(_get_disk_size /dev/ublkb"${dev_id}") +if [ "$new_size" != "$size" ]; then + ERR_CODE=255 +fi + +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_11.sh b/tools/testing/selftests/ublk/test_generic_11.sh new file mode 100755 index 000000000000..a00357a5ec6b --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_11.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_11" +ERR_CODE=0 + +ublk_run_quiesce_recover() +{ + run_io_and_recover "quiesce_dev" "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_feature "QUIESCE"; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "quiesce" "basic quiesce & recover function verification" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_run_quiesce_recover -t null -q 2 -r 1 & +ublk_run_quiesce_recover -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" & +ublk_run_quiesce_recover -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +ublk_run_quiesce_recover -t null -q 2 -r 1 -i 1 & +ublk_run_quiesce_recover -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" & +ublk_run_quiesce_recover -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait + +_cleanup_test "quiesce" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh index 1ef8b6044777..833fa0dbc700 100755 --- a/tools/testing/selftests/ublk/test_loop_01.sh +++ b/tools/testing/selftests/ublk/test_loop_01.sh @@ -12,10 +12,10 @@ fi _prep_test "loop" "write and verify test" -backfile_0=$(_create_backfile 256M) +_create_backfile 0 256M -dev_id=$(_add_ublk_dev -t loop "$backfile_0") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -23,6 +23,4 @@ ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_02.sh b/tools/testing/selftests/ublk/test_loop_02.sh index 03863d825e07..874568b3646b 100755 --- a/tools/testing/selftests/ublk/test_loop_02.sh +++ b/tools/testing/selftests/ublk/test_loop_02.sh @@ -8,15 +8,13 @@ ERR_CODE=0 _prep_test "loop" "mkfs & mount & umount" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop "$backfile_0") -_check_add_dev $TID $? "$backfile_0" +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh index e9ca744de8b1..c30f797c6429 100755 --- a/tools/testing/selftests/ublk/test_loop_03.sh +++ b/tools/testing/selftests/ublk/test_loop_03.sh @@ -12,9 +12,9 @@ fi _prep_test "loop" "write and verify over zero copy" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") -_check_add_dev $TID $? "$backfile_0" +_create_backfile 0 256M +dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -22,6 +22,4 @@ ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh index 1435422c38ec..b01d75b3214d 100755 --- a/tools/testing/selftests/ublk/test_loop_04.sh +++ b/tools/testing/selftests/ublk/test_loop_04.sh @@ -8,15 +8,14 @@ ERR_CODE=0 _prep_test "loop" "mkfs & mount & umount with zero copy" -backfile_0=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t loop -z "$backfile_0") -_check_add_dev $TID $? "$backfile_0" +_create_backfile 0 256M + +dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh index 2e6e2e6978fc..de2141533074 100755 --- a/tools/testing/selftests/ublk/test_loop_05.sh +++ b/tools/testing/selftests/ublk/test_loop_05.sh @@ -12,10 +12,10 @@ fi _prep_test "loop" "write and verify test" -backfile_0=$(_create_backfile 256M) +_create_backfile 0 256M -dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -q 2 -t loop "${UBLK_BACKFILES[0]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M @@ -23,6 +23,4 @@ ERR_CODE=$? _cleanup_test "loop" -_remove_backfile "$backfile_0" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index a8be24532b24..7d3150f057d4 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -4,44 +4,31 @@ . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh TID="stress_01" ERR_CODE=0 -DEV_ID=-1 ublk_io_and_remove() { - local size=$1 - shift 1 - local backfile="" - if echo "$@" | grep -q "loop"; then - backfile=${*: -1} - fi - DEV_ID=$(_add_ublk_dev "$@") - _check_add_dev $TID $? "${backfile}" - - [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" - if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then - echo "/dev/ublkc${DEV_ID} isn't removed" - _remove_backfile "${backfile}" - exit 255 + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE fi } +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + _prep_test "stress" "run IO and remove device" -ublk_io_and_remove 8G -t null -q 4 -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M -BACK_FILE=$(_create_backfile 256M) -ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}" -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +ublk_io_and_remove 8G -t null -q 4 & +ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & +wait -ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}" -ERR_CODE=$? _cleanup_test "stress" -_remove_backfile "${BACK_FILE}" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 2159e4cc8140..4bdd921081e5 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -4,44 +4,33 @@ . "$(cd "$(dirname "$0")" && pwd)"/test_common.sh TID="stress_02" ERR_CODE=0 -DEV_ID=-1 + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi ublk_io_and_kill_daemon() { - local size=$1 - shift 1 - local backfile="" - if echo "$@" | grep -q "loop"; then - backfile=${*: -1} - fi - DEV_ID=$(_add_ublk_dev "$@") - _check_add_dev $TID $? "${backfile}" - - [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)" - if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then - echo "/dev/ublkc${DEV_ID} isn't removed res ${res}" - _remove_backfile "${backfile}" - exit 255 + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE fi } _prep_test "stress" "run IO and kill ublk server" -ublk_io_and_kill_daemon 8G -t null -q 4 -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M -BACK_FILE=$(_create_backfile 256M) -ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}" -ERR_CODE=$? -if [ ${ERR_CODE} -ne 0 ]; then - _show_result $TID $ERR_CODE -fi +for nr_queue in 1 4; do + ublk_io_and_kill_daemon 8G -t null -q "$nr_queue" & + ublk_io_and_kill_daemon 256M -t loop -q "$nr_queue" "${UBLK_BACKFILES[0]}" & + ublk_io_and_kill_daemon 256M -t stripe -q "$nr_queue" "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + wait +done -ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}" -ERR_CODE=$? _cleanup_test "stress" -_remove_backfile "${BACK_FILE}" _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh new file mode 100755 index 000000000000..7d728ce50774 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_03.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_03" +ERR_CODE=0 + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi + +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and remove device(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_remove 8G -t null -q 4 -z & +ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + +if _have_feature "AUTO_BUF_REG"; then + ublk_io_and_remove 8G -t null -q 4 --auto_zc & + ublk_io_and_remove 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & + ublk_io_and_remove 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_remove 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & +fi +wait + +_cleanup_test "stress" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh new file mode 100755 index 000000000000..9bcfa64ea1f0 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_04.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_04" +ERR_CODE=0 + +ublk_io_and_kill_daemon() +{ + run_io_and_kill_daemon "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +if ! _have_program fio; then + exit "$UBLK_SKIP_CODE" +fi +if ! _have_feature "ZERO_COPY"; then + exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stress" "run IO and kill ublk server(zero copy)" + +_create_backfile 0 256M +_create_backfile 1 128M +_create_backfile 2 128M + +ublk_io_and_kill_daemon 8G -t null -q 4 -z & +ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" & +ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + +if _have_feature "AUTO_BUF_REG"; then + ublk_io_and_kill_daemon 8G -t null -q 4 --auto_zc & + ublk_io_and_kill_daemon 256M -t loop -q 4 --auto_zc "${UBLK_BACKFILES[0]}" & + ublk_io_and_kill_daemon 256M -t stripe -q 4 --auto_zc "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" & + ublk_io_and_kill_daemon 8G -t null -q 4 -z --auto_zc --auto_zc_fallback & +fi +wait + +_cleanup_test "stress" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh new file mode 100755 index 000000000000..bcfc904cefc6 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stress_05.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh +TID="stress_05" +ERR_CODE=0 + +run_io_and_remove() +{ + local size=$1 + local dev_id + local dev_pid + shift 1 + + dev_id=$(_add_ublk_dev "$@") + _check_add_dev $TID $? + + [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)" + + fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \ + --rw=readwrite --iodepth=128 --size="${size}" --numjobs=4 \ + --runtime=40 --time_based > /dev/null 2>&1 & + sleep 4 + + dev_pid=$(_get_ublk_daemon_pid "$dev_id") + kill -9 "$dev_pid" + + if ! __remove_ublk_dev_return "${dev_id}"; then + echo "delete dev ${dev_id} failed" + return 255 + fi +} + +ublk_io_and_remove() +{ + run_io_and_remove "$@" + ERR_CODE=$? + if [ ${ERR_CODE} -ne 0 ]; then + echo "$TID failure: $*" + _show_result $TID $ERR_CODE + fi +} + +_prep_test "stress" "run IO and remove device with recovery enabled" + +_create_backfile 0 256M +_create_backfile 1 256M + +for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" & + wait +done + +if _have_feature "ZERO_COPY"; then + for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g -z -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + wait + done +fi + +if _have_feature "AUTO_BUF_REG"; then + for reissue in $(seq 0 1); do + ublk_io_and_remove 8G -t null -q 4 -g --auto_zc -r 1 -i "$reissue" & + ublk_io_and_remove 256M -t loop -q 4 -g --auto_zc -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" & + ublk_io_and_remove 8G -t null -q 4 -g -z --auto_zc --auto_zc_fallback -r 1 -i "$reissue" & + wait + done +fi + +_cleanup_test "stress" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh index 7e387ef656ea..4e4f0fdf3c9b 100755 --- a/tools/testing/selftests/ublk/test_stripe_01.sh +++ b/tools/testing/selftests/ublk/test_stripe_01.sh @@ -12,19 +12,15 @@ fi _prep_test "stripe" "write and verify test" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) +_create_backfile 0 256M +_create_backfile 1 256M -dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh index e8a45fa82dde..5820ab2efba4 100755 --- a/tools/testing/selftests/ublk/test_stripe_02.sh +++ b/tools/testing/selftests/ublk/test_stripe_02.sh @@ -8,17 +8,14 @@ ERR_CODE=0 _prep_test "stripe" "mkfs & mount & umount" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) -dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "$backfile_0" "$backfile_1" +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? _mkfs_mount_test /dev/ublkb"${dev_id}" ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh index c1b34af36145..20b977e27814 100755 --- a/tools/testing/selftests/ublk/test_stripe_03.sh +++ b/tools/testing/selftests/ublk/test_stripe_03.sh @@ -12,19 +12,15 @@ fi _prep_test "stripe" "write and verify test" -backfile_0=$(_create_backfile 256M) -backfile_1=$(_create_backfile 256M) +_create_backfile 0 256M +_create_backfile 1 256M -dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1") -_check_add_dev $TID $? "${backfile_0}" +dev_id=$(_add_ublk_dev -q 2 -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? # run fio over the ublk disk _run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M ERR_CODE=$? _cleanup_test "stripe" - -_remove_backfile "$backfile_0" -_remove_backfile "$backfile_1" - _show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh new file mode 100755 index 000000000000..1b51ed2f1d84 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_04.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_04" +ERR_CODE=0 + +_prep_test "stripe" "mkfs & mount & umount on zero copy" + +_create_backfile 0 256M +_create_backfile 1 256M + +dev_id=$(_add_ublk_dev -t stripe -z -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}") +_check_add_dev $TID $? + +_mkfs_mount_test /dev/ublkb"${dev_id}" +ERR_CODE=$? + +_cleanup_test "stripe" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index 28422c32cc8f..f703fcfe9f7c 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -19,7 +19,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \ - corrupt_xstate_header amx lam test_shadow_stack avx + corrupt_xstate_header amx lam test_shadow_stack avx apx # Some selftests require 32bit support enabled also on 64bit systems TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall @@ -136,3 +136,4 @@ $(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack $(OUTPUT)/avx_64: CFLAGS += -mno-avx -mno-avx512f $(OUTPUT)/amx_64: EXTRA_FILES += xstate.c $(OUTPUT)/avx_64: EXTRA_FILES += xstate.c +$(OUTPUT)/apx_64: EXTRA_FILES += xstate.c diff --git a/tools/testing/selftests/x86/apx.c b/tools/testing/selftests/x86/apx.c new file mode 100644 index 000000000000..d9c8d41b8c5a --- /dev/null +++ b/tools/testing/selftests/x86/apx.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include "xstate.h" + +int main(void) +{ + test_xstate(XFEATURE_APX); +} diff --git a/tools/testing/selftests/x86/bugs/Makefile b/tools/testing/selftests/x86/bugs/Makefile new file mode 100644 index 000000000000..8ff2d7226c7f --- /dev/null +++ b/tools/testing/selftests/x86/bugs/Makefile @@ -0,0 +1,3 @@ +TEST_PROGS := its_sysfs.py its_permutations.py its_indirect_alignment.py its_ret_alignment.py +TEST_FILES := common.py +include ../../lib.mk diff --git a/tools/testing/selftests/x86/bugs/common.py b/tools/testing/selftests/x86/bugs/common.py new file mode 100755 index 000000000000..2f9664a80617 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/common.py @@ -0,0 +1,164 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# This contains kselftest framework adapted common functions for testing +# mitigation for x86 bugs. + +import os, sys, re, shutil + +sys.path.insert(0, '../../kselftest') +import ksft + +def read_file(path): + if not os.path.exists(path): + return None + with open(path, 'r') as file: + return file.read().strip() + +def cpuinfo_has(arg): + cpuinfo = read_file('/proc/cpuinfo') + if arg in cpuinfo: + return True + return False + +def cmdline_has(arg): + cmdline = read_file('/proc/cmdline') + if arg in cmdline: + return True + return False + +def cmdline_has_either(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg in cmdline: + return True + return False + +def cmdline_has_none(args): + return not cmdline_has_either(args) + +def cmdline_has_all(args): + cmdline = read_file('/proc/cmdline') + for arg in args: + if arg not in cmdline: + return False + return True + +def get_sysfs(bug): + return read_file("/sys/devices/system/cpu/vulnerabilities/" + bug) + +def sysfs_has(bug, mitigation): + status = get_sysfs(bug) + if mitigation in status: + return True + return False + +def sysfs_has_either(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if sysfs_has(bug, mitigation): + return True + return False + +def sysfs_has_none(bugs, mitigations): + return not sysfs_has_either(bugs, mitigations) + +def sysfs_has_all(bugs, mitigations): + for bug in bugs: + for mitigation in mitigations: + if not sysfs_has(bug, mitigation): + return False + return True + +def bug_check_pass(bug, found): + ksft.print_msg(f"\nFound: {found}") + # ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_pass(f'{bug}: {found}') + +def bug_check_fail(bug, found, expected): + ksft.print_msg(f'\nFound:\t {found}') + ksft.print_msg(f'Expected:\t {expected}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def bug_status_unknown(bug, found): + ksft.print_msg(f'\nUnknown status: {found}') + ksft.print_msg(f"\ncmdline: {read_file('/proc/cmdline')}") + ksft.test_result_fail(f'{bug}: {found}') + +def basic_checks_sufficient(bug, mitigation): + if not mitigation: + bug_status_unknown(bug, "None") + return True + elif mitigation == "Not affected": + ksft.test_result_pass(bug) + return True + elif mitigation == "Vulnerable": + if cmdline_has_either([f'{bug}=off', 'mitigations=off']): + bug_check_pass(bug, mitigation) + return True + return False + +def get_section_info(vmlinux, section_name): + from elftools.elf.elffile import ELFFile + with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + section = elffile.get_section_by_name(section_name) + if section is None: + ksft.print_msg("Available sections in vmlinux:") + for sec in elffile.iter_sections(): + ksft.print_msg(sec.name) + raise ValueError(f"Section {section_name} not found in {vmlinux}") + return section['sh_addr'], section['sh_offset'], section['sh_size'] + +def get_patch_sites(vmlinux, offset, size): + import struct + output = [] + with open(vmlinux, 'rb') as f: + f.seek(offset) + i = 0 + while i < size: + data = f.read(4) # s32 + if not data: + break + sym_offset = struct.unpack('<i', data)[0] + i + i += 4 + output.append(sym_offset) + return output + +def get_instruction_from_vmlinux(elffile, section, virtual_address, target_address): + from capstone import Cs, CS_ARCH_X86, CS_MODE_64 + section_start = section['sh_addr'] + section_end = section_start + section['sh_size'] + + if not (section_start <= target_address < section_end): + return None + + offset = target_address - section_start + code = section.data()[offset:offset + 16] + + cap = init_capstone() + for instruction in cap.disasm(code, target_address): + if instruction.address == target_address: + return instruction + return None + +def init_capstone(): + from capstone import Cs, CS_ARCH_X86, CS_MODE_64, CS_OPT_SYNTAX_ATT + cap = Cs(CS_ARCH_X86, CS_MODE_64) + cap.syntax = CS_OPT_SYNTAX_ATT + return cap + +def get_runtime_kernel(): + import drgn + return drgn.program_from_kernel() + +def check_dependencies_or_skip(modules, script_name="unknown test"): + for mod in modules: + try: + __import__(mod) + except ImportError: + ksft.test_result_skip(f"Skipping {script_name}: missing module '{mod}'") + ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_indirect_alignment.py b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py new file mode 100755 index 000000000000..cdc33ae6a91c --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_indirect_alignment.py @@ -0,0 +1,150 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) mitigation. +# +# Test if indirect CALL/JMP are correctly patched by evaluating +# the vmlinux .retpoline_sites in /proc/kcore. + +# Install dependencies +# add-apt-repository ppa:michel-slm/kernel-utils +# apt update +# apt install -y python3-drgn python3-pyelftools python3-capstone +# +# Best to copy the vmlinux at a standard location: +# mkdir -p /usr/lib/debug/lib/modules/$(uname -r) +# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux +# +# Usage: ./its_indirect_alignment.py [vmlinux] + +import os, sys, argparse +from pathlib import Path + +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" + +mitigation = c.get_sysfs(bug) +if not mitigation or "Aligned branch/return thunks" not in mitigation: + ksft.test_result_skip("Skipping its_indirect_alignment.py: Aligned branch/return thunks not enabled") + ksft.finished() + +if c.sysfs_has("spectre_v2", "Retpolines"): + ksft.test_result_skip("Skipping its_indirect_alignment.py: Retpolines deployed") + ksft.finished() + +c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_indirect_alignment.py") + +from elftools.elf.elffile import ELFFile +from drgn.helpers.common.memory import identify_address + +cap = c.init_capstone() + +if len(os.sys.argv) > 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at argument path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_indirect_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +retpolines_start_vmlinux, retpolines_sec_offset, size = c.get_section_info(vmlinux, '.retpoline_sites') +ksft.print_msg(f"vmlinux: Section .retpoline_sites (0x{retpolines_start_vmlinux:x}) found at 0x{retpolines_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, retpolines_sec_offset, size) +total_retpoline_tests = len(sites_offset) +ksft.print_msg(f"Found {total_retpoline_tests} retpoline sites") + +prog = c.get_runtime_kernel() +retpolines_start_kcore = prog.symbol('__retpoline_sites').address +ksft.print_msg(f'kcore: __retpoline_sites: 0x{retpolines_start_kcore:x}') + +x86_indirect_its_thunk_r15 = prog.symbol('__x86_indirect_its_thunk_r15').address +ksft.print_msg(f'kcore: __x86_indirect_its_thunk_r15: 0x{x86_indirect_its_thunk_r15:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(0, len(sites_offset)): + site = retpolines_start_kcore + sites_offset[i] + vmlinux_site = retpolines_start_vmlinux + sites_offset[i] + passed = unknown = failed = False + try: + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + operand = kcore_insn.op_str + insn_end = site + kcore_insn.size - 1 # TODO handle Jcc.32 __x86_indirect_thunk_\reg + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {identify_address(prog, site)} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if (site & 0x20) ^ (insn_end & 0x20): + ksft.print_msg(f"\tSite at safe/unsafe boundary: {str(kcore_insn.bytes)} {kcore_insn.mnemonic} {operand}") + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if operand.startswith('0xffffffff'): + thunk = int(operand, 16) + if thunk > x86_indirect_its_thunk_r15: + insn_at_thunk = list(cap.disasm(prog.read(thunk, 16), thunk))[0] + operand += ' -> ' + insn_at_thunk.mnemonic + ' ' + insn_at_thunk.op_str + ' <dynamic-thunk?>' + if 'jmp' in insn_at_thunk.mnemonic and thunk & 0x20: + ksft.print_msg(f"\tPASSED: Found {operand} at safe address") + passed = True + if not passed: + if kcore_insn.operands[0].type == capstone.CS_OP_IMM: + operand += ' <' + prog.symbol(int(operand, 16)) + '>' + if '__x86_indirect_its_thunk_' in operand: + ksft.print_msg(f"\tPASSED: Found {operand}") + else: + ksft.print_msg(f"\tPASSED: Found direct branch: {kcore_insn}, ITS thunk not required.") + passed = True + else: + unknown = True + if passed: + tests_passed += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: unexpected operand: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.bytes} {kcore_insn.mnemonic} {operand}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASS: \t{tests_passed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"FAIL: \t{tests_failed} \t/ {total_retpoline_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_retpoline_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed") +else: + ksft.test_result_fail(f"{tests_failed} ITS return thunk sites failed") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_permutations.py b/tools/testing/selftests/x86/bugs/its_permutations.py new file mode 100755 index 000000000000..3204f4728c62 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_permutations.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) cmdline permutations with other bugs +# like spectre_v2 and retbleed. + +import os, sys, subprocess, itertools, re, shutil + +test_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, test_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) + +if not mitigation or "Not affected" in mitigation: + ksft.test_result_skip("Skipping its_permutations.py: not applicable") + ksft.finished() + +if shutil.which('vng') is None: + ksft.test_result_skip("Skipping its_permutations.py: virtme-ng ('vng') not found in PATH.") + ksft.finished() + +TEST = f"{test_dir}/its_sysfs.py" +default_kparam = ['clearcpuid=hypervisor', 'panic=5', 'panic_on_warn=1', 'oops=panic', 'nmi_watchdog=1', 'hung_task_panic=1'] + +DEBUG = " -v " + +# Install dependencies +# https://github.com/arighi/virtme-ng +# apt install virtme-ng +BOOT_CMD = f"vng --run {test_dir}/../../../../../arch/x86/boot/bzImage " +#BOOT_CMD += DEBUG + +bug = "indirect_target_selection" + +input_options = { + 'indirect_target_selection' : ['off', 'on', 'stuff', 'vmexit'], + 'retbleed' : ['off', 'stuff', 'auto'], + 'spectre_v2' : ['off', 'on', 'eibrs', 'retpoline', 'ibrs', 'eibrs,retpoline'], +} + +def pretty_print(output): + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + + # Define patterns and their corresponding colors + patterns = { + r"^ok \d+": OKGREEN, + r"^not ok \d+": FAIL, + r"^# Testing .*": OKBLUE, + r"^# Found: .*": WARNING, + r"^# Totals: .*": BOLD, + r"pass:([1-9]\d*)": OKGREEN, + r"fail:([1-9]\d*)": FAIL, + r"skip:([1-9]\d*)": WARNING, + } + + # Apply colors based on patterns + for pattern, color in patterns.items(): + output = re.sub(pattern, lambda match: f"{color}{match.group(0)}{ENDC}", output, flags=re.MULTILINE) + + print(output) + +combinations = list(itertools.product(*input_options.values())) +ksft.print_header() +ksft.set_plan(len(combinations)) + +logs = "" + +for combination in combinations: + append = "" + log = "" + for p in default_kparam: + append += f' --append={p}' + command = BOOT_CMD + append + test_params = "" + for i, key in enumerate(input_options.keys()): + param = f'{key}={combination[i]}' + test_params += f' {param}' + command += f" --append={param}" + command += f" -- {TEST}" + test_name = f"{bug} {test_params}" + pretty_print(f'# Testing {test_name}') + t = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + t.wait() + output, _ = t.communicate() + if t.returncode == 0: + ksft.test_result_pass(test_name) + else: + ksft.test_result_fail(test_name) + output = output.decode() + log += f" {output}" + pretty_print(log) + logs += output + "\n" + +# Optionally use tappy to parse the output +# apt install python3-tappy +with open("logs.txt", "w") as f: + f.write(logs) + +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_ret_alignment.py b/tools/testing/selftests/x86/bugs/its_ret_alignment.py new file mode 100755 index 000000000000..f40078d9f6ff --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_ret_alignment.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for indirect target selection (ITS) mitigation. +# +# Tests if the RETs are correctly patched by evaluating the +# vmlinux .return_sites in /proc/kcore. +# +# Install dependencies +# add-apt-repository ppa:michel-slm/kernel-utils +# apt update +# apt install -y python3-drgn python3-pyelftools python3-capstone +# +# Run on target machine +# mkdir -p /usr/lib/debug/lib/modules/$(uname -r) +# cp $VMLINUX /usr/lib/debug/lib/modules/$(uname -r)/vmlinux +# +# Usage: ./its_ret_alignment.py + +import os, sys, argparse +from pathlib import Path + +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft +import common as c + +bug = "indirect_target_selection" +mitigation = c.get_sysfs(bug) +if not mitigation or "Aligned branch/return thunks" not in mitigation: + ksft.test_result_skip("Skipping its_ret_alignment.py: Aligned branch/return thunks not enabled") + ksft.finished() + +c.check_dependencies_or_skip(['drgn', 'elftools', 'capstone'], script_name="its_ret_alignment.py") + +from elftools.elf.elffile import ELFFile +from drgn.helpers.common.memory import identify_address + +cap = c.init_capstone() + +if len(os.sys.argv) > 1: + arg_vmlinux = os.sys.argv[1] + if not os.path.exists(arg_vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at user-supplied path: {arg_vmlinux}") + ksft.exit_fail() + os.makedirs(f"/usr/lib/debug/lib/modules/{os.uname().release}", exist_ok=True) + os.system(f'cp {arg_vmlinux} /usr/lib/debug/lib/modules/$(uname -r)/vmlinux') + +vmlinux = f"/usr/lib/debug/lib/modules/{os.uname().release}/vmlinux" +if not os.path.exists(vmlinux): + ksft.test_result_fail(f"its_ret_alignment.py: vmlinux not found at {vmlinux}") + ksft.exit_fail() + +ksft.print_msg(f"Using vmlinux: {vmlinux}") + +rethunks_start_vmlinux, rethunks_sec_offset, size = c.get_section_info(vmlinux, '.return_sites') +ksft.print_msg(f"vmlinux: Section .return_sites (0x{rethunks_start_vmlinux:x}) found at 0x{rethunks_sec_offset:x} with size 0x{size:x}") + +sites_offset = c.get_patch_sites(vmlinux, rethunks_sec_offset, size) +total_rethunk_tests = len(sites_offset) +ksft.print_msg(f"Found {total_rethunk_tests} rethunk sites") + +prog = c.get_runtime_kernel() +rethunks_start_kcore = prog.symbol('__return_sites').address +ksft.print_msg(f'kcore: __rethunk_sites: 0x{rethunks_start_kcore:x}') + +its_return_thunk = prog.symbol('its_return_thunk').address +ksft.print_msg(f'kcore: its_return_thunk: 0x{its_return_thunk:x}') + +tests_passed = 0 +tests_failed = 0 +tests_unknown = 0 +tests_skipped = 0 + +with open(vmlinux, 'rb') as f: + elffile = ELFFile(f) + text_section = elffile.get_section_by_name('.text') + + for i in range(len(sites_offset)): + site = rethunks_start_kcore + sites_offset[i] + vmlinux_site = rethunks_start_vmlinux + sites_offset[i] + try: + passed = unknown = failed = skipped = False + + symbol = identify_address(prog, site) + vmlinux_insn = c.get_instruction_from_vmlinux(elffile, text_section, text_section['sh_addr'], vmlinux_site) + kcore_insn = list(cap.disasm(prog.read(site, 16), site))[0] + + insn_end = site + kcore_insn.size - 1 + + safe_site = insn_end & 0x20 + site_status = "" if safe_site else "(unsafe)" + + ksft.print_msg(f"\nSite {i}: {symbol} <0x{site:x}> {site_status}") + ksft.print_msg(f"\tvmlinux: 0x{vmlinux_insn.address:x}:\t{vmlinux_insn.mnemonic}\t{vmlinux_insn.op_str}") + ksft.print_msg(f"\tkcore: 0x{kcore_insn.address:x}:\t{kcore_insn.mnemonic}\t{kcore_insn.op_str}") + + if safe_site: + tests_passed += 1 + passed = True + ksft.print_msg(f"\tPASSED: At safe address") + continue + + if "jmp" in kcore_insn.mnemonic: + passed = True + elif "ret" not in kcore_insn.mnemonic: + skipped = True + + if passed: + ksft.print_msg(f"\tPASSED: Found {kcore_insn.mnemonic} {kcore_insn.op_str}") + tests_passed += 1 + elif skipped: + ksft.print_msg(f"\tSKIPPED: Found '{kcore_insn.mnemonic}'") + tests_skipped += 1 + elif unknown: + ksft.print_msg(f"UNKNOWN: An unknown instruction: {kcore_insn}") + tests_unknown += 1 + else: + ksft.print_msg(f'\t************* FAILED *************') + ksft.print_msg(f"\tFound {kcore_insn.mnemonic} {kcore_insn.op_str}") + ksft.print_msg(f'\t**********************************') + tests_failed += 1 + except Exception as e: + ksft.print_msg(f"UNKNOWN: An unexpected error occurred: {e}") + tests_unknown += 1 + +ksft.print_msg(f"\n\nSummary:") +ksft.print_msg(f"PASSED: \t{tests_passed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"FAILED: \t{tests_failed} \t/ {total_rethunk_tests}") +ksft.print_msg(f"SKIPPED: \t{tests_skipped} \t/ {total_rethunk_tests}") +ksft.print_msg(f"UNKNOWN: \t{tests_unknown} \t/ {total_rethunk_tests}") + +if tests_failed == 0: + ksft.test_result_pass("All ITS return thunk sites passed.") +else: + ksft.test_result_fail(f"{tests_failed} failed sites need ITS return thunks.") +ksft.finished() diff --git a/tools/testing/selftests/x86/bugs/its_sysfs.py b/tools/testing/selftests/x86/bugs/its_sysfs.py new file mode 100755 index 000000000000..7bca81f2f606 --- /dev/null +++ b/tools/testing/selftests/x86/bugs/its_sysfs.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2025 Intel Corporation +# +# Test for Indirect Target Selection(ITS) mitigation sysfs status. + +import sys, os, re +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, this_dir + '/../../kselftest') +import ksft + +from common import * + +bug = "indirect_target_selection" +mitigation = get_sysfs(bug) + +ITS_MITIGATION_ALIGNED_THUNKS = "Mitigation: Aligned branch/return thunks" +ITS_MITIGATION_RETPOLINE_STUFF = "Mitigation: Retpolines, Stuffing RSB" +ITS_MITIGATION_VMEXIT_ONLY = "Mitigation: Vulnerable, KVM: Not affected" +ITS_MITIGATION_VULNERABLE = "Vulnerable" + +def check_mitigation(): + if mitigation == ITS_MITIGATION_ALIGNED_THUNKS: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_RETPOLINE_STUFF) + return + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_fail(bug, ITS_MITIGATION_ALIGNED_THUNKS, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_pass(bug, ITS_MITIGATION_ALIGNED_THUNKS) + return + + if mitigation == ITS_MITIGATION_RETPOLINE_STUFF: + if cmdline_has(f'{bug}=stuff') and sysfs_has("spectre_v2", "Retpolines"): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + if sysfs_has('retbleed', 'Stuffing'): + bug_check_pass(bug, ITS_MITIGATION_RETPOLINE_STUFF) + return + bug_check_fail(bug, ITS_MITIGATION_RETPOLINE_STUFF, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VMEXIT_ONLY: + if cmdline_has(f'{bug}=vmexit') and cpuinfo_has('its_native_only'): + bug_check_pass(bug, ITS_MITIGATION_VMEXIT_ONLY) + return + bug_check_fail(bug, ITS_MITIGATION_VMEXIT_ONLY, ITS_MITIGATION_ALIGNED_THUNKS) + + if mitigation == ITS_MITIGATION_VULNERABLE: + if sysfs_has("spectre_v2", "Vulnerable"): + bug_check_pass(bug, ITS_MITIGATION_VULNERABLE) + else: + bug_check_fail(bug, "Mitigation", ITS_MITIGATION_VULNERABLE) + + bug_status_unknown(bug, mitigation) + return + +ksft.print_header() +ksft.set_plan(1) +ksft.print_msg(f'{bug}: {mitigation} ...') + +if not basic_checks_sufficient(bug, mitigation): + check_mitigation() + +ksft.finished() diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c index 18d736640ece..0873b0e5f48b 100644 --- a/tools/testing/selftests/x86/lam.c +++ b/tools/testing/selftests/x86/lam.c @@ -682,7 +682,7 @@ int do_uring(unsigned long lam) return 1; if (fstat(file_fd, &st) < 0) - return 1; + goto cleanup; off_t file_sz = st.st_size; @@ -690,7 +690,7 @@ int do_uring(unsigned long lam) fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks); if (!fi) - return 1; + goto cleanup; fi->file_sz = file_sz; fi->file_fd = file_fd; @@ -698,7 +698,7 @@ int do_uring(unsigned long lam) ring = malloc(sizeof(*ring)); if (!ring) { free(fi); - return 1; + goto cleanup; } memset(ring, 0, sizeof(struct io_ring)); @@ -729,6 +729,8 @@ out: } free(fi); +cleanup: + close(file_fd); return ret; } @@ -1189,6 +1191,7 @@ void *allocate_dsa_pasid(void) wq = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); + close(fd); if (wq == MAP_FAILED) perror("mmap"); diff --git a/tools/testing/selftests/x86/xstate.c b/tools/testing/selftests/x86/xstate.c index 23c1d6c964ea..97fe4bd8bc77 100644 --- a/tools/testing/selftests/x86/xstate.c +++ b/tools/testing/selftests/x86/xstate.c @@ -31,7 +31,8 @@ (1 << XFEATURE_OPMASK) | \ (1 << XFEATURE_ZMM_Hi256) | \ (1 << XFEATURE_Hi16_ZMM) | \ - (1 << XFEATURE_XTILEDATA)) + (1 << XFEATURE_XTILEDATA) | \ + (1 << XFEATURE_APX)) static inline uint64_t xgetbv(uint32_t index) { diff --git a/tools/testing/selftests/x86/xstate.h b/tools/testing/selftests/x86/xstate.h index 42af36ec852f..e91e3092b5d2 100644 --- a/tools/testing/selftests/x86/xstate.h +++ b/tools/testing/selftests/x86/xstate.h @@ -33,6 +33,7 @@ enum xfeature { XFEATURE_RSRVD_COMP_16, XFEATURE_XTILECFG, XFEATURE_XTILEDATA, + XFEATURE_APX, XFEATURE_MAX, }; @@ -59,6 +60,7 @@ static const char *xfeature_names[] = "unknown xstate feature", "AMX Tile config", "AMX Tile data", + "APX registers", "unknown xstate feature", }; diff --git a/tools/testing/shared/linux.c b/tools/testing/shared/linux.c index 66dbb362385f..0f97fb0d19e1 100644 --- a/tools/testing/shared/linux.c +++ b/tools/testing/shared/linux.c @@ -150,7 +150,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp) void kmem_cache_free_bulk(struct kmem_cache *cachep, size_t size, void **list) { if (kmalloc_verbose) - pr_debug("Bulk free %p[0-%lu]\n", list, size - 1); + pr_debug("Bulk free %p[0-%zu]\n", list, size - 1); pthread_mutex_lock(&cachep->lock); for (int i = 0; i < size; i++) @@ -168,7 +168,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *cachep, gfp_t gfp, size_t size, size_t i; if (kmalloc_verbose) - pr_debug("Bulk alloc %lu\n", size); + pr_debug("Bulk alloc %zu\n", size); pthread_mutex_lock(&cachep->lock); if (cachep->nr_objs >= size) { diff --git a/tools/testing/shared/linux/cleanup.h b/tools/testing/shared/linux/cleanup.h new file mode 100644 index 000000000000..ea3081426ee9 --- /dev/null +++ b/tools/testing/shared/linux/cleanup.h @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../../../../include/linux/cleanup.h" diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index d0f6d253ac72..613551132a96 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -1264,21 +1264,25 @@ static void test_unsent_bytes_client(const struct test_opts *opts, int type) send_buf(fd, buf, sizeof(buf), 0, sizeof(buf)); control_expectln("RECEIVED"); - ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); - if (ret < 0) { - if (errno == EOPNOTSUPP) { - fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); - } else { + /* SIOCOUTQ isn't guaranteed to instantly track sent data. Even though + * the "RECEIVED" message means that the other side has received the + * data, there can be a delay in our kernel before updating the "unsent + * bytes" counter. Repeat SIOCOUTQ until it returns 0. + */ + timeout_begin(TIMEOUT); + do { + ret = ioctl(fd, SIOCOUTQ, &sock_bytes_unsent); + if (ret < 0) { + if (errno == EOPNOTSUPP) { + fprintf(stderr, "Test skipped, SIOCOUTQ not supported.\n"); + break; + } perror("ioctl"); exit(EXIT_FAILURE); } - } else if (ret == 0 && sock_bytes_unsent != 0) { - fprintf(stderr, - "Unexpected 'SIOCOUTQ' value, expected 0, got %i\n", - sock_bytes_unsent); - exit(EXIT_FAILURE); - } - + timeout_check("SIOCOUTQ"); + } while (sock_bytes_unsent != 0); + timeout_end(); close(fd); } |